github.com/pdfcpu/pdfcpu@v0.11.1/pkg/api/selectPages.go (about) 1 /* 2 Copyright 2018 The pdfcpu Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package api 18 19 import ( 20 "fmt" 21 "regexp" 22 "sort" 23 "strconv" 24 "strings" 25 26 "github.com/pdfcpu/pdfcpu/pkg/log" 27 "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types" 28 "github.com/pkg/errors" 29 ) 30 31 var ( 32 selectedPagesRegExp *regexp.Regexp 33 ) 34 35 func setupRegExpForPageSelection() *regexp.Regexp { 36 e := "(\\d+)?-l(-\\d+)?|l(-(\\d+)-?)?" 37 e = "[!n]?((-\\d+)|(\\d+(-(\\d+)?)?)|" + e + ")" 38 e = "\\Qeven\\E|\\Qodd\\E|" + e 39 exp := "^" + e + "(," + e + ")*$" 40 re, _ := regexp.Compile(exp) 41 return re 42 } 43 44 func init() { 45 selectedPagesRegExp = setupRegExpForPageSelection() 46 } 47 48 // ParsePageSelection ensures a correct page selection expression. 49 func ParsePageSelection(s string) ([]string, error) { 50 if s == "" { 51 return nil, nil 52 } 53 54 // Ensure valid comma separated expression of:{ {even|odd}{!}{-}# | {even|odd}{!}#-{#} }* 55 // 56 // Negated expressions: 57 // '!' negates an expression 58 // since '!' needs to be part of a single quoted string in bash 59 // as an alternative also 'n' works instead of "!" 60 // 61 // Extract all but page 4 may be expressed as: "1-,!4" or "1-,n4" 62 // 63 // The pageSelection is evaluated strictly from left to right! 64 // e.g. "!3,1-5" extracts pages 1-5 whereas "1-5,!3" extracts pages 1,2,4,5 65 // 66 67 if !selectedPagesRegExp.MatchString(s) { 68 return nil, errors.Errorf("-pages \"%s\" => syntax error\n", s) 69 } 70 71 //log.CLI.Printf("pageSelection: %s\n", s) 72 73 return strings.Split(s, ","), nil 74 } 75 76 func handlePrefix(v string, negated bool, pageCount int, selectedPages types.IntSet) error { 77 // -l 78 if v == "l" { 79 for j := 1; j <= pageCount; j++ { 80 selectedPages[j] = !negated 81 } 82 return nil 83 } 84 85 // -l-# 86 if strings.HasPrefix(v, "l-") { 87 i, err := strconv.Atoi(v[2:]) 88 if err != nil { 89 return err 90 } 91 if pageCount-i < 1 { 92 return nil 93 } 94 for j := 1; j <= pageCount-i; j++ { 95 selectedPages[j] = !negated 96 } 97 return nil 98 } 99 100 // -# 101 i, err := strconv.Atoi(v) 102 if err != nil { 103 return err 104 } 105 106 // Handle overflow gracefully 107 if i > pageCount { 108 i = pageCount 109 } 110 111 // identified 112 // -# ... select all pages up to and including # 113 // or !-# ... deselect all pages up to and including # 114 for j := 1; j <= i; j++ { 115 selectedPages[j] = !negated 116 } 117 118 return nil 119 } 120 121 func handleSuffix(v string, negated bool, pageCount int, selectedPages types.IntSet) error { 122 // must be #- ... select all pages from here until the end. 123 // or !#- ... deselect all pages from here until the end. 124 125 i, err := strconv.Atoi(v) 126 if err != nil { 127 return err 128 } 129 130 // Handle overflow gracefully 131 if i > pageCount { 132 return nil 133 } 134 135 for j := i; j <= pageCount; j++ { 136 selectedPages[j] = !negated 137 } 138 139 return nil 140 } 141 142 func handleSpecificPageOrLastXPages(s string, negated bool, pageCount int, selectedPages types.IntSet) error { 143 // l 144 if s == "l" { 145 selectedPages[pageCount] = !negated 146 return nil 147 } 148 149 // l-# 150 if strings.HasPrefix(s, "l-") { 151 pr := strings.Split(s[2:], "-") 152 i, err := strconv.Atoi(pr[0]) 153 if err != nil { 154 return err 155 } 156 if pageCount-i < 1 { 157 return nil 158 } 159 j := pageCount - i 160 161 // l-#- 162 if strings.HasSuffix(s, "-") { 163 j = pageCount 164 } 165 for i := pageCount - i; i <= j; i++ { 166 selectedPages[i] = !negated 167 } 168 return nil 169 } 170 171 // must be # ... select a specific page 172 // or !# ... deselect a specific page 173 i, err := strconv.Atoi(s) 174 if err != nil { 175 return err 176 } 177 178 // Handle overflow gracefully 179 if i > pageCount { 180 return nil 181 } 182 183 selectedPages[i] = !negated 184 185 return nil 186 } 187 188 func negation(c byte) bool { 189 return c == '!' || c == 'n' 190 } 191 192 func selectEvenPages(selectedPages types.IntSet, pageCount int) { 193 for i := 2; i <= pageCount; i += 2 { 194 _, found := selectedPages[i] 195 if !found { 196 selectedPages[i] = true 197 } 198 } 199 } 200 201 func selectOddPages(selectedPages types.IntSet, pageCount int) { 202 for i := 1; i <= pageCount; i += 2 { 203 _, found := selectedPages[i] 204 if !found { 205 selectedPages[i] = true 206 } 207 } 208 } 209 210 func parsePageRange(pr []string, pageCount int, negated bool, selectedPages types.IntSet) error { 211 from, err := strconv.Atoi(pr[0]) 212 if err != nil { 213 return err 214 } 215 216 // Handle overflow gracefully 217 if from > pageCount { 218 return nil 219 } 220 221 var thru int 222 if pr[1] == "l" { 223 // #-l 224 thru = pageCount 225 if len(pr) == 3 { 226 // #-l-# 227 i, err := strconv.Atoi(pr[2]) 228 if err != nil { 229 return err 230 } 231 thru -= i 232 } 233 } else { 234 // #-# 235 var err error 236 thru, err = strconv.Atoi(pr[1]) 237 if err != nil { 238 return err 239 } 240 } 241 242 // Handle overflow gracefully 243 if thru < from { 244 return nil 245 } 246 247 if thru > pageCount { 248 thru = pageCount 249 } 250 251 for i := from; i <= thru; i++ { 252 selectedPages[i] = !negated 253 } 254 255 return nil 256 } 257 258 func sortedPages(selectedPages types.IntSet) []int { 259 p := []int(nil) 260 for i, v := range selectedPages { 261 if v { 262 p = append(p, i) 263 } 264 } 265 sort.Ints(p) 266 return p 267 } 268 269 func logSelPages(selectedPages types.IntSet) { 270 if !log.CLIEnabled() || len(selectedPages) == 0 { 271 return 272 } 273 var b strings.Builder 274 for _, i := range sortedPages(selectedPages) { 275 fmt.Fprintf(&b, "%d,", i) 276 } 277 s := b.String() 278 if len(s) > 1 { 279 s = s[:len(s)-1] 280 } 281 // TODO Suppress for multifile cmds 282 if log.CLIEnabled() { 283 log.CLI.Printf("pages: %s\n", s) 284 } 285 } 286 287 func calcSelPages(pageCount int, pageSelection []string, selectedPages types.IntSet) error { 288 for _, v := range pageSelection { 289 290 //log.Stats.Printf("pageExp: <%s>\n", v) 291 292 if v == "even" { 293 selectEvenPages(selectedPages, pageCount) 294 continue 295 } 296 297 if v == "odd" { 298 selectOddPages(selectedPages, pageCount) 299 continue 300 } 301 302 var negated bool 303 if negation(v[0]) { 304 negated = true 305 //logInfoAPI.Printf("is a negated exp\n") 306 v = v[1:] 307 } 308 309 // -# 310 if v[0] == '-' { 311 312 v = v[1:] 313 314 if err := handlePrefix(v, negated, pageCount, selectedPages); err != nil { 315 return err 316 } 317 318 continue 319 } 320 321 // #- 322 if v[0] != 'l' && strings.HasSuffix(v, "-") { 323 324 if err := handleSuffix(v[:len(v)-1], negated, pageCount, selectedPages); err != nil { 325 return err 326 } 327 328 continue 329 } 330 331 // l l-# l-#- 332 if v[0] == 'l' { 333 if err := handleSpecificPageOrLastXPages(v, negated, pageCount, selectedPages); err != nil { 334 return err 335 } 336 continue 337 } 338 339 pr := strings.Split(v, "-") 340 if len(pr) >= 2 { 341 // v contains '-' somewhere in the middle 342 // #-# #-l #-l-# 343 if err := parsePageRange(pr, pageCount, negated, selectedPages); err != nil { 344 return err 345 } 346 347 continue 348 } 349 350 // # 351 if err := handleSpecificPageOrLastXPages(pr[0], negated, pageCount, selectedPages); err != nil { 352 return err 353 } 354 355 } 356 357 return nil 358 } 359 360 // selectedPages returns a set of used page numbers. 361 // key==page# => key 0 unused! 362 func selectedPages(pageCount int, pageSelection []string, log bool) (types.IntSet, error) { 363 selectedPages := types.IntSet{} 364 365 if err := calcSelPages(pageCount, pageSelection, selectedPages); err != nil { 366 return nil, err 367 } 368 369 if log { 370 logSelPages(selectedPages) 371 } 372 373 return selectedPages, nil 374 } 375 376 // PagesForPageSelection ensures a set of page numbers for an ascending page sequence 377 // where each page number may appear only once. 378 func PagesForPageSelection(pageCount int, pageSelection []string, ensureAllforNone bool, log bool) (types.IntSet, error) { 379 if len(pageSelection) > 0 { 380 return selectedPages(pageCount, pageSelection, log) 381 } 382 if !ensureAllforNone { 383 //log.CLI.Printf("pages: none\n") 384 return nil, nil 385 } 386 m := types.IntSet{} 387 for i := 1; i <= pageCount; i++ { 388 m[i] = true 389 } 390 //log.CLI.Printf("pages: all\n") 391 return m, nil 392 } 393 394 func RemainingPagesForPageRemoval(pageCount int, pageSelection []string, log bool) (types.IntSet, error) { 395 pagesToRemove, err := selectedPages(pageCount, pageSelection, log) 396 if err != nil { 397 return nil, err 398 } 399 400 m := types.IntSet{} 401 for i := 1; i <= pageCount; i++ { 402 m[i] = true 403 } 404 405 for k, v := range pagesToRemove { 406 if v { 407 m[k] = false 408 } 409 } 410 411 return m, nil 412 } 413 414 func deletePageFromCollection(cp *[]int, p int) { 415 a := []int{} 416 for _, i := range *cp { 417 if i != p { 418 a = append(a, i) 419 } 420 } 421 *cp = a 422 } 423 424 func processPageForCollection(cp *[]int, negated bool, i int) { 425 if !negated { 426 *cp = append(*cp, i) 427 } else { 428 deletePageFromCollection(cp, i) 429 } 430 } 431 432 func collectEvenPages(cp *[]int, pageCount int) { 433 for i := 2; i <= pageCount; i += 2 { 434 *cp = append(*cp, i) 435 } 436 } 437 438 func collectOddPages(cp *[]int, pageCount int) { 439 for i := 1; i <= pageCount; i += 2 { 440 *cp = append(*cp, i) 441 } 442 } 443 444 func handlePrefixForCollection(v string, negated bool, pageCount int, cp *[]int) error { 445 // -l 446 if v == "l" { 447 for j := 1; j <= pageCount; j++ { 448 processPageForCollection(cp, negated, j) 449 } 450 return nil 451 } 452 453 // -l-# 454 if strings.HasPrefix(v, "l-") { 455 i, err := strconv.Atoi(v[2:]) 456 if err != nil { 457 return err 458 } 459 if pageCount-i < 1 { 460 return nil 461 } 462 for j := 1; j <= pageCount-i; j++ { 463 processPageForCollection(cp, negated, j) 464 } 465 return nil 466 } 467 468 // -# 469 i, err := strconv.Atoi(v) 470 if err != nil { 471 return err 472 } 473 474 // Handle overflow gracefully 475 if i > pageCount { 476 i = pageCount 477 } 478 479 // identified 480 // -# ... select all pages up to and including # 481 // or !-# ... deselect all pages up to and including # 482 for j := 1; j <= i; j++ { 483 processPageForCollection(cp, negated, j) 484 } 485 486 return nil 487 } 488 489 func handleSuffixForCollection(v string, negated bool, pageCount int, cp *[]int) error { 490 // must be #- ... select all pages from here until the end. 491 // or !#- ... deselect all pages from here until the end. 492 493 i, err := strconv.Atoi(v) 494 if err != nil { 495 return err 496 } 497 498 // Handle overflow gracefully 499 if i > pageCount { 500 return nil 501 } 502 503 for j := i; j <= pageCount; j++ { 504 processPageForCollection(cp, negated, j) 505 } 506 507 return nil 508 } 509 510 func handleSpecificPageOrLastXPagesForCollection(s string, negated bool, pageCount int, cp *[]int) error { 511 // l 512 if s == "l" { 513 processPageForCollection(cp, negated, pageCount) 514 return nil 515 } 516 517 // l-# 518 if strings.HasPrefix(s, "l-") { 519 pr := strings.Split(s[2:], "-") 520 i, err := strconv.Atoi(pr[0]) 521 if err != nil { 522 return err 523 } 524 if pageCount-i < 1 { 525 return nil 526 } 527 j := pageCount - i 528 529 // l-#- 530 if strings.HasSuffix(s, "-") { 531 j = pageCount 532 } 533 for i := pageCount - i; i <= j; i++ { 534 processPageForCollection(cp, negated, i) 535 } 536 return nil 537 } 538 539 // must be # ... select a specific page 540 // or !# ... deselect a specific page 541 i, err := strconv.Atoi(s) 542 if err != nil { 543 return err 544 } 545 546 // Handle overflow gracefully 547 if i > pageCount { 548 return nil 549 } 550 551 processPageForCollection(cp, negated, i) 552 553 return nil 554 } 555 556 func parsePageRangeForCollection(pr []string, pageCount int, negated bool, cp *[]int) error { 557 from, err := strconv.Atoi(pr[0]) 558 if err != nil { 559 return err 560 } 561 562 // Handle overflow gracefully 563 if from > pageCount { 564 return nil 565 } 566 567 var thru int 568 if pr[1] == "l" { 569 // #-l 570 thru = pageCount 571 if len(pr) == 3 { 572 // #-l-# 573 i, err := strconv.Atoi(pr[2]) 574 if err != nil { 575 return err 576 } 577 thru -= i 578 } 579 } else { 580 // #-# 581 var err error 582 thru, err = strconv.Atoi(pr[1]) 583 if err != nil { 584 return err 585 } 586 } 587 588 // Handle overflow gracefully 589 if thru < from { 590 return nil 591 } 592 593 if thru > pageCount { 594 thru = pageCount 595 } 596 597 for i := from; i <= thru; i++ { 598 processPageForCollection(cp, negated, i) 599 } 600 601 return nil 602 } 603 604 func calcPagesForPageCollection(pageCount int, pageSelection []string) ([]int, error) { 605 collectedPages := []int{} 606 607 for _, v := range pageSelection { 608 609 if v == "even" { 610 collectEvenPages(&collectedPages, pageCount) 611 continue 612 } 613 614 if v == "odd" { 615 collectOddPages(&collectedPages, pageCount) 616 continue 617 } 618 619 var negated bool 620 if negation(v[0]) { 621 negated = true 622 //logInfoAPI.Printf("is a negated exp\n") 623 v = v[1:] 624 } 625 626 // -# 627 if v[0] == '-' { 628 629 v = v[1:] 630 631 if err := handlePrefixForCollection(v, negated, pageCount, &collectedPages); err != nil { 632 return nil, err 633 } 634 635 continue 636 } 637 638 // #- 639 if v[0] != 'l' && strings.HasSuffix(v, "-") { 640 641 if err := handleSuffixForCollection(v[:len(v)-1], negated, pageCount, &collectedPages); err != nil { 642 return nil, err 643 } 644 645 continue 646 } 647 648 // l l-# l-#- 649 if v[0] == 'l' { 650 if err := handleSpecificPageOrLastXPagesForCollection(v, negated, pageCount, &collectedPages); err != nil { 651 return nil, err 652 } 653 continue 654 } 655 656 pr := strings.Split(v, "-") 657 if len(pr) >= 2 { 658 // v contains '-' somewhere in the middle 659 // #-# #-l #-l-# 660 if err := parsePageRangeForCollection(pr, pageCount, negated, &collectedPages); err != nil { 661 return nil, err 662 } 663 664 continue 665 } 666 667 // # 668 if err := handleSpecificPageOrLastXPagesForCollection(pr[0], negated, pageCount, &collectedPages); err != nil { 669 return nil, err 670 } 671 } 672 673 return collectedPages, nil 674 } 675 676 // PagesForPageCollection returns a slice of page numbers for a page collection. 677 // Any page number in any order any number of times allowed. 678 func PagesForPageCollection(pageCount int, pageSelection []string) ([]int, error) { 679 collectedPages, err := calcPagesForPageCollection(pageCount, pageSelection) 680 if err != nil { 681 return nil, err 682 } 683 684 if len(collectedPages) == 0 { 685 return nil, errors.Errorf("pdfcpu: no page selected") 686 } 687 688 return collectedPages, nil 689 } 690 691 // PagesForPageRange returns a slice of page numbers for a page range. 692 func PagesForPageRange(from, thru int) []int { 693 s := make([]int, thru-from+1) 694 for i := 0; i < len(s); i++ { 695 s[i] = from + i 696 } 697 return s 698 }