github.com/pdfcpu/pdfcpu@v0.11.1/pkg/api/selectPages.go (about)

     1  /*
     2  Copyright 2018 The pdfcpu Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package api
    18  
    19  import (
    20  	"fmt"
    21  	"regexp"
    22  	"sort"
    23  	"strconv"
    24  	"strings"
    25  
    26  	"github.com/pdfcpu/pdfcpu/pkg/log"
    27  	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types"
    28  	"github.com/pkg/errors"
    29  )
    30  
    31  var (
    32  	selectedPagesRegExp *regexp.Regexp
    33  )
    34  
    35  func setupRegExpForPageSelection() *regexp.Regexp {
    36  	e := "(\\d+)?-l(-\\d+)?|l(-(\\d+)-?)?"
    37  	e = "[!n]?((-\\d+)|(\\d+(-(\\d+)?)?)|" + e + ")"
    38  	e = "\\Qeven\\E|\\Qodd\\E|" + e
    39  	exp := "^" + e + "(," + e + ")*$"
    40  	re, _ := regexp.Compile(exp)
    41  	return re
    42  }
    43  
    44  func init() {
    45  	selectedPagesRegExp = setupRegExpForPageSelection()
    46  }
    47  
    48  // ParsePageSelection ensures a correct page selection expression.
    49  func ParsePageSelection(s string) ([]string, error) {
    50  	if s == "" {
    51  		return nil, nil
    52  	}
    53  
    54  	// Ensure valid comma separated expression of:{ {even|odd}{!}{-}# | {even|odd}{!}#-{#} }*
    55  	//
    56  	// Negated expressions:
    57  	// '!' negates an expression
    58  	// since '!' needs to be part of a single quoted string in bash
    59  	// as an alternative also 'n' works instead of "!"
    60  	//
    61  	// Extract all but page 4 may be expressed as: "1-,!4" or "1-,n4"
    62  	//
    63  	// The pageSelection is evaluated strictly from left to right!
    64  	// e.g. "!3,1-5" extracts pages 1-5 whereas "1-5,!3" extracts pages 1,2,4,5
    65  	//
    66  
    67  	if !selectedPagesRegExp.MatchString(s) {
    68  		return nil, errors.Errorf("-pages \"%s\" => syntax error\n", s)
    69  	}
    70  
    71  	//log.CLI.Printf("pageSelection: %s\n", s)
    72  
    73  	return strings.Split(s, ","), nil
    74  }
    75  
    76  func handlePrefix(v string, negated bool, pageCount int, selectedPages types.IntSet) error {
    77  	// -l
    78  	if v == "l" {
    79  		for j := 1; j <= pageCount; j++ {
    80  			selectedPages[j] = !negated
    81  		}
    82  		return nil
    83  	}
    84  
    85  	// -l-#
    86  	if strings.HasPrefix(v, "l-") {
    87  		i, err := strconv.Atoi(v[2:])
    88  		if err != nil {
    89  			return err
    90  		}
    91  		if pageCount-i < 1 {
    92  			return nil
    93  		}
    94  		for j := 1; j <= pageCount-i; j++ {
    95  			selectedPages[j] = !negated
    96  		}
    97  		return nil
    98  	}
    99  
   100  	// -#
   101  	i, err := strconv.Atoi(v)
   102  	if err != nil {
   103  		return err
   104  	}
   105  
   106  	// Handle overflow gracefully
   107  	if i > pageCount {
   108  		i = pageCount
   109  	}
   110  
   111  	// identified
   112  	// -# ... select all pages up to and including #
   113  	// or !-# ... deselect all pages up to and including #
   114  	for j := 1; j <= i; j++ {
   115  		selectedPages[j] = !negated
   116  	}
   117  
   118  	return nil
   119  }
   120  
   121  func handleSuffix(v string, negated bool, pageCount int, selectedPages types.IntSet) error {
   122  	// must be #- ... select all pages from here until the end.
   123  	// or !#- ... deselect all pages from here until the end.
   124  
   125  	i, err := strconv.Atoi(v)
   126  	if err != nil {
   127  		return err
   128  	}
   129  
   130  	// Handle overflow gracefully
   131  	if i > pageCount {
   132  		return nil
   133  	}
   134  
   135  	for j := i; j <= pageCount; j++ {
   136  		selectedPages[j] = !negated
   137  	}
   138  
   139  	return nil
   140  }
   141  
   142  func handleSpecificPageOrLastXPages(s string, negated bool, pageCount int, selectedPages types.IntSet) error {
   143  	// l
   144  	if s == "l" {
   145  		selectedPages[pageCount] = !negated
   146  		return nil
   147  	}
   148  
   149  	// l-#
   150  	if strings.HasPrefix(s, "l-") {
   151  		pr := strings.Split(s[2:], "-")
   152  		i, err := strconv.Atoi(pr[0])
   153  		if err != nil {
   154  			return err
   155  		}
   156  		if pageCount-i < 1 {
   157  			return nil
   158  		}
   159  		j := pageCount - i
   160  
   161  		// l-#-
   162  		if strings.HasSuffix(s, "-") {
   163  			j = pageCount
   164  		}
   165  		for i := pageCount - i; i <= j; i++ {
   166  			selectedPages[i] = !negated
   167  		}
   168  		return nil
   169  	}
   170  
   171  	// must be # ... select a specific page
   172  	// or !# ... deselect a specific page
   173  	i, err := strconv.Atoi(s)
   174  	if err != nil {
   175  		return err
   176  	}
   177  
   178  	// Handle overflow gracefully
   179  	if i > pageCount {
   180  		return nil
   181  	}
   182  
   183  	selectedPages[i] = !negated
   184  
   185  	return nil
   186  }
   187  
   188  func negation(c byte) bool {
   189  	return c == '!' || c == 'n'
   190  }
   191  
   192  func selectEvenPages(selectedPages types.IntSet, pageCount int) {
   193  	for i := 2; i <= pageCount; i += 2 {
   194  		_, found := selectedPages[i]
   195  		if !found {
   196  			selectedPages[i] = true
   197  		}
   198  	}
   199  }
   200  
   201  func selectOddPages(selectedPages types.IntSet, pageCount int) {
   202  	for i := 1; i <= pageCount; i += 2 {
   203  		_, found := selectedPages[i]
   204  		if !found {
   205  			selectedPages[i] = true
   206  		}
   207  	}
   208  }
   209  
   210  func parsePageRange(pr []string, pageCount int, negated bool, selectedPages types.IntSet) error {
   211  	from, err := strconv.Atoi(pr[0])
   212  	if err != nil {
   213  		return err
   214  	}
   215  
   216  	// Handle overflow gracefully
   217  	if from > pageCount {
   218  		return nil
   219  	}
   220  
   221  	var thru int
   222  	if pr[1] == "l" {
   223  		// #-l
   224  		thru = pageCount
   225  		if len(pr) == 3 {
   226  			// #-l-#
   227  			i, err := strconv.Atoi(pr[2])
   228  			if err != nil {
   229  				return err
   230  			}
   231  			thru -= i
   232  		}
   233  	} else {
   234  		// #-#
   235  		var err error
   236  		thru, err = strconv.Atoi(pr[1])
   237  		if err != nil {
   238  			return err
   239  		}
   240  	}
   241  
   242  	// Handle overflow gracefully
   243  	if thru < from {
   244  		return nil
   245  	}
   246  
   247  	if thru > pageCount {
   248  		thru = pageCount
   249  	}
   250  
   251  	for i := from; i <= thru; i++ {
   252  		selectedPages[i] = !negated
   253  	}
   254  
   255  	return nil
   256  }
   257  
   258  func sortedPages(selectedPages types.IntSet) []int {
   259  	p := []int(nil)
   260  	for i, v := range selectedPages {
   261  		if v {
   262  			p = append(p, i)
   263  		}
   264  	}
   265  	sort.Ints(p)
   266  	return p
   267  }
   268  
   269  func logSelPages(selectedPages types.IntSet) {
   270  	if !log.CLIEnabled() || len(selectedPages) == 0 {
   271  		return
   272  	}
   273  	var b strings.Builder
   274  	for _, i := range sortedPages(selectedPages) {
   275  		fmt.Fprintf(&b, "%d,", i)
   276  	}
   277  	s := b.String()
   278  	if len(s) > 1 {
   279  		s = s[:len(s)-1]
   280  	}
   281  	// TODO Suppress for multifile cmds
   282  	if log.CLIEnabled() {
   283  		log.CLI.Printf("pages: %s\n", s)
   284  	}
   285  }
   286  
   287  func calcSelPages(pageCount int, pageSelection []string, selectedPages types.IntSet) error {
   288  	for _, v := range pageSelection {
   289  
   290  		//log.Stats.Printf("pageExp: <%s>\n", v)
   291  
   292  		if v == "even" {
   293  			selectEvenPages(selectedPages, pageCount)
   294  			continue
   295  		}
   296  
   297  		if v == "odd" {
   298  			selectOddPages(selectedPages, pageCount)
   299  			continue
   300  		}
   301  
   302  		var negated bool
   303  		if negation(v[0]) {
   304  			negated = true
   305  			//logInfoAPI.Printf("is a negated exp\n")
   306  			v = v[1:]
   307  		}
   308  
   309  		// -#
   310  		if v[0] == '-' {
   311  
   312  			v = v[1:]
   313  
   314  			if err := handlePrefix(v, negated, pageCount, selectedPages); err != nil {
   315  				return err
   316  			}
   317  
   318  			continue
   319  		}
   320  
   321  		// #-
   322  		if v[0] != 'l' && strings.HasSuffix(v, "-") {
   323  
   324  			if err := handleSuffix(v[:len(v)-1], negated, pageCount, selectedPages); err != nil {
   325  				return err
   326  			}
   327  
   328  			continue
   329  		}
   330  
   331  		// l l-# l-#-
   332  		if v[0] == 'l' {
   333  			if err := handleSpecificPageOrLastXPages(v, negated, pageCount, selectedPages); err != nil {
   334  				return err
   335  			}
   336  			continue
   337  		}
   338  
   339  		pr := strings.Split(v, "-")
   340  		if len(pr) >= 2 {
   341  			// v contains '-' somewhere in the middle
   342  			// #-# #-l #-l-#
   343  			if err := parsePageRange(pr, pageCount, negated, selectedPages); err != nil {
   344  				return err
   345  			}
   346  
   347  			continue
   348  		}
   349  
   350  		// #
   351  		if err := handleSpecificPageOrLastXPages(pr[0], negated, pageCount, selectedPages); err != nil {
   352  			return err
   353  		}
   354  
   355  	}
   356  
   357  	return nil
   358  }
   359  
   360  // selectedPages returns a set of used page numbers.
   361  // key==page# => key 0 unused!
   362  func selectedPages(pageCount int, pageSelection []string, log bool) (types.IntSet, error) {
   363  	selectedPages := types.IntSet{}
   364  
   365  	if err := calcSelPages(pageCount, pageSelection, selectedPages); err != nil {
   366  		return nil, err
   367  	}
   368  
   369  	if log {
   370  		logSelPages(selectedPages)
   371  	}
   372  
   373  	return selectedPages, nil
   374  }
   375  
   376  // PagesForPageSelection ensures a set of page numbers for an ascending page sequence
   377  // where each page number may appear only once.
   378  func PagesForPageSelection(pageCount int, pageSelection []string, ensureAllforNone bool, log bool) (types.IntSet, error) {
   379  	if len(pageSelection) > 0 {
   380  		return selectedPages(pageCount, pageSelection, log)
   381  	}
   382  	if !ensureAllforNone {
   383  		//log.CLI.Printf("pages: none\n")
   384  		return nil, nil
   385  	}
   386  	m := types.IntSet{}
   387  	for i := 1; i <= pageCount; i++ {
   388  		m[i] = true
   389  	}
   390  	//log.CLI.Printf("pages: all\n")
   391  	return m, nil
   392  }
   393  
   394  func RemainingPagesForPageRemoval(pageCount int, pageSelection []string, log bool) (types.IntSet, error) {
   395  	pagesToRemove, err := selectedPages(pageCount, pageSelection, log)
   396  	if err != nil {
   397  		return nil, err
   398  	}
   399  
   400  	m := types.IntSet{}
   401  	for i := 1; i <= pageCount; i++ {
   402  		m[i] = true
   403  	}
   404  
   405  	for k, v := range pagesToRemove {
   406  		if v {
   407  			m[k] = false
   408  		}
   409  	}
   410  
   411  	return m, nil
   412  }
   413  
   414  func deletePageFromCollection(cp *[]int, p int) {
   415  	a := []int{}
   416  	for _, i := range *cp {
   417  		if i != p {
   418  			a = append(a, i)
   419  		}
   420  	}
   421  	*cp = a
   422  }
   423  
   424  func processPageForCollection(cp *[]int, negated bool, i int) {
   425  	if !negated {
   426  		*cp = append(*cp, i)
   427  	} else {
   428  		deletePageFromCollection(cp, i)
   429  	}
   430  }
   431  
   432  func collectEvenPages(cp *[]int, pageCount int) {
   433  	for i := 2; i <= pageCount; i += 2 {
   434  		*cp = append(*cp, i)
   435  	}
   436  }
   437  
   438  func collectOddPages(cp *[]int, pageCount int) {
   439  	for i := 1; i <= pageCount; i += 2 {
   440  		*cp = append(*cp, i)
   441  	}
   442  }
   443  
   444  func handlePrefixForCollection(v string, negated bool, pageCount int, cp *[]int) error {
   445  	// -l
   446  	if v == "l" {
   447  		for j := 1; j <= pageCount; j++ {
   448  			processPageForCollection(cp, negated, j)
   449  		}
   450  		return nil
   451  	}
   452  
   453  	// -l-#
   454  	if strings.HasPrefix(v, "l-") {
   455  		i, err := strconv.Atoi(v[2:])
   456  		if err != nil {
   457  			return err
   458  		}
   459  		if pageCount-i < 1 {
   460  			return nil
   461  		}
   462  		for j := 1; j <= pageCount-i; j++ {
   463  			processPageForCollection(cp, negated, j)
   464  		}
   465  		return nil
   466  	}
   467  
   468  	// -#
   469  	i, err := strconv.Atoi(v)
   470  	if err != nil {
   471  		return err
   472  	}
   473  
   474  	// Handle overflow gracefully
   475  	if i > pageCount {
   476  		i = pageCount
   477  	}
   478  
   479  	// identified
   480  	// -# ... select all pages up to and including #
   481  	// or !-# ... deselect all pages up to and including #
   482  	for j := 1; j <= i; j++ {
   483  		processPageForCollection(cp, negated, j)
   484  	}
   485  
   486  	return nil
   487  }
   488  
   489  func handleSuffixForCollection(v string, negated bool, pageCount int, cp *[]int) error {
   490  	// must be #- ... select all pages from here until the end.
   491  	// or !#- ... deselect all pages from here until the end.
   492  
   493  	i, err := strconv.Atoi(v)
   494  	if err != nil {
   495  		return err
   496  	}
   497  
   498  	// Handle overflow gracefully
   499  	if i > pageCount {
   500  		return nil
   501  	}
   502  
   503  	for j := i; j <= pageCount; j++ {
   504  		processPageForCollection(cp, negated, j)
   505  	}
   506  
   507  	return nil
   508  }
   509  
   510  func handleSpecificPageOrLastXPagesForCollection(s string, negated bool, pageCount int, cp *[]int) error {
   511  	// l
   512  	if s == "l" {
   513  		processPageForCollection(cp, negated, pageCount)
   514  		return nil
   515  	}
   516  
   517  	// l-#
   518  	if strings.HasPrefix(s, "l-") {
   519  		pr := strings.Split(s[2:], "-")
   520  		i, err := strconv.Atoi(pr[0])
   521  		if err != nil {
   522  			return err
   523  		}
   524  		if pageCount-i < 1 {
   525  			return nil
   526  		}
   527  		j := pageCount - i
   528  
   529  		// l-#-
   530  		if strings.HasSuffix(s, "-") {
   531  			j = pageCount
   532  		}
   533  		for i := pageCount - i; i <= j; i++ {
   534  			processPageForCollection(cp, negated, i)
   535  		}
   536  		return nil
   537  	}
   538  
   539  	// must be # ... select a specific page
   540  	// or !# ... deselect a specific page
   541  	i, err := strconv.Atoi(s)
   542  	if err != nil {
   543  		return err
   544  	}
   545  
   546  	// Handle overflow gracefully
   547  	if i > pageCount {
   548  		return nil
   549  	}
   550  
   551  	processPageForCollection(cp, negated, i)
   552  
   553  	return nil
   554  }
   555  
   556  func parsePageRangeForCollection(pr []string, pageCount int, negated bool, cp *[]int) error {
   557  	from, err := strconv.Atoi(pr[0])
   558  	if err != nil {
   559  		return err
   560  	}
   561  
   562  	// Handle overflow gracefully
   563  	if from > pageCount {
   564  		return nil
   565  	}
   566  
   567  	var thru int
   568  	if pr[1] == "l" {
   569  		// #-l
   570  		thru = pageCount
   571  		if len(pr) == 3 {
   572  			// #-l-#
   573  			i, err := strconv.Atoi(pr[2])
   574  			if err != nil {
   575  				return err
   576  			}
   577  			thru -= i
   578  		}
   579  	} else {
   580  		// #-#
   581  		var err error
   582  		thru, err = strconv.Atoi(pr[1])
   583  		if err != nil {
   584  			return err
   585  		}
   586  	}
   587  
   588  	// Handle overflow gracefully
   589  	if thru < from {
   590  		return nil
   591  	}
   592  
   593  	if thru > pageCount {
   594  		thru = pageCount
   595  	}
   596  
   597  	for i := from; i <= thru; i++ {
   598  		processPageForCollection(cp, negated, i)
   599  	}
   600  
   601  	return nil
   602  }
   603  
   604  func calcPagesForPageCollection(pageCount int, pageSelection []string) ([]int, error) {
   605  	collectedPages := []int{}
   606  
   607  	for _, v := range pageSelection {
   608  
   609  		if v == "even" {
   610  			collectEvenPages(&collectedPages, pageCount)
   611  			continue
   612  		}
   613  
   614  		if v == "odd" {
   615  			collectOddPages(&collectedPages, pageCount)
   616  			continue
   617  		}
   618  
   619  		var negated bool
   620  		if negation(v[0]) {
   621  			negated = true
   622  			//logInfoAPI.Printf("is a negated exp\n")
   623  			v = v[1:]
   624  		}
   625  
   626  		// -#
   627  		if v[0] == '-' {
   628  
   629  			v = v[1:]
   630  
   631  			if err := handlePrefixForCollection(v, negated, pageCount, &collectedPages); err != nil {
   632  				return nil, err
   633  			}
   634  
   635  			continue
   636  		}
   637  
   638  		// #-
   639  		if v[0] != 'l' && strings.HasSuffix(v, "-") {
   640  
   641  			if err := handleSuffixForCollection(v[:len(v)-1], negated, pageCount, &collectedPages); err != nil {
   642  				return nil, err
   643  			}
   644  
   645  			continue
   646  		}
   647  
   648  		// l l-# l-#-
   649  		if v[0] == 'l' {
   650  			if err := handleSpecificPageOrLastXPagesForCollection(v, negated, pageCount, &collectedPages); err != nil {
   651  				return nil, err
   652  			}
   653  			continue
   654  		}
   655  
   656  		pr := strings.Split(v, "-")
   657  		if len(pr) >= 2 {
   658  			// v contains '-' somewhere in the middle
   659  			// #-# #-l #-l-#
   660  			if err := parsePageRangeForCollection(pr, pageCount, negated, &collectedPages); err != nil {
   661  				return nil, err
   662  			}
   663  
   664  			continue
   665  		}
   666  
   667  		// #
   668  		if err := handleSpecificPageOrLastXPagesForCollection(pr[0], negated, pageCount, &collectedPages); err != nil {
   669  			return nil, err
   670  		}
   671  	}
   672  
   673  	return collectedPages, nil
   674  }
   675  
   676  // PagesForPageCollection returns a slice of page numbers for a page collection.
   677  // Any page number in any order any number of times allowed.
   678  func PagesForPageCollection(pageCount int, pageSelection []string) ([]int, error) {
   679  	collectedPages, err := calcPagesForPageCollection(pageCount, pageSelection)
   680  	if err != nil {
   681  		return nil, err
   682  	}
   683  
   684  	if len(collectedPages) == 0 {
   685  		return nil, errors.Errorf("pdfcpu: no page selected")
   686  	}
   687  
   688  	return collectedPages, nil
   689  }
   690  
   691  // PagesForPageRange returns a slice of page numbers for a page range.
   692  func PagesForPageRange(from, thru int) []int {
   693  	s := make([]int, thru-from+1)
   694  	for i := 0; i < len(s); i++ {
   695  		s[i] = from + i
   696  	}
   697  	return s
   698  }