github.com/pdfcpu/pdfcpu@v0.11.1/pkg/api/split.go (about)

     1  /*
     2  	Copyright 2020 The pdfcpu Authors.
     3  
     4  	Licensed under the Apache License, Version 2.0 (the "License");
     5  	you may not use this file except in compliance with the License.
     6  	You may obtain a copy of the License at
     7  
     8  		http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  	Unless required by applicable law or agreed to in writing, software
    11  	distributed under the License is distributed on an "AS IS" BASIS,
    12  	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  	See the License for the specific language governing permissions and
    14  	limitations under the License.
    15  */
    16  
    17  package api
    18  
    19  import (
    20  	"bytes"
    21  	"io"
    22  	"os"
    23  	"path/filepath"
    24  	"strconv"
    25  	"strings"
    26  
    27  	"github.com/pdfcpu/pdfcpu/pkg/log"
    28  	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu"
    29  	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
    30  	"github.com/pkg/errors"
    31  )
    32  
    33  type PageSpan struct {
    34  	From   int
    35  	Thru   int
    36  	Reader io.Reader
    37  }
    38  
    39  func pageSpan(ctx *model.Context, from, thru int) (*PageSpan, error) {
    40  	ctxNew, err := pdfcpu.ExtractPages(ctx, PagesForPageRange(from, thru), false)
    41  	if err != nil {
    42  		return nil, err
    43  	}
    44  
    45  	var b bytes.Buffer
    46  	if err := WriteContext(ctxNew, &b); err != nil {
    47  		return nil, err
    48  	}
    49  
    50  	return &PageSpan{From: from, Thru: thru, Reader: &b}, nil
    51  }
    52  
    53  func spanFileName(fileName string, from, thru int) string {
    54  	baseFileName := filepath.Base(fileName)
    55  	fn := strings.TrimSuffix(baseFileName, ".pdf")
    56  	fn = fn + "_" + strconv.Itoa(from)
    57  	if from == thru {
    58  		return fn + ".pdf"
    59  	}
    60  	return fn + "-" + strconv.Itoa(thru) + ".pdf"
    61  }
    62  
    63  func splitOutPath(outDir, fileName string, forBookmark bool, from, thru int) string {
    64  	p := filepath.Join(outDir, fileName+".pdf")
    65  	if !forBookmark {
    66  		p = filepath.Join(outDir, spanFileName(fileName, from, thru))
    67  	}
    68  	return p
    69  }
    70  
    71  func writePageSpan(ctx *model.Context, from, thru int, outPath string) error {
    72  	ps, err := pageSpan(ctx, from, thru)
    73  	if err != nil {
    74  		return err
    75  	}
    76  	logWritingTo(outPath)
    77  	return pdfcpu.WriteReader(outPath, ps.Reader)
    78  }
    79  
    80  func context(rs io.ReadSeeker, conf *model.Configuration) (*model.Context, error) {
    81  	if conf == nil {
    82  		conf = model.NewDefaultConfiguration()
    83  	}
    84  	conf.Cmd = model.SPLIT
    85  
    86  	return ReadValidateAndOptimize(rs, conf)
    87  }
    88  
    89  func pageSpansSplitAlongBookmarks(ctx *model.Context) ([]*PageSpan, error) {
    90  	pss := []*PageSpan{}
    91  
    92  	bms, err := pdfcpu.Bookmarks(ctx)
    93  	if err != nil {
    94  		return nil, err
    95  	}
    96  
    97  	for _, bm := range bms {
    98  
    99  		from, thru := bm.PageFrom, bm.PageThru
   100  		if thru == 0 {
   101  			thru = ctx.PageCount
   102  		}
   103  
   104  		ps, err := pageSpan(ctx, from, thru)
   105  		if err != nil {
   106  			return nil, err
   107  		}
   108  		pss = append(pss, ps)
   109  
   110  	}
   111  
   112  	return pss, nil
   113  }
   114  
   115  func pageSpans(ctx *model.Context, span int) ([]*PageSpan, error) {
   116  	pss := []*PageSpan{}
   117  
   118  	for i := 0; i < ctx.PageCount/span; i++ {
   119  		start := i * span
   120  		from := start + 1
   121  		thru := start + span
   122  		ps, err := pageSpan(ctx, from, thru)
   123  		if err != nil {
   124  			return nil, err
   125  		}
   126  		pss = append(pss, ps)
   127  	}
   128  
   129  	// A possible last file has less than span pages.
   130  	if ctx.PageCount%span > 0 {
   131  		start := (ctx.PageCount / span) * span
   132  		from := start + 1
   133  		thru := ctx.PageCount
   134  		ps, err := pageSpan(ctx, from, thru)
   135  		if err != nil {
   136  			return nil, err
   137  		}
   138  		pss = append(pss, ps)
   139  	}
   140  
   141  	return pss, nil
   142  }
   143  
   144  func writePageSpans(ctx *model.Context, span int, outDir, fileName string) error {
   145  	forBookmark := false
   146  
   147  	for i := 0; i < ctx.PageCount/span; i++ {
   148  		start := i * span
   149  		from, thru := start+1, start+span
   150  		path := splitOutPath(outDir, fileName, forBookmark, from, thru)
   151  		if err := writePageSpan(ctx, from, thru, path); err != nil {
   152  			return err
   153  		}
   154  	}
   155  
   156  	// A possible last file has less than span pages.
   157  	if ctx.PageCount%span > 0 {
   158  		start := (ctx.PageCount / span) * span
   159  		from, thru := start+1, ctx.PageCount
   160  		path := splitOutPath(outDir, fileName, forBookmark, from, thru)
   161  		if err := writePageSpan(ctx, from, thru, path); err != nil {
   162  			return err
   163  		}
   164  	}
   165  
   166  	return nil
   167  }
   168  
   169  func writePageSpansSplitAlongBookmarks(ctx *model.Context, outDir string) error {
   170  	forBookmark := true
   171  
   172  	bms, err := pdfcpu.Bookmarks(ctx)
   173  	if err != nil {
   174  		return err
   175  	}
   176  
   177  	for _, bm := range bms {
   178  		fileName := strings.Replace(bm.Title, " ", "_", -1)
   179  		from, thru := bm.PageFrom, bm.PageThru
   180  		if thru == 0 {
   181  			thru = ctx.PageCount
   182  		}
   183  		path := splitOutPath(outDir, fileName, forBookmark, from, thru)
   184  		if err := writePageSpan(ctx, from, thru, path); err != nil {
   185  			return err
   186  		}
   187  	}
   188  
   189  	return nil
   190  }
   191  
   192  func writePageSpansSplitAlongPages(ctx *model.Context, pageNrs []int, outDir, fileName string) error {
   193  	// pageNumbers is a a sorted sequence of page numbers.
   194  	forBookmark := false
   195  	from, thru := 1, 0
   196  
   197  	if len(pageNrs) < 1 {
   198  		return errors.New("pdfcpu: split along pageNrs - missing pageNrs")
   199  	}
   200  
   201  	if pageNrs[0] > ctx.PageCount {
   202  		return errors.New("pdfcpu: split along pageNrs - invalid page number sequence.")
   203  	}
   204  
   205  	for i := 0; i < len(pageNrs); i++ {
   206  		thru = pageNrs[i] - 1
   207  		if thru >= ctx.PageCount {
   208  			break
   209  		}
   210  		path := splitOutPath(outDir, fileName, forBookmark, from, thru)
   211  		if err := writePageSpan(ctx, from, thru, path); err != nil {
   212  			return err
   213  		}
   214  		from = thru + 1
   215  	}
   216  
   217  	thru = ctx.PageCount
   218  	path := splitOutPath(outDir, fileName, forBookmark, from, thru)
   219  	return writePageSpan(ctx, from, thru, path)
   220  }
   221  
   222  // SplitRaw returns page spans for the PDF stream read from rs obeying given split span.
   223  // If span == 1 splitting results in single page PDFs.
   224  // If span == 0 we split along given bookmarks (level 1 only).
   225  // Default span: 1
   226  func SplitRaw(rs io.ReadSeeker, span int, conf *model.Configuration) ([]*PageSpan, error) {
   227  	if rs == nil {
   228  		return nil, errors.New("pdfcpu: SplitRaw: missing rs")
   229  	}
   230  
   231  	ctx, err := context(rs, conf)
   232  	if err != nil {
   233  		return nil, err
   234  	}
   235  
   236  	if span == 0 {
   237  		return pageSpansSplitAlongBookmarks(ctx)
   238  	}
   239  	return pageSpans(ctx, span)
   240  }
   241  
   242  // Split generates a sequence of PDF files in outDir for the PDF stream read from rs obeying given split span.
   243  // If span == 1 splitting results in single page PDFs.
   244  // If span == 0 we split along given bookmarks (level 1 only).
   245  // Default span: 1
   246  func Split(rs io.ReadSeeker, outDir, fileName string, span int, conf *model.Configuration) error {
   247  	if rs == nil {
   248  		return errors.New("pdfcpu: Split: missing rs")
   249  	}
   250  
   251  	ctx, err := context(rs, conf)
   252  	if err != nil {
   253  		return err
   254  	}
   255  
   256  	if span == 0 {
   257  		return writePageSpansSplitAlongBookmarks(ctx, outDir)
   258  	}
   259  	return writePageSpans(ctx, span, outDir, fileName)
   260  }
   261  
   262  // SplitFile generates a sequence of PDF files in outDir for inFile obeying given split span.
   263  // If span == 1 splitting results in single page PDFs.
   264  // If span == 0 we split along given bookmarks (level 1 only).
   265  // Default span: 1
   266  func SplitFile(inFile, outDir string, span int, conf *model.Configuration) error {
   267  	f, err := os.Open(inFile)
   268  	if err != nil {
   269  		return err
   270  	}
   271  	if log.CLIEnabled() {
   272  		log.CLI.Printf("splitting %s to %s/...\n", inFile, outDir)
   273  	}
   274  
   275  	defer func() {
   276  		if err != nil {
   277  			f.Close()
   278  			return
   279  		}
   280  		err = f.Close()
   281  	}()
   282  
   283  	return Split(f, outDir, filepath.Base(inFile), span, conf)
   284  }
   285  
   286  // SplitFile generates a sequence of PDF files in outDir for rs splitting along pageNrs.
   287  func SplitByPageNr(rs io.ReadSeeker, outDir, fileName string, pageNrs []int, conf *model.Configuration) error {
   288  	if rs == nil {
   289  		return errors.New("pdfcpu: SplitByPageNr: missing rs")
   290  	}
   291  
   292  	ctx, err := context(rs, conf)
   293  	if err != nil {
   294  		return err
   295  	}
   296  
   297  	return writePageSpansSplitAlongPages(ctx, pageNrs, outDir, fileName)
   298  }
   299  
   300  // SplitFile generates a sequence of PDF files in outDir for inFile splitting it along pageNrs.
   301  func SplitByPageNrFile(inFile, outDir string, pageNrs []int, conf *model.Configuration) error {
   302  	f, err := os.Open(inFile)
   303  	if err != nil {
   304  		return err
   305  	}
   306  	if log.CLIEnabled() {
   307  		log.CLI.Printf("splitting %s to %s/...\n", inFile, outDir)
   308  	}
   309  
   310  	defer func() {
   311  		if err != nil {
   312  			f.Close()
   313  			return
   314  		}
   315  		err = f.Close()
   316  	}()
   317  
   318  	return SplitByPageNr(f, outDir, filepath.Base(inFile), pageNrs, conf)
   319  }