github.com/pdfcpu/pdfcpu@v0.11.1/pkg/api/attach.go (about)

     1  /*
     2  	Copyright 2019 The pdfcpu Authors.
     3  
     4  	Licensed under the Apache License, Version 2.0 (the "License");
     5  	you may not use this file except in compliance with the License.
     6  	You may obtain a copy of the License at
     7  
     8  		http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  	Unless required by applicable law or agreed to in writing, software
    11  	distributed under the License is distributed on an "AS IS" BASIS,
    12  	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  	See the License for the specific language governing permissions and
    14  	limitations under the License.
    15  */
    16  
    17  package api
    18  
    19  import (
    20  	"io"
    21  	"os"
    22  	"path/filepath"
    23  	"strings"
    24  
    25  	"github.com/pdfcpu/pdfcpu/pkg/log"
    26  	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
    27  	"github.com/pkg/errors"
    28  )
    29  
    30  // Attachments returns rs's attachments.
    31  func Attachments(rs io.ReadSeeker, conf *model.Configuration) ([]model.Attachment, error) {
    32  	if rs == nil {
    33  		return nil, errors.New("pdfcpu: Attachments: missing rs")
    34  	}
    35  
    36  	if conf == nil {
    37  		conf = model.NewDefaultConfiguration()
    38  	}
    39  	conf.Cmd = model.LISTATTACHMENTS
    40  
    41  	ctx, err := ReadValidateAndOptimize(rs, conf)
    42  	if err != nil {
    43  		return nil, err
    44  	}
    45  
    46  	return ctx.ListAttachments()
    47  }
    48  
    49  // AddAttachments embeds files into a PDF context read from rs and writes the result to w.
    50  // file is either a file name or a file name and a description separated by a comma.
    51  func AddAttachments(rs io.ReadSeeker, w io.Writer, files []string, coll bool, conf *model.Configuration) error {
    52  	if rs == nil {
    53  		return errors.New("pdfcpu: AddAttachments: missing rs")
    54  	}
    55  
    56  	if w == nil {
    57  		return errors.New("pdfcpu: AddAttachments: missing w")
    58  	}
    59  
    60  	if conf == nil {
    61  		conf = model.NewDefaultConfiguration()
    62  	}
    63  	conf.Cmd = model.ADDATTACHMENTS
    64  
    65  	ctx, err := ReadValidateAndOptimize(rs, conf)
    66  	if err != nil {
    67  		return err
    68  	}
    69  
    70  	var ok bool
    71  
    72  	for _, fn := range files {
    73  		s := strings.Split(fn, ",")
    74  		if len(s) == 0 || len(s) > 2 {
    75  			continue
    76  		}
    77  
    78  		fileName := s[0]
    79  		desc := ""
    80  		if len(s) == 2 {
    81  			desc = s[1]
    82  		}
    83  
    84  		if log.CLIEnabled() {
    85  			log.CLI.Printf("adding %s\n", fileName)
    86  		}
    87  		f, err := os.Open(fileName)
    88  		if err != nil {
    89  			return err
    90  		}
    91  		defer f.Close()
    92  
    93  		fi, err := f.Stat()
    94  		if err != nil {
    95  			return err
    96  		}
    97  		mt := fi.ModTime()
    98  
    99  		a := model.Attachment{Reader: f, ID: filepath.Base(fileName), Desc: desc, ModTime: &mt}
   100  		if err = ctx.AddAttachment(a, coll); err != nil {
   101  			return err
   102  		}
   103  		ok = true
   104  	}
   105  
   106  	if !ok {
   107  		return errors.New("pdfcpu: AddAttachments: No attachment added")
   108  	}
   109  
   110  	return Write(ctx, w, conf)
   111  }
   112  
   113  // AddAttachmentsFile embeds files into a PDF context read from inFile and writes the result to outFile.
   114  func AddAttachmentsFile(inFile, outFile string, files []string, coll bool, conf *model.Configuration) (err error) {
   115  	var f1, f2 *os.File
   116  
   117  	if f1, err = os.Open(inFile); err != nil {
   118  		return err
   119  	}
   120  
   121  	tmpFile := inFile + ".tmp"
   122  	if outFile != "" && inFile != outFile {
   123  		tmpFile = outFile
   124  	}
   125  	if f2, err = os.Create(tmpFile); err != nil {
   126  		f1.Close()
   127  		return err
   128  	}
   129  
   130  	defer func() {
   131  		if err != nil {
   132  			f2.Close()
   133  			f1.Close()
   134  			os.Remove(tmpFile)
   135  			return
   136  		}
   137  		if err = f2.Close(); err != nil {
   138  			return
   139  		}
   140  		if err = f1.Close(); err != nil {
   141  			return
   142  		}
   143  		if outFile == "" || inFile == outFile {
   144  			err = os.Rename(tmpFile, inFile)
   145  		}
   146  	}()
   147  
   148  	return AddAttachments(f1, f2, files, coll, conf)
   149  }
   150  
   151  // RemoveAttachments deletes embedded files from a PDF context read from rs and writes the result to w.
   152  func RemoveAttachments(rs io.ReadSeeker, w io.Writer, files []string, conf *model.Configuration) error {
   153  	if rs == nil {
   154  		return errors.New("pdfcpu: RemoveAttachments: missing rs")
   155  	}
   156  
   157  	if w == nil {
   158  		return errors.New("pdfcpu: RemoveAttachments: missing w")
   159  	}
   160  
   161  	if conf == nil {
   162  		conf = model.NewDefaultConfiguration()
   163  	}
   164  	conf.Cmd = model.ADDATTACHMENTS
   165  
   166  	ctx, err := ReadValidateAndOptimize(rs, conf)
   167  	if err != nil {
   168  		return err
   169  	}
   170  
   171  	var ok bool
   172  	if ok, err = ctx.RemoveAttachments(files); err != nil {
   173  		return err
   174  	}
   175  	if !ok {
   176  		return errors.New("pdfcpu: RemoveAttachments: No attachment removed")
   177  	}
   178  
   179  	return Write(ctx, w, conf)
   180  }
   181  
   182  // RemoveAttachmentsFile deletes embedded files from a PDF context read from inFile and writes the result to outFile.
   183  func RemoveAttachmentsFile(inFile, outFile string, files []string, conf *model.Configuration) (err error) {
   184  	var f1, f2 *os.File
   185  
   186  	if f1, err = os.Open(inFile); err != nil {
   187  		return err
   188  	}
   189  
   190  	tmpFile := inFile + ".tmp"
   191  	if outFile != "" && inFile != outFile {
   192  		tmpFile = outFile
   193  	}
   194  	if f2, err = os.Create(tmpFile); err != nil {
   195  		f1.Close()
   196  		return err
   197  	}
   198  
   199  	defer func() {
   200  		if err != nil {
   201  			f2.Close()
   202  			f1.Close()
   203  			os.Remove(tmpFile)
   204  			return
   205  		}
   206  		if err = f2.Close(); err != nil {
   207  			return
   208  		}
   209  		if err = f1.Close(); err != nil {
   210  			return
   211  		}
   212  		if outFile == "" || inFile == outFile {
   213  			err = os.Rename(tmpFile, inFile)
   214  		}
   215  	}()
   216  
   217  	return RemoveAttachments(f1, f2, files, conf)
   218  }
   219  
   220  // ExtractAttachmentsRaw extracts embedded files from a PDF context read from rs.
   221  func ExtractAttachmentsRaw(rs io.ReadSeeker, outDir string, fileNames []string, conf *model.Configuration) ([]model.Attachment, error) {
   222  	if rs == nil {
   223  		return nil, errors.New("pdfcpu: ExtractAttachmentsRaw: missing rs")
   224  	}
   225  
   226  	if conf == nil {
   227  		conf = model.NewDefaultConfiguration()
   228  	}
   229  	conf.Cmd = model.EXTRACTATTACHMENTS
   230  
   231  	ctx, err := ReadAndValidate(rs, conf)
   232  	if err != nil {
   233  		return nil, err
   234  	}
   235  
   236  	return ctx.ExtractAttachments(fileNames)
   237  }
   238  
   239  func SanitizePath(path string) string {
   240  
   241  	// Do not process "'" and "..".
   242  
   243  	if path == "" || path == "." || path == ".." {
   244  		return "attachment"
   245  	}
   246  
   247  	path = strings.TrimPrefix(path, string(filepath.Separator))
   248  
   249  	parts := strings.Split(path, string(filepath.Separator))
   250  
   251  	cleanParts := []string{}
   252  	for i := 0; i < len(parts); i++ {
   253  		if parts[i] != "" && parts[i] != "." && parts[i] != ".." {
   254  			cleanParts = append(cleanParts, parts[i])
   255  			continue
   256  		}
   257  		if i == len(parts)-1 {
   258  			cleanParts = append(cleanParts, "attachment")
   259  		}
   260  	}
   261  
   262  	if len(cleanParts) == 0 {
   263  		return "attachment"
   264  	}
   265  
   266  	return filepath.Join(cleanParts...)
   267  }
   268  
   269  // ExtractAttachments extracts embedded files from a PDF context read from rs into outDir.
   270  func ExtractAttachments(rs io.ReadSeeker, outDir string, fileNames []string, conf *model.Configuration) error {
   271  	aa, err := ExtractAttachmentsRaw(rs, outDir, fileNames, conf)
   272  	if err != nil {
   273  		return err
   274  	}
   275  
   276  	for _, a := range aa {
   277  
   278  		fn := SanitizePath(a.FileName)
   279  		fileName := filepath.Join(outDir, fn)
   280  
   281  		f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm)
   282  		if err != nil {
   283  			fileName = filepath.Base(a.FileName)
   284  			f, err = os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm)
   285  			if err != nil {
   286  				return err
   287  			}
   288  		}
   289  		logWritingTo(fileName)
   290  		if _, err = io.Copy(f, a); err != nil {
   291  			return err
   292  		}
   293  		if err := f.Close(); err != nil {
   294  			return err
   295  		}
   296  	}
   297  
   298  	return nil
   299  }
   300  
   301  // ExtractAttachmentsFile extracts embedded files from a PDF context read from inFile into outDir.
   302  func ExtractAttachmentsFile(inFile, outDir string, files []string, conf *model.Configuration) error {
   303  	f, err := os.Open(inFile)
   304  	if err != nil {
   305  		return err
   306  	}
   307  	defer f.Close()
   308  
   309  	return ExtractAttachments(f, outDir, files, conf)
   310  }