github.com/haalcala/mattermost-server-change-repo@v0.0.0-20210713015153-16753fbeee5f/services/docextractor/pdf.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See LICENSE.txt for license information. 3 4 package docextractor 5 6 import ( 7 "bytes" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "os" 12 "path" 13 "strings" 14 15 "github.com/ledongthuc/pdf" 16 ) 17 18 type pdfExtractor struct{} 19 20 func (pe *pdfExtractor) Match(filename string) bool { 21 supportedExtensions := map[string]bool{ 22 "pdf": true, 23 } 24 extension := strings.TrimPrefix(path.Ext(filename), ".") 25 return supportedExtensions[extension] 26 } 27 28 func (pe *pdfExtractor) Extract(filename string, r io.Reader) (string, error) { 29 f, err := ioutil.TempFile(os.TempDir(), "pdflib") 30 if err != nil { 31 return "", fmt.Errorf("error creating temporary file: %v", err) 32 } 33 defer f.Close() 34 defer os.Remove(f.Name()) 35 size, err := io.Copy(f, r) 36 if err != nil { 37 return "", fmt.Errorf("error copying data into temporary file: %v", err) 38 } 39 40 reader, err := pdf.NewReader(f, size) 41 if err != nil { 42 return "", err 43 } 44 45 var buf bytes.Buffer 46 b, err := reader.GetPlainText() 47 if err != nil { 48 return "", err 49 } 50 buf.ReadFrom(b) 51 return buf.String(), nil 52 }