github.com/haalcala/mattermost-server-change-repo@v0.0.0-20210713015153-16753fbeee5f/services/docextractor/mmpreview.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See LICENSE.txt for license information. 3 4 package docextractor 5 6 // MMPreview is a micro-service to convert from any libreoffice supported 7 // format into a PDF file, and then we use the regular pdf extractor to convert 8 // it into plain text. 9 10 import ( 11 "bytes" 12 "io" 13 "mime/multipart" 14 "net/http" 15 "path" 16 "strings" 17 18 "github.com/pkg/errors" 19 ) 20 21 type mmPreviewExtractor struct { 22 url string 23 secret string 24 pdfExtractor pdfExtractor 25 } 26 27 var mmpreviewSupportedExtensions = map[string]bool{ 28 "ppt": true, 29 "odp": true, 30 "xls": true, 31 "xlsx": true, 32 "ods": true, 33 } 34 35 func newMMPreviewExtractor(url string, secret string, pdfExtractor pdfExtractor) *mmPreviewExtractor { 36 return &mmPreviewExtractor{url: url, secret: secret, pdfExtractor: pdfExtractor} 37 } 38 39 func (mpe *mmPreviewExtractor) Match(filename string) bool { 40 extension := strings.TrimPrefix(path.Ext(filename), ".") 41 return mmpreviewSupportedExtensions[extension] 42 } 43 44 func (mpe *mmPreviewExtractor) Extract(filename string, file io.Reader) (string, error) { 45 b, w, err := createMultipartFormData("file", filename, file) 46 if err != nil { 47 return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.") 48 } 49 req, err := http.NewRequest("POST", mpe.url+"/toPDF", &b) 50 if err != nil { 51 return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.") 52 } 53 req.Header.Set("Content-Type", w.FormDataContentType()) 54 if mpe.secret != "" { 55 req.Header.Add("Authentication", mpe.secret) 56 } 57 resp, err := http.DefaultClient.Do(req) 58 if err != nil { 59 return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.") 60 } 61 defer resp.Body.Close() 62 if resp.StatusCode != 200 { 63 return "", errors.New("Unable to generate file preview using mmpreview (The server has replied with an error)") 64 } 65 return mpe.pdfExtractor.Extract(filename, resp.Body) 66 } 67 68 func createMultipartFormData(fieldName, fileName string, fileData io.Reader) (bytes.Buffer, *multipart.Writer, error) { 69 var b bytes.Buffer 70 var err error 71 w := multipart.NewWriter(&b) 72 var fw io.Writer 73 if fw, err = w.CreateFormFile(fieldName, fileName); err != nil { 74 return b, nil, err 75 } 76 if _, err = io.Copy(fw, fileData); err != nil { 77 return b, nil, err 78 } 79 w.Close() 80 return b, w, nil 81 }