github.com/masterhung0112/hk_server/v5@v5.0.0-20220302090640-ec71aef15e1c/services/docextractor/mmpreview.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See LICENSE.txt for license information. 3 4 package docextractor 5 6 // MMPreview is a micro-service to convert from any libreoffice supported 7 // format into a PDF file, and then we use the regular pdf extractor to convert 8 // it into plain text. 9 10 import ( 11 "bytes" 12 "io" 13 "io/ioutil" 14 "mime/multipart" 15 "net/http" 16 "path" 17 "strings" 18 19 "github.com/pkg/errors" 20 ) 21 22 type mmPreviewExtractor struct { 23 url string 24 secret string 25 pdfExtractor pdfExtractor 26 } 27 28 var mmpreviewSupportedExtensions = map[string]bool{ 29 "ppt": true, 30 "odp": true, 31 "xls": true, 32 "xlsx": true, 33 "ods": true, 34 } 35 36 func newMMPreviewExtractor(url string, secret string, pdfExtractor pdfExtractor) *mmPreviewExtractor { 37 return &mmPreviewExtractor{url: url, secret: secret, pdfExtractor: pdfExtractor} 38 } 39 40 func (mpe *mmPreviewExtractor) Match(filename string) bool { 41 extension := strings.TrimPrefix(path.Ext(filename), ".") 42 return mmpreviewSupportedExtensions[extension] 43 } 44 45 func (mpe *mmPreviewExtractor) Extract(filename string, file io.ReadSeeker) (string, error) { 46 b, w, err := createMultipartFormData("file", filename, file) 47 if err != nil { 48 return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.") 49 } 50 req, err := http.NewRequest("POST", mpe.url+"/toPDF", &b) 51 if err != nil { 52 return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.") 53 } 54 req.Header.Set("Content-Type", w.FormDataContentType()) 55 if mpe.secret != "" { 56 req.Header.Add("Authentication", mpe.secret) 57 } 58 resp, err := http.DefaultClient.Do(req) 59 if err != nil { 60 return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.") 61 } 62 defer resp.Body.Close() 63 if resp.StatusCode != 200 { 64 return "", errors.New("Unable to generate file preview using mmpreview (The server has replied with an error)") 65 } 66 data, err := ioutil.ReadAll(resp.Body) 67 if err != nil { 68 return "", errors.Wrap(err, "unable to read the response from mmpreview") 69 } 70 return mpe.pdfExtractor.Extract(filename, bytes.NewReader(data)) 71 } 72 73 func createMultipartFormData(fieldName, fileName string, fileData io.ReadSeeker) (bytes.Buffer, *multipart.Writer, error) { 74 var b bytes.Buffer 75 var err error 76 w := multipart.NewWriter(&b) 77 var fw io.Writer 78 if fw, err = w.CreateFormFile(fieldName, fileName); err != nil { 79 return b, nil, err 80 } 81 if _, err = io.Copy(fw, fileData); err != nil { 82 return b, nil, err 83 } 84 w.Close() 85 return b, w, nil 86 }