github.com/haalcala/mattermost-server-change-repo@v0.0.0-20210713015153-16753fbeee5f/services/docextractor/mmpreview.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See LICENSE.txt for license information.
     3  
     4  package docextractor
     5  
// MMPreview is a micro-service that converts any LibreOffice-supported
// format into a PDF file; we then use the regular PDF extractor to convert
// that PDF into plain text.
     9  
    10  import (
    11  	"bytes"
    12  	"io"
    13  	"mime/multipart"
    14  	"net/http"
    15  	"path"
    16  	"strings"
    17  
    18  	"github.com/pkg/errors"
    19  )
    20  
    21  type mmPreviewExtractor struct {
    22  	url          string
    23  	secret       string
    24  	pdfExtractor pdfExtractor
    25  }
    26  
    27  var mmpreviewSupportedExtensions = map[string]bool{
    28  	"ppt":  true,
    29  	"odp":  true,
    30  	"xls":  true,
    31  	"xlsx": true,
    32  	"ods":  true,
    33  }
    34  
    35  func newMMPreviewExtractor(url string, secret string, pdfExtractor pdfExtractor) *mmPreviewExtractor {
    36  	return &mmPreviewExtractor{url: url, secret: secret, pdfExtractor: pdfExtractor}
    37  }
    38  
    39  func (mpe *mmPreviewExtractor) Match(filename string) bool {
    40  	extension := strings.TrimPrefix(path.Ext(filename), ".")
    41  	return mmpreviewSupportedExtensions[extension]
    42  }
    43  
    44  func (mpe *mmPreviewExtractor) Extract(filename string, file io.Reader) (string, error) {
    45  	b, w, err := createMultipartFormData("file", filename, file)
    46  	if err != nil {
    47  		return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.")
    48  	}
    49  	req, err := http.NewRequest("POST", mpe.url+"/toPDF", &b)
    50  	if err != nil {
    51  		return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.")
    52  	}
    53  	req.Header.Set("Content-Type", w.FormDataContentType())
    54  	if mpe.secret != "" {
    55  		req.Header.Add("Authentication", mpe.secret)
    56  	}
    57  	resp, err := http.DefaultClient.Do(req)
    58  	if err != nil {
    59  		return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.")
    60  	}
    61  	defer resp.Body.Close()
    62  	if resp.StatusCode != 200 {
    63  		return "", errors.New("Unable to generate file preview using mmpreview (The server has replied with an error)")
    64  	}
    65  	return mpe.pdfExtractor.Extract(filename, resp.Body)
    66  }
    67  
    68  func createMultipartFormData(fieldName, fileName string, fileData io.Reader) (bytes.Buffer, *multipart.Writer, error) {
    69  	var b bytes.Buffer
    70  	var err error
    71  	w := multipart.NewWriter(&b)
    72  	var fw io.Writer
    73  	if fw, err = w.CreateFormFile(fieldName, fileName); err != nil {
    74  		return b, nil, err
    75  	}
    76  	if _, err = io.Copy(fw, fileData); err != nil {
    77  		return b, nil, err
    78  	}
    79  	w.Close()
    80  	return b, w, nil
    81  }