github.com/masterhung0112/hk_server/v5@v5.0.0-20220302090640-ec71aef15e1c/services/docextractor/mmpreview.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See LICENSE.txt for license information.
     3  
     4  package docextractor
     5  
     6  // MMPreview is a micro-service to convert from any libreoffice supported
     7  // format into a PDF file, and then we use the regular pdf extractor to convert
     8  // it into plain text.
     9  
    10  import (
    11  	"bytes"
    12  	"io"
    13  	"io/ioutil"
    14  	"mime/multipart"
    15  	"net/http"
    16  	"path"
    17  	"strings"
    18  
    19  	"github.com/pkg/errors"
    20  )
    21  
// mmPreviewExtractor extracts text from a document by posting it to an
// mmpreview service for conversion and handing the service's response to
// pdfExtractor.
type mmPreviewExtractor struct {
	// url is the base address of the mmpreview service; Extract appends
	// "/toPDF" to it.
	url          string
	// secret is sent in the "Authentication" request header when non-empty.
	secret       string
	// pdfExtractor extracts the text from the body returned by the service.
	pdfExtractor pdfExtractor
}
    27  
// mmpreviewSupportedExtensions is the set of file extensions (without the
// leading dot) for which mmPreviewExtractor.Match returns true.
var mmpreviewSupportedExtensions = map[string]bool{
	"ppt":  true,
	"odp":  true,
	"xls":  true,
	"xlsx": true,
	"ods":  true,
}
    35  
    36  func newMMPreviewExtractor(url string, secret string, pdfExtractor pdfExtractor) *mmPreviewExtractor {
    37  	return &mmPreviewExtractor{url: url, secret: secret, pdfExtractor: pdfExtractor}
    38  }
    39  
    40  func (mpe *mmPreviewExtractor) Match(filename string) bool {
    41  	extension := strings.TrimPrefix(path.Ext(filename), ".")
    42  	return mmpreviewSupportedExtensions[extension]
    43  }
    44  
    45  func (mpe *mmPreviewExtractor) Extract(filename string, file io.ReadSeeker) (string, error) {
    46  	b, w, err := createMultipartFormData("file", filename, file)
    47  	if err != nil {
    48  		return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.")
    49  	}
    50  	req, err := http.NewRequest("POST", mpe.url+"/toPDF", &b)
    51  	if err != nil {
    52  		return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.")
    53  	}
    54  	req.Header.Set("Content-Type", w.FormDataContentType())
    55  	if mpe.secret != "" {
    56  		req.Header.Add("Authentication", mpe.secret)
    57  	}
    58  	resp, err := http.DefaultClient.Do(req)
    59  	if err != nil {
    60  		return "", errors.Wrap(err, "Unable to generate file preview using mmpreview.")
    61  	}
    62  	defer resp.Body.Close()
    63  	if resp.StatusCode != 200 {
    64  		return "", errors.New("Unable to generate file preview using mmpreview (The server has replied with an error)")
    65  	}
    66  	data, err := ioutil.ReadAll(resp.Body)
    67  	if err != nil {
    68  		return "", errors.Wrap(err, "unable to read the response from mmpreview")
    69  	}
    70  	return mpe.pdfExtractor.Extract(filename, bytes.NewReader(data))
    71  }
    72  
    73  func createMultipartFormData(fieldName, fileName string, fileData io.ReadSeeker) (bytes.Buffer, *multipart.Writer, error) {
    74  	var b bytes.Buffer
    75  	var err error
    76  	w := multipart.NewWriter(&b)
    77  	var fw io.Writer
    78  	if fw, err = w.CreateFormFile(fieldName, fileName); err != nil {
    79  		return b, nil, err
    80  	}
    81  	if _, err = io.Copy(fw, fileData); err != nil {
    82  		return b, nil, err
    83  	}
    84  	w.Close()
    85  	return b, w, nil
    86  }