github.com/haalcala/mattermost-server-change-repo@v0.0.0-20210713015153-16753fbeee5f/services/docextractor/documents.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See LICENSE.txt for license information.
     3  
     4  package docextractor
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"io/ioutil"
    11  	"os"
    12  	"path"
    13  	"strings"
    14  
    15  	"code.sajari.com/docconv"
    16  )
    17  
    18  type documentExtractor struct{}
    19  
    20  var doconvConverterByExtensions = map[string]func(io.Reader) (string, map[string]string, error){
    21  	"doc":   docconv.ConvertDoc,
    22  	"docx":  docconv.ConvertDocx,
    23  	"pptx":  docconv.ConvertPptx,
    24  	"odt":   docconv.ConvertODT,
    25  	"html":  func(r io.Reader) (string, map[string]string, error) { return docconv.ConvertHTML(r, true) },
    26  	"pages": docconv.ConvertPages,
    27  	"rtf":   docconv.ConvertRTF,
    28  }
    29  
    30  func (de *documentExtractor) Match(filename string) bool {
    31  	extension := strings.TrimPrefix(path.Ext(filename), ".")
    32  	_, ok := doconvConverterByExtensions[extension]
    33  	return ok
    34  }
    35  
    36  func (de *documentExtractor) Extract(filename string, r io.Reader) (string, error) {
    37  	extension := strings.TrimPrefix(path.Ext(filename), ".")
    38  	converter, ok := doconvConverterByExtensions[extension]
    39  	if !ok {
    40  		return "", errors.New("Unknown converter")
    41  	}
    42  
    43  	f, err := ioutil.TempFile(os.TempDir(), "docconv")
    44  	if err != nil {
    45  		return "", fmt.Errorf("error creating temporary file: %v", err)
    46  	}
    47  	defer f.Close()
    48  	defer os.Remove(f.Name())
    49  
    50  	_, err = io.Copy(f, r)
    51  	if err != nil {
    52  		return "", fmt.Errorf("error copying data into temporary file: %v", err)
    53  	}
    54  
    55  	text, _, err := converter(f)
    56  	if err != nil {
    57  		return "", err
    58  	}
    59  
    60  	return text, nil
    61  }