github.com/haalcala/mattermost-server-change-repo@v0.0.0-20210713015153-16753fbeee5f/services/docextractor/archive.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See LICENSE.txt for license information.
     3  
     4  package docextractor
     5  
     6  import (
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"os"
    11  	"path/filepath"
    12  	"strings"
    13  
    14  	"github.com/mholt/archiver/v3"
    15  )
    16  
// archiveExtractor extracts text from archive files by listing the names of
// the entries they contain and, optionally, the text of each entry.
type archiveExtractor struct {
	// SubExtractor, when non-nil, is applied to every entry found inside the
	// archive; the text it returns is appended after the entry's name. When
	// nil, only the entry names are extracted.
	SubExtractor Extractor
}
    20  
    21  func (ae *archiveExtractor) Match(filename string) bool {
    22  	_, err := archiver.ByExtension(filename)
    23  	return err == nil
    24  }
    25  
    26  func (ae *archiveExtractor) Extract(name string, r io.Reader) (string, error) {
    27  	dir, err := ioutil.TempDir(os.TempDir(), "archiver")
    28  	if err != nil {
    29  		return "", fmt.Errorf("error creating temporary file: %v", err)
    30  	}
    31  	defer os.RemoveAll(dir)
    32  
    33  	f, err := os.Create(filepath.Join(dir, name))
    34  	if err != nil {
    35  		return "", fmt.Errorf("error copying data into temporary file: %v", err)
    36  	}
    37  	_, err = io.Copy(f, r)
    38  	f.Close()
    39  	if err != nil {
    40  		return "", fmt.Errorf("error copying data into temporary file: %v", err)
    41  	}
    42  
    43  	var text strings.Builder
    44  	err = archiver.Walk(f.Name(), func(file archiver.File) error {
    45  		text.WriteString(file.Name() + " ")
    46  		if ae.SubExtractor != nil {
    47  			filename := filepath.Base(file.Name())
    48  			subtext, extractErr := ae.SubExtractor.Extract(filename, file)
    49  			if extractErr == nil {
    50  				text.WriteString(subtext + " ")
    51  			}
    52  		}
    53  		return nil
    54  	})
    55  	if err != nil {
    56  		return "", err
    57  	}
    58  
    59  	return text.String(), nil
    60  }