github.com/masterhung0112/hk_server/v5@v5.0.0-20220302090640-ec71aef15e1c/services/docextractor/archive.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See LICENSE.txt for license information.
     3  
     4  package docextractor
     5  
     6  import (
     7  	"bytes"
     8  	"fmt"
     9  	"io"
    10  	"io/ioutil"
    11  	"os"
    12  	"path/filepath"
    13  	"strings"
    14  
    15  	"github.com/mholt/archiver/v3"
    16  )
    17  
// archiveExtractor extracts searchable text from archive files: the names of
// the entries they contain and, optionally, the text of the entries themselves.
type archiveExtractor struct {
	// SubExtractor, when non-nil, is run on the contents of every archive
	// entry and its output is appended to the extracted text. When nil, only
	// the entry names are emitted.
	SubExtractor Extractor
}
    21  
    22  func (ae *archiveExtractor) Match(filename string) bool {
    23  	_, err := archiver.ByExtension(filename)
    24  	return err == nil
    25  }
    26  
    27  func (ae *archiveExtractor) Extract(name string, r io.ReadSeeker) (string, error) {
    28  	dir, err := ioutil.TempDir(os.TempDir(), "archiver")
    29  	if err != nil {
    30  		return "", fmt.Errorf("error creating temporary file: %v", err)
    31  	}
    32  	defer os.RemoveAll(dir)
    33  
    34  	f, err := os.Create(filepath.Join(dir, name))
    35  	if err != nil {
    36  		return "", fmt.Errorf("error copying data into temporary file: %v", err)
    37  	}
    38  	_, err = io.Copy(f, r)
    39  	f.Close()
    40  	if err != nil {
    41  		return "", fmt.Errorf("error copying data into temporary file: %v", err)
    42  	}
    43  
    44  	var text strings.Builder
    45  	err = archiver.Walk(f.Name(), func(file archiver.File) error {
    46  		text.WriteString(file.Name() + " ")
    47  		if ae.SubExtractor != nil {
    48  			filename := filepath.Base(file.Name())
    49  			filename = strings.ReplaceAll(filename, "-", " ")
    50  			filename = strings.ReplaceAll(filename, ".", " ")
    51  			filename = strings.ReplaceAll(filename, ",", " ")
    52  			data, err2 := ioutil.ReadAll(file)
    53  			if err2 != nil {
    54  				return err2
    55  			}
    56  			subtext, extractErr := ae.SubExtractor.Extract(filename, bytes.NewReader(data))
    57  			if extractErr == nil {
    58  				text.WriteString(subtext + " ")
    59  			}
    60  		}
    61  		return nil
    62  	})
    63  	if err != nil {
    64  		return "", err
    65  	}
    66  
    67  	return text.String(), nil
    68  }