github.com/masterhung0112/hk_server/v5@v5.0.0-20220302090640-ec71aef15e1c/services/docextractor/archive.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See LICENSE.txt for license information. 3 4 package docextractor 5 6 import ( 7 "bytes" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "os" 12 "path/filepath" 13 "strings" 14 15 "github.com/mholt/archiver/v3" 16 ) 17 18 type archiveExtractor struct { 19 SubExtractor Extractor 20 } 21 22 func (ae *archiveExtractor) Match(filename string) bool { 23 _, err := archiver.ByExtension(filename) 24 return err == nil 25 } 26 27 func (ae *archiveExtractor) Extract(name string, r io.ReadSeeker) (string, error) { 28 dir, err := ioutil.TempDir(os.TempDir(), "archiver") 29 if err != nil { 30 return "", fmt.Errorf("error creating temporary file: %v", err) 31 } 32 defer os.RemoveAll(dir) 33 34 f, err := os.Create(filepath.Join(dir, name)) 35 if err != nil { 36 return "", fmt.Errorf("error copying data into temporary file: %v", err) 37 } 38 _, err = io.Copy(f, r) 39 f.Close() 40 if err != nil { 41 return "", fmt.Errorf("error copying data into temporary file: %v", err) 42 } 43 44 var text strings.Builder 45 err = archiver.Walk(f.Name(), func(file archiver.File) error { 46 text.WriteString(file.Name() + " ") 47 if ae.SubExtractor != nil { 48 filename := filepath.Base(file.Name()) 49 filename = strings.ReplaceAll(filename, "-", " ") 50 filename = strings.ReplaceAll(filename, ".", " ") 51 filename = strings.ReplaceAll(filename, ",", " ") 52 data, err2 := ioutil.ReadAll(file) 53 if err2 != nil { 54 return err2 55 } 56 subtext, extractErr := ae.SubExtractor.Extract(filename, bytes.NewReader(data)) 57 if extractErr == nil { 58 text.WriteString(subtext + " ") 59 } 60 } 61 return nil 62 }) 63 if err != nil { 64 return "", err 65 } 66 67 return text.String(), nil 68 }