github.com/masterhung0112/hk_server/v5@v5.0.0-20220302090640-ec71aef15e1c/services/docextractor/docextractor.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See LICENSE.txt for license information. 3 4 package docextractor 5 6 import ( 7 "io" 8 ) 9 10 // ExtractSettings defines the features enabled/disable during the document text extraction. 11 type ExtractSettings struct { 12 ArchiveRecursion bool 13 MMPreviewURL string 14 MMPreviewSecret string 15 } 16 17 // Extract extract the text from a document using the system default extractors 18 func Extract(filename string, r io.ReadSeeker, settings ExtractSettings) (string, error) { 19 return ExtractWithExtraExtractors(filename, r, settings, []Extractor{}) 20 } 21 22 // ExtractWithExtraExtractors extract the text from a document using the provided extractors beside the system default extractors. 23 func ExtractWithExtraExtractors(filename string, r io.ReadSeeker, settings ExtractSettings, extraExtractors []Extractor) (string, error) { 24 enabledExtractors := &combineExtractor{} 25 for _, extraExtractor := range extraExtractors { 26 enabledExtractors.Add(extraExtractor) 27 } 28 enabledExtractors.Add(&documentExtractor{}) 29 enabledExtractors.Add(&pdfExtractor{}) 30 31 if settings.ArchiveRecursion { 32 enabledExtractors.Add(&archiveExtractor{SubExtractor: enabledExtractors}) 33 } else { 34 enabledExtractors.Add(&archiveExtractor{}) 35 } 36 37 if settings.MMPreviewURL != "" { 38 enabledExtractors.Add(newMMPreviewExtractor(settings.MMPreviewURL, settings.MMPreviewSecret, pdfExtractor{})) 39 } 40 enabledExtractors.Add(&plainExtractor{}) 41 42 if enabledExtractors.Match(filename) { 43 return enabledExtractors.Extract(filename, r) 44 } 45 return "", nil 46 }