github.com/masterhung0112/hk_server/v5@v5.0.0-20220302090640-ec71aef15e1c/services/docextractor/docextractor.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See LICENSE.txt for license information.
     3  
     4  package docextractor
     5  
     6  import (
     7  	"io"
     8  )
     9  
// ExtractSettings defines the features enabled/disabled during the document text extraction.
type ExtractSettings struct {
	// ArchiveRecursion, when true, hands the combined extractor to the archive
	// extractor as its SubExtractor; when false the archive extractor is
	// registered without a sub-extractor.
	ArchiveRecursion bool
	// MMPreviewURL, when non-empty, enables the mmpreview extractor and is
	// passed to it as its first argument.
	MMPreviewURL     string
	// MMPreviewSecret is passed to the mmpreview extractor alongside
	// MMPreviewURL; it is only read when MMPreviewURL is non-empty.
	MMPreviewSecret  string
}
    16  
    17  // Extract extract the text from a document using the system default extractors
    18  func Extract(filename string, r io.ReadSeeker, settings ExtractSettings) (string, error) {
    19  	return ExtractWithExtraExtractors(filename, r, settings, []Extractor{})
    20  }
    21  
    22  // ExtractWithExtraExtractors extract the text from a document using the provided extractors beside the system default extractors.
    23  func ExtractWithExtraExtractors(filename string, r io.ReadSeeker, settings ExtractSettings, extraExtractors []Extractor) (string, error) {
    24  	enabledExtractors := &combineExtractor{}
    25  	for _, extraExtractor := range extraExtractors {
    26  		enabledExtractors.Add(extraExtractor)
    27  	}
    28  	enabledExtractors.Add(&documentExtractor{})
    29  	enabledExtractors.Add(&pdfExtractor{})
    30  
    31  	if settings.ArchiveRecursion {
    32  		enabledExtractors.Add(&archiveExtractor{SubExtractor: enabledExtractors})
    33  	} else {
    34  		enabledExtractors.Add(&archiveExtractor{})
    35  	}
    36  
    37  	if settings.MMPreviewURL != "" {
    38  		enabledExtractors.Add(newMMPreviewExtractor(settings.MMPreviewURL, settings.MMPreviewSecret, pdfExtractor{}))
    39  	}
    40  	enabledExtractors.Add(&plainExtractor{})
    41  
    42  	if enabledExtractors.Match(filename) {
    43  		return enabledExtractors.Extract(filename, r)
    44  	}
    45  	return "", nil
    46  }