github.com/haalcala/mattermost-server-change-repo@v0.0.0-20210713015153-16753fbeee5f/services/docextractor/plain.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See LICENSE.txt for license information.
     3  
     4  package docextractor
     5  
     6  import (
     7  	"io"
     8  	"io/ioutil"
     9  	"unicode"
    10  	"unicode/utf8"
    11  )
    12  
// plainExtractor is the extractor used for plain-text content. It has no
// fields, so it carries no state between calls.
type plainExtractor struct{}
    14  
    15  func (pe *plainExtractor) Match(filename string) bool {
    16  	return true
    17  }
    18  
    19  func (pe *plainExtractor) Extract(filename string, r io.Reader) (string, error) {
    20  	// This detects any visible character plus any whitespace
    21  	validRanges := append(unicode.GraphicRanges, unicode.White_Space)
    22  
    23  	text, _ := ioutil.ReadAll(r)
    24  	count := 0
    25  	for {
    26  		c, size := utf8.DecodeRune(text[count:])
    27  		if !unicode.In(c, validRanges...) {
    28  			return "", nil
    29  		}
    30  		if size == 0 {
    31  			break
    32  		}
    33  		count += size
    34  		if count > 1024 {
    35  			break
    36  		}
    37  	}
    38  
    39  	return string(text), nil
    40  }