github.com/haalcala/mattermost-server-change-repo@v0.0.0-20210713015153-16753fbeee5f/services/docextractor/plain.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See LICENSE.txt for license information. 3 4 package docextractor 5 6 import ( 7 "io" 8 "io/ioutil" 9 "unicode" 10 "unicode/utf8" 11 ) 12 13 type plainExtractor struct{} 14 15 func (pe *plainExtractor) Match(filename string) bool { 16 return true 17 } 18 19 func (pe *plainExtractor) Extract(filename string, r io.Reader) (string, error) { 20 // This detects any visible character plus any whitespace 21 validRanges := append(unicode.GraphicRanges, unicode.White_Space) 22 23 text, _ := ioutil.ReadAll(r) 24 count := 0 25 for { 26 c, size := utf8.DecodeRune(text[count:]) 27 if !unicode.In(c, validRanges...) { 28 return "", nil 29 } 30 if size == 0 { 31 break 32 } 33 count += size 34 if count > 1024 { 35 break 36 } 37 } 38 39 return string(text), nil 40 }