github.com/pkumar631/talisman@v0.3.2/detector/word_check.go (about) 1 package detector 2 3 import ( 4 "bufio" 5 "strings" 6 "os" 7 8 log "github.com/Sirupsen/logrus" 9 ) 10 11 type WordCheck struct { 12 } 13 14 const AVERAGE_LENGTH_OF_WORDS_IN_ENGLISH = 5 //See http://bit.ly/2qYFzFf for reference 15 const UNIX_WORDS_PATH = "/usr/share/dict/words" //See https://en.wikipedia.org/wiki/Words_(Unix) for reference 16 const UNIX_WORDS_ALTERNATIVE_PATH = "/usr/dict/words" //See https://en.wikipedia.org/wiki/Words_(Unix) for reference 17 18 func (en *WordCheck) containsWordsOnly(text string) bool { 19 text = strings.ToLower(text) 20 file := &os.File{} 21 defer file.Close() 22 reader := getWordsFileReader(file, UNIX_WORDS_PATH, UNIX_WORDS_ALTERNATIVE_PATH) 23 if reader == nil { 24 return false 25 } 26 wordCount := howManyWordsExistInText(reader, text) 27 if wordCount >= (len(text) / (AVERAGE_LENGTH_OF_WORDS_IN_ENGLISH)) { 28 return true 29 } 30 return false 31 } 32 33 func getWordsFileReader(file *os.File, filePaths... string) *bufio.Reader { 34 for _, filePath := range filePaths { 35 var err error = nil 36 file, err = os.Open(filePath) 37 if err != nil { 38 continue 39 } 40 return bufio.NewReader(file) 41 } 42 return nil 43 } 44 45 func howManyWordsExistInText(reader *bufio.Reader, text string) int { 46 wordCount := 0 47 for { 48 word, err := reader.ReadString('\n') 49 word = strings.Trim(word, "\n") 50 51 if word != "" && len(word) > 2 && strings.Contains(text, word) { 52 text = strings.Replace(text, word, "", 1) //already matched 53 wordCount++ 54 } 55 56 if err != nil { //EOF 57 log.Debugf("[WordChecker]: Found %d words", wordCount) 58 break 59 } 60 } 61 return wordCount 62 }