github.com/pkumar631/talisman@v0.3.2/detector/word_check.go (about)

     1  package detector
     2  
     3  import (
     4  	"bufio"
     5  	"strings"
     6  	"os"
     7  
     8  	log "github.com/Sirupsen/logrus"
     9  )
    10  
    11  type WordCheck struct {
    12  }
    13  
    14  const AVERAGE_LENGTH_OF_WORDS_IN_ENGLISH = 5 //See http://bit.ly/2qYFzFf for reference
    15  const UNIX_WORDS_PATH = "/usr/share/dict/words" //See https://en.wikipedia.org/wiki/Words_(Unix) for reference
    16  const UNIX_WORDS_ALTERNATIVE_PATH = "/usr/dict/words" //See https://en.wikipedia.org/wiki/Words_(Unix) for reference
    17  
    18  func (en *WordCheck) containsWordsOnly(text string) bool {
    19  	text = strings.ToLower(text)
    20  	file := &os.File{}
    21  	defer file.Close()
    22  	reader := getWordsFileReader(file, UNIX_WORDS_PATH, UNIX_WORDS_ALTERNATIVE_PATH)
    23  	if reader == nil {
    24  		return false
    25  	}
    26  	wordCount := howManyWordsExistInText(reader, text)
    27  	if wordCount >= (len(text) / (AVERAGE_LENGTH_OF_WORDS_IN_ENGLISH)) {
    28  		return true
    29  	}
    30  	return false
    31  }
    32  
    33  func getWordsFileReader(file *os.File, filePaths... string) *bufio.Reader {
    34  	for _, filePath := range filePaths {
    35  		var err error = nil
    36  		file, err = os.Open(filePath)
    37  		if err != nil {
    38  			continue
    39  		}
    40  		return bufio.NewReader(file)
    41  	}
    42  	return nil
    43  }
    44  
    45  func howManyWordsExistInText(reader *bufio.Reader, text string) int {
    46  	wordCount := 0
    47  	for {
    48  		word, err := reader.ReadString('\n')
    49  		word = strings.Trim(word, "\n")
    50  
    51  		if word != "" && len(word) > 2 && strings.Contains(text, word) {
    52  			text = strings.Replace(text, word, "", 1) //already matched
    53  			wordCount++
    54  		}
    55  
    56  		if err != nil { //EOF
    57  			log.Debugf("[WordChecker]: Found %d words", wordCount)
    58  			break
    59  		}
    60  	}
    61  	return wordCount
    62  }