github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/text/levenshtein/word/type-specific.go

// Package word does tokenization on the word level.
package word

import (
	"sort"

	"github.com/pbberlin/tools/stringspb"
	ls_core "github.com/pbberlin/tools/text/levenshtein"
)

type Token string // we could use []rune instead of string

// Equal implements the ls_core.Equaler interface.
// It panics if compareTo is not a Token.
func (tk1 Token) Equal(compareTo interface{}) bool {
	tk2, ok := compareTo.(Token)
	if !ok {
		panic("Not the same type")
	}
	return tk1 == tk2
}

// WrapAsEqualer breaks a string into a slice of strings.
// Each string is then converted to a <Token>, which satisfies <Equaler>.
// The resulting []<Equaler> can then be pumped into the generic core.
// We could as well create slices of Equalers in the first place,
// but that leads to a far too ugly literal:
//   []ls_core.Equaler{ls_core.Equaler(Token("trink")), ls_core.Equaler(Token("nicht"))}
func WrapAsEqualer(s string, sorted bool) []ls_core.Equaler {

	ss := stringspb.SplitByWhitespace(s)
	if sorted {
		sort.Strings(ss)

		// weed out duplicates; this works because ss has just been
		// sorted, so equal words are adjacent
		su, prev := make([]string, 0, len(ss)), ""
		for _, v := range ss {
			if v == prev {
				continue
			}
			su = append(su, v)
			prev = v
		}
		ss = su
	}

	ret := make([]ls_core.Equaler, 0, len(ss))
	for _, v := range ss {
		cnv := ls_core.Equaler(Token(v))
		ret = append(ret, cnv)
	}
	return ret
}
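
A minimal usage sketch follows (a separate program, not part of this file). It assumes the package is importable under the pbberlin path used in the imports above, and that ls_core.Equaler is an interface whose only method is the Equal(interface{}) bool shown above; the generic core's distance function lives in the ls_core package, not here, so it is not called in this sketch.

package main

import (
	"fmt"

	"github.com/pbberlin/tools/text/levenshtein/word"
)

func main() {
	// With sorted=true the tokens are sorted and deduplicated,
	// so word order and repetition no longer matter.
	a := word.WrapAsEqualer("trink nicht trink", true)
	b := word.WrapAsEqualer("nicht trink", true)

	fmt.Println(len(a), len(b)) // 2 2 -- the duplicate "trink" is gone

	// Both slices are now ["nicht", "trink"]; position-wise comparison
	// via Token.Equal therefore yields true at each index.
	for i := range a {
		fmt.Println(a[i].Equal(b[i])) // true, true
	}
}

The two []ls_core.Equaler slices produced this way would then be handed to the generic Levenshtein core for the actual distance computation.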