// Levenshtein calculates the Levenshtein distance between two strings,
// i.e. the cheapest sequence of insertions, replacements and deletions
// that turns str1 into str2. Strings are compared byte by byte.
//
// costIns: Defines the cost of insertion.
// costRep: Defines the cost of replacement.
// costDel: Defines the cost of deletion.
//
// If str1 is empty the result is len(str2)*costIns; if str2 is empty the
// result is len(str1)*costDel. Otherwise, if either string is longer than
// 255 bytes, -1 is returned.
// See http://php.net/manual/en/function.levenshtein.php.
func Levenshtein(str1, str2 string, costIns, costRep, costDel int) int {
	const maxLen = 255

	n1, n2 := len(str1), len(str2)
	switch {
	case n1 == 0:
		return n2 * costIns
	case n2 == 0:
		return n1 * costDel
	case n1 > maxLen || n2 > maxLen:
		return -1
	}

	// Only two rows of the edit matrix are alive at any time:
	// prev is the row for the previous byte of str1, curr the one being filled.
	prev := make([]int, n2+1)
	curr := make([]int, n2+1)
	for j := range prev {
		prev[j] = j * costIns
	}

	for i := 0; i < n1; i++ {
		curr[0] = prev[0] + costDel
		for j := 0; j < n2; j++ {
			// Start from the diagonal: keep (free) or replace.
			best := prev[j]
			if str1[i] != str2[j] {
				best += costRep
			}
			if del := prev[j+1] + costDel; del < best {
				best = del
			}
			if ins := curr[j] + costIns; ins < best {
				best = ins
			}
			curr[j+1] = best
		}
		// The freshly filled row becomes the previous row of the next round.
		prev, curr = curr, prev
	}

	return prev[n2]
}
// SimilarText calculates the similarity between two strings and returns the
// number of matching bytes. The match is found by locating the first longest
// common substring and then applying the same search recursively to the
// parts left of it and to the parts right of it.
//
// If percent is not nil, it receives the similarity as a percentage
// (matching bytes * 2 / combined length * 100). When both strings are empty
// the percentage is set to 0.
// See http://php.net/manual/en/function.similar-text.php.
func SimilarText(first, second string, percent *float64) int {
	var similar func(a, b string) int
	similar = func(a, b string) int {
		var best, posA, posB int

		// Find the first longest common run of bytes between a and b.
		for i := 0; i < len(a); i++ {
			for j := 0; j < len(b); j++ {
				k := 0
				for i+k < len(a) && j+k < len(b) && a[i+k] == b[j+k] {
					k++
				}
				// Strictly greater: an earlier run wins over a later one of equal length.
				if k > best {
					best, posA, posB = k, i, j
				}
			}
		}
		if best == 0 {
			return 0
		}

		sum := best
		// Recurse only when both sides still have bytes to compare.
		if posA > 0 && posB > 0 {
			sum += similar(a[:posA], b[:posB])
		}
		if posA+best < len(a) && posB+best < len(b) {
			sum += similar(a[posA+best:], b[posB+best:])
		}
		return sum
	}

	total := len(first) + len(second)
	if total == 0 {
		// Avoid dividing by zero, but still report a defined percentage
		// instead of leaving the caller's variable untouched.
		if percent != nil {
			*percent = 0
		}
		return 0
	}

	sim := similar(first, second)
	if percent != nil {
		*percent = float64(sim*200) / float64(total)
	}
	return sim
}
// Soundex calculates the soundex key of a string.
//
// The key is always four bytes long: the first ASCII letter of str in upper
// case, followed by up to three digits encoding the following consonants,
// padded with '0' on the right. Bytes that are not ASCII letters are skipped.
// Adjacent letters with the same code are encoded once.
//
// Soundex panics if str is empty.
// See http://php.net/manual/en/function.soundex.php.
func Soundex(str string) string {
	if str == "" {
		panic("str: cannot be an empty string")
	}
	// Code per letter A..Z. A numeric 0 (not the character '0') marks letters
	// that never appear in the key; they only break up runs of equal codes.
	// Note that H, W and Y are handled exactly like the vowels here.
	table := [26]byte{
		0, '1', '2', '3', // A, B, C, D
		0, '1', '2', // E, F, G
		0,                          // H
		0, '2', '2', '4', '5', '5', // I, J, K, L, M, N
		0, '1', '2', '6', '2', '3', // O, P, Q, R, S, T
		0, '1', // U, V
		0, '2', // W, X
		0, '2', // Y, Z
	}

	var (
		key  [4]byte
		n    int  // number of key bytes written so far
		last byte // code of the most recent letter seen
	)
	for i := 0; i < len(str) && n < 4; i++ {
		c := str[i]
		if 'a' <= c && c <= 'z' {
			c -= 'a' - 'A'
		}
		if c < 'A' || c > 'Z' {
			continue
		}
		code := table[c-'A']
		if n == 0 {
			// The first letter is kept verbatim; remember its code so that an
			// immediately following letter with the same code is not repeated.
			key[0] = c
			n = 1
			last = code
			continue
		}
		if code == last {
			continue
		}
		last = code
		if code != 0 {
			key[n] = code
			n++
		}
	}
	// pad with "0"
	for ; n < 4; n++ {
		key[n] = '0'
	}
	return string(key[:])
}