github.com/wangyougui/gf/v2@v2.6.5/text/gstr/gstr_similar.go (about)

     1  // Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
     2  //
     3  // This Source Code Form is subject to the terms of the MIT License.
     4  // If a copy of the MIT was not distributed with this file,
     5  // You can obtain one at https://github.com/wangyougui/gf.
     6  
     7  package gstr
     8  
     9  // Levenshtein calculates Levenshtein distance between two strings.
    10  // costIns: Defines the cost of insertion.
    11  // costRep: Defines the cost of replacement.
    12  // costDel: Defines the cost of deletion.
    13  // See http://php.net/manual/en/function.levenshtein.php.
    14  func Levenshtein(str1, str2 string, costIns, costRep, costDel int) int {
    15  	var maxLen = 255
    16  	l1 := len(str1)
    17  	l2 := len(str2)
    18  	if l1 == 0 {
    19  		return l2 * costIns
    20  	}
    21  	if l2 == 0 {
    22  		return l1 * costDel
    23  	}
    24  	if l1 > maxLen || l2 > maxLen {
    25  		return -1
    26  	}
    27  
    28  	tmp := make([]int, l2+1)
    29  	p1 := make([]int, l2+1)
    30  	p2 := make([]int, l2+1)
    31  	var c0, c1, c2 int
    32  	var i1, i2 int
    33  	for i2 := 0; i2 <= l2; i2++ {
    34  		p1[i2] = i2 * costIns
    35  	}
    36  	for i1 = 0; i1 < l1; i1++ {
    37  		p2[0] = p1[0] + costDel
    38  		for i2 = 0; i2 < l2; i2++ {
    39  			if str1[i1] == str2[i2] {
    40  				c0 = p1[i2]
    41  			} else {
    42  				c0 = p1[i2] + costRep
    43  			}
    44  			c1 = p1[i2+1] + costDel
    45  			if c1 < c0 {
    46  				c0 = c1
    47  			}
    48  			c2 = p2[i2] + costIns
    49  			if c2 < c0 {
    50  				c0 = c2
    51  			}
    52  			p2[i2+1] = c0
    53  		}
    54  		tmp = p1
    55  		p1 = p2
    56  		p2 = tmp
    57  	}
    58  	c0 = p1[l2]
    59  
    60  	return c0
    61  }
    62  
    63  // SimilarText calculates the similarity between two strings.
    64  // See http://php.net/manual/en/function.similar-text.php.
    65  func SimilarText(first, second string, percent *float64) int {
    66  	var similarText func(string, string, int, int) int
    67  	similarText = func(str1, str2 string, len1, len2 int) int {
    68  		var sum, max int
    69  		pos1, pos2 := 0, 0
    70  
    71  		// Find the longest segment of the same section in two strings
    72  		for i := 0; i < len1; i++ {
    73  			for j := 0; j < len2; j++ {
    74  				for l := 0; (i+l < len1) && (j+l < len2) && (str1[i+l] == str2[j+l]); l++ {
    75  					if l+1 > max {
    76  						max = l + 1
    77  						pos1 = i
    78  						pos2 = j
    79  					}
    80  				}
    81  			}
    82  		}
    83  
    84  		if sum = max; sum > 0 {
    85  			if pos1 > 0 && pos2 > 0 {
    86  				sum += similarText(str1, str2, pos1, pos2)
    87  			}
    88  			if (pos1+max < len1) && (pos2+max < len2) {
    89  				s1 := []byte(str1)
    90  				s2 := []byte(str2)
    91  				sum += similarText(string(s1[pos1+max:]), string(s2[pos2+max:]), len1-pos1-max, len2-pos2-max)
    92  			}
    93  		}
    94  
    95  		return sum
    96  	}
    97  
    98  	l1, l2 := len(first), len(second)
    99  	if l1+l2 == 0 {
   100  		return 0
   101  	}
   102  	sim := similarText(first, second, l1, l2)
   103  	if percent != nil {
   104  		*percent = float64(sim*200) / float64(l1+l2)
   105  	}
   106  	return sim
   107  }
   108  
   109  // Soundex calculates the soundex key of a string.
   110  // See http://php.net/manual/en/function.soundex.php.
   111  func Soundex(str string) string {
   112  	if str == "" {
   113  		panic("str: cannot be an empty string")
   114  	}
   115  	table := [26]rune{
   116  		'0', '1', '2', '3', // A, B, C, D
   117  		'0', '1', '2', // E, F, G
   118  		'0',                          // H
   119  		'0', '2', '2', '4', '5', '5', // I, J, K, L, M, N
   120  		'0', '1', '2', '6', '2', '3', // O, P, Q, R, S, T
   121  		'0', '1', // U, V
   122  		'0', '2', // W, X
   123  		'0', '2', // Y, Z
   124  	}
   125  	last, code, small := -1, 0, 0
   126  	sd := make([]rune, 4)
   127  	// build soundex string
   128  	for i := 0; i < len(str) && small < 4; i++ {
   129  		// ToUpper
   130  		char := str[i]
   131  		if char < '\u007F' && 'a' <= char && char <= 'z' {
   132  			code = int(char - 'a' + 'A')
   133  		} else {
   134  			code = int(char)
   135  		}
   136  		if code >= 'A' && code <= 'Z' {
   137  			if small == 0 {
   138  				sd[small] = rune(code)
   139  				small++
   140  				last = int(table[code-'A'])
   141  			} else {
   142  				code = int(table[code-'A'])
   143  				if code != last {
   144  					if code != 0 {
   145  						sd[small] = rune(code)
   146  						small++
   147  					}
   148  					last = code
   149  				}
   150  			}
   151  		}
   152  	}
   153  	// pad with "0"
   154  	for ; small < 4; small++ {
   155  		sd[small] = '0'
   156  	}
   157  	return string(sd)
   158  }