github.com/dolthub/go-mysql-server@v0.18.0/internal/similartext/similartext.go (about)

     1  // Copyright 2020-2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package similartext
    16  
import (
	"fmt"
	"reflect"
	"sort"
	"strings"
)
    22  
    23  func min(a, b int) int {
    24  	if a < b {
    25  		return a
    26  	}
    27  	return b
    28  }
    29  
    30  // DistanceForStrings returns the edit distance between source and target.
    31  // It has a runtime proportional to len(source) * len(target) and memory use
    32  // proportional to len(target).
    33  // Taken (simplified, for strings and with default options) from:
    34  // https://github.com/texttheater/golang-levenshtein
    35  func distanceForStrings(source, target string) int {
    36  	height := len(source) + 1
    37  	width := len(target) + 1
    38  	matrix := make([][]int, 2)
    39  
    40  	for i := 0; i < 2; i++ {
    41  		matrix[i] = make([]int, width)
    42  		matrix[i][0] = i
    43  	}
    44  	for j := 1; j < width; j++ {
    45  		matrix[0][j] = j
    46  	}
    47  
    48  	for i := 1; i < height; i++ {
    49  		cur := matrix[i%2]
    50  		prev := matrix[(i-1)%2]
    51  		cur[0] = i
    52  		for j := 1; j < width; j++ {
    53  			delCost := prev[j] + 1
    54  			matchSubCost := prev[j-1]
    55  			if source[i-1] != target[j-1] {
    56  				matchSubCost += 2
    57  			}
    58  			insCost := cur[j-1] + 1
    59  			cur[j] = min(delCost, min(matchSubCost, insCost))
    60  		}
    61  	}
    62  	return matrix[(height-1)%2][width-1]
    63  }
    64  
// DistanceSkipped is the edit distance at or above which a candidate is not
// considered similar to the searched string and is therefore skipped by Find.
var DistanceSkipped = 3
    68  
    69  // Find returns a string with suggestions for name(s) in `names`
    70  // similar to the string `src` until a max distance of `DistanceSkipped`.
    71  func Find(names []string, src string) string {
    72  	if len(src) == 0 {
    73  		return ""
    74  	}
    75  
    76  	minDistance := -1
    77  	matchMap := make(map[int][]string)
    78  
    79  	for _, name := range names {
    80  		dist := distanceForStrings(name, src)
    81  		if dist >= DistanceSkipped {
    82  			continue
    83  		}
    84  
    85  		if minDistance == -1 || dist < minDistance {
    86  			minDistance = dist
    87  		}
    88  
    89  		matchMap[dist] = append(matchMap[dist], name)
    90  	}
    91  
    92  	if len(matchMap) == 0 {
    93  		return ""
    94  	}
    95  
    96  	return fmt.Sprintf(", maybe you mean %s?",
    97  		strings.Join(matchMap[minDistance], " or "))
    98  }
    99  
   100  // FindFromMap does the same as Find but taking a map instead
   101  // of a string array as first argument.
   102  func FindFromMap(names interface{}, src string) string {
   103  	rnames := reflect.ValueOf(names)
   104  	if rnames.Kind() != reflect.Map {
   105  		panic("Implementation error: non map used as first argument " +
   106  			"to FindFromMap")
   107  	}
   108  
   109  	t := rnames.Type()
   110  	if t.Key().Kind() != reflect.String {
   111  		panic("Implementation error: non string key for map used as " +
   112  			"first argument to FindFromMap")
   113  	}
   114  
   115  	var namesList []string
   116  	for _, kv := range rnames.MapKeys() {
   117  		namesList = append(namesList, kv.String())
   118  	}
   119  
   120  	return Find(namesList, src)
   121  }