github.com/seeker-insurance/kit@v0.0.13/address/compare.go (about)

     1  package address
     2  
     3  import (
     4  	"github.com/seeker-insurance/kit/str"
     5  	"github.com/seeker-insurance/kit/stringslice"
     6  )
     7  
     8  //TotalDistance returns the sum of the levenshtein distance of the components of the addresses.
     9  func TotalDistance(placeA, placeB *Address) (distance int) {
    10  	a, b := placeA.StringSlice(), placeB.StringSlice()
    11  	distance, _ = stringslice.TotalDistance(a, b)
    12  	return distance
    13  }
    14  
    15  //SharedComponentDistance returns the sum of the levenshtein distance of the shared components of the addresses
    16  func SharedComponentDistance(placeA, placeB *Address) (distance int) {
    17  	filteredA, filteredB := SharedComponents(*placeA, *placeB)
    18  	a, b := filteredA.StringSliceFromNonempty(), filteredB.StringSliceFromNonempty()
    19  	distance, _ = stringslice.TotalDistance(a, b)
    20  	return distance
    21  }
    22  
    23  //NormalizedTotalDistance Performs heavy normalization on both addresses, then compares the distance.
    24  func NormalizedTotalDistance(placeA, placeB *Address) (distance int) {
    25  	filteredA, filteredB := SharedComponents(*placeA, *placeB)
    26  	a, b := filteredA.StringSlice(), filteredB.StringSlice()
    27  	a, b = str.Map(normalizeComponent, a), str.Map(normalizeComponent, b)
    28  	distance, _ = stringslice.TotalDistance(a, b)
    29  	return distance
    30  }
    31  
    32  //NormalizedSharedComponentDistance performs heavy normalization on both addresses, then compares the distance.
    33  func NormalizedSharedComponentDistance(placeA, placeB *Address) (distance int) {
    34  	filteredA, filteredB := SharedComponents(*placeA, *placeB)
    35  	a, b := filteredA.StringSliceFromNonempty(), filteredB.StringSliceFromNonempty()
    36  	a, b = str.Map(normalizeComponent, a), str.Map(normalizeComponent, b)
    37  	distance, _ = stringslice.TotalDistance(a, b)
    38  	return distance
    39  }
    40  
    41  func NormalizedSharedComponentDistanceSlice(placeA, placeB Address) (distances []int) {
    42  	filteredA, filteredB := SharedComponents(placeA, placeB)
    43  	a, b := filteredA.StringSliceFromNonempty(), filteredB.StringSliceFromNonempty()
    44  	a, b = str.Map(normalizeComponent, a), str.Map(normalizeComponent, b)
    45  	distances = make([]int, len(a))
    46  	for i := range distances {
    47  		distances[i] = levenshteinDistance(a[i], b[i])
    48  	}
    49  	return distances
    50  }
    51  
    52  func NonOverlappingComponents(placeA, placeB Address) (uniqueToA, uniqueToB int) {
    53  	filteredA, filteredB := SharedComponents(placeA, placeB)
    54  	uniqueToA = len(placeA.StringSliceFromNonempty()) - len(filteredA.StringSliceFromNonempty())
    55  	uniqueToB = len(placeB.StringSliceFromNonempty()) - len(filteredB.StringSliceFromNonempty())
    56  	return uniqueToA, uniqueToB
    57  }
    58  
    59  // levenshteinDistance measures the difference between two strings.
    60  // The Levenshtein distance between two words is the minimum number of
    61  // single-character edits (i.e. insertions, deletions or substitutions)
    62  // required to change one word into the other.
    63  //
    64  // This implemention is optimized to use O(min(m,n)) space and is based on the
    65  // optimized C version found here:
    66  // http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C
    67  // This implentation Copyright (c) 2015 Peter Renström under the MIT license: https://github.com/renstrom/fuzzysearch/blob/master/LICENSE
    68  func levenshteinDistance(s, t string) int {
    69  	r1, r2 := []rune(s), []rune(t)
    70  	column := make([]int, len(r1)+1)
    71  
    72  	for y := 1; y <= len(r1); y++ {
    73  		column[y] = y
    74  	}
    75  
    76  	for x := 1; x <= len(r2); x++ {
    77  		column[0] = x
    78  
    79  		for y, lastDiag := 1, x-1; y <= len(r1); y++ {
    80  			oldDiag := column[y]
    81  			cost := 0
    82  			if r1[y-1] != r2[x-1] {
    83  				cost = 1
    84  			}
    85  			column[y] = min(column[y]+1, column[y-1]+1, lastDiag+cost)
    86  			lastDiag = oldDiag
    87  		}
    88  	}
    89  
    90  	return column[len(r1)]
    91  }
    92  
    93  func min(a int, ints ...int) int {
    94  	min := a
    95  	for _, n := range ints {
    96  		min = min2(min, n)
    97  	}
    98  	return min
    99  }
   100  
   101  func min2(a, b int) int {
   102  	if a < b {
   103  		return a
   104  	}
   105  	return b
   106  }