github.com/seeker-insurance/kit@v0.0.13/address/compare.go (about) 1 package address 2 3 import ( 4 "github.com/seeker-insurance/kit/str" 5 "github.com/seeker-insurance/kit/stringslice" 6 ) 7 8 //TotalDistance returns the sum of the levenshtein distance of the components of the addresses. 9 func TotalDistance(placeA, placeB *Address) (distance int) { 10 a, b := placeA.StringSlice(), placeB.StringSlice() 11 distance, _ = stringslice.TotalDistance(a, b) 12 return distance 13 } 14 15 //SharedComponentDistance returns the sum of the levenshtein distance of the shared components of the addresses 16 func SharedComponentDistance(placeA, placeB *Address) (distance int) { 17 filteredA, filteredB := SharedComponents(*placeA, *placeB) 18 a, b := filteredA.StringSliceFromNonempty(), filteredB.StringSliceFromNonempty() 19 distance, _ = stringslice.TotalDistance(a, b) 20 return distance 21 } 22 23 //NormalizedTotalDistance Performs heavy normalization on both addresses, then compares the distance. 24 func NormalizedTotalDistance(placeA, placeB *Address) (distance int) { 25 filteredA, filteredB := SharedComponents(*placeA, *placeB) 26 a, b := filteredA.StringSlice(), filteredB.StringSlice() 27 a, b = str.Map(normalizeComponent, a), str.Map(normalizeComponent, b) 28 distance, _ = stringslice.TotalDistance(a, b) 29 return distance 30 } 31 32 //NormalizedSharedComponentDistance performs heavy normalization on both addresses, then compares the distance. 
33 func NormalizedSharedComponentDistance(placeA, placeB *Address) (distance int) { 34 filteredA, filteredB := SharedComponents(*placeA, *placeB) 35 a, b := filteredA.StringSliceFromNonempty(), filteredB.StringSliceFromNonempty() 36 a, b = str.Map(normalizeComponent, a), str.Map(normalizeComponent, b) 37 distance, _ = stringslice.TotalDistance(a, b) 38 return distance 39 } 40 41 func NormalizedSharedComponentDistanceSlice(placeA, placeB Address) (distances []int) { 42 filteredA, filteredB := SharedComponents(placeA, placeB) 43 a, b := filteredA.StringSliceFromNonempty(), filteredB.StringSliceFromNonempty() 44 a, b = str.Map(normalizeComponent, a), str.Map(normalizeComponent, b) 45 distances = make([]int, len(a)) 46 for i := range distances { 47 distances[i] = levenshteinDistance(a[i], b[i]) 48 } 49 return distances 50 } 51 52 func NonOverlappingComponents(placeA, placeB Address) (uniqueToA, uniqueToB int) { 53 filteredA, filteredB := SharedComponents(placeA, placeB) 54 uniqueToA = len(placeA.StringSliceFromNonempty()) - len(filteredA.StringSliceFromNonempty()) 55 uniqueToB = len(placeB.StringSliceFromNonempty()) - len(filteredB.StringSliceFromNonempty()) 56 return uniqueToA, uniqueToB 57 } 58 59 // levenshteinDistance measures the difference between two strings. 60 // The Levenshtein distance between two words is the minimum number of 61 // single-character edits (i.e. insertions, deletions or substitutions) 62 // required to change one word into the other. 
//
// Both inputs are compared rune by rune, so multi-byte characters count as a
// single edit. Apart from the two rune slices, the only scratch storage is one
// dynamic-programming column sized by the shorter input, i.e. O(min(m,n)).
// The implementation is based on the optimized C version found here:
// http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C
// This implementation Copyright (c) 2015 Peter Renström under the MIT license: https://github.com/renstrom/fuzzysearch/blob/master/LICENSE
func levenshteinDistance(s, t string) int {
	r1, r2 := []rune(s), []rune(t)

	// The distance is symmetric, so put the shorter input on the column axis;
	// this is what keeps the column at min(m,n)+1 entries.
	if len(r1) > len(r2) {
		r1, r2 = r2, r1
	}
	// Turning an empty string into r2 takes exactly len(r2) insertions.
	if len(r1) == 0 {
		return len(r2)
	}

	// column[y] holds the distance between r1[:y] and the prefix of r2
	// processed so far; initially that prefix is empty.
	column := make([]int, len(r1)+1)
	for y := range column {
		column[y] = y
	}

	for x := 1; x <= len(r2); x++ {
		column[0] = x

		// lastDiag carries the previous row's value at y-1, which would
		// otherwise be lost when column[y-1] is overwritten.
		lastDiag := x - 1
		for y := 1; y <= len(r1); y++ {
			oldDiag := column[y]
			cost := 0
			if r1[y-1] != r2[x-1] {
				cost = 1
			}
			column[y] = min(column[y]+1, column[y-1]+1, lastDiag+cost)
			lastDiag = oldDiag
		}
	}

	return column[len(r1)]
}

// min returns the smallest of a and any further values in ints.
func min(a int, ints ...int) int {
	smallest := a
	for _, n := range ints {
		smallest = min2(smallest, n)
	}
	return smallest
}

// min2 returns the smaller of a and b.
func min2(a, b int) int {
	if a < b {
		return a
	}
	return b
}