// UniquifyByteSlices sorts the given byte slices in ascending bytes.Compare
// order and drops every entry that equals the one before it, so the result
// is both sorted and duplicate-free.
//
// The work happens in place: the elements of the input are reordered and
// overwritten, and the returned slice is a prefix of the input slice. An
// empty (or nil) input is returned unchanged.
func UniquifyByteSlices(slices [][]byte) [][]byte {
	if len(slices) == 0 {
		return slices
	}

	// Sorting first makes equal entries adjacent.
	sort.Slice(slices, func(a, b int) bool {
		return bytes.Compare(slices[a], slices[b]) < 0
	})

	// kept is the number of distinct entries compacted to the front so far;
	// the first element is always kept. The write position never runs ahead
	// of the read position, so no unread element is clobbered.
	kept := 1
	for _, candidate := range slices[1:] {
		if bytes.Equal(candidate, slices[kept-1]) {
			// Same as the most recently kept entry: skip it.
			continue
		}
		slices[kept] = candidate
		kept++
	}
	return slices[:kept]
}
// UniquifyAcrossSlices removes, from both left and right, every element that
// appears in both of them. For example, inputs [1,2,3] and [2,3,4] are
// reduced to [1] and [4].
//
// Both inputs must already be sorted according to the ordering implemented by
// cmpFunc, and each must be free of duplicates internally. The function does
// not sort anything itself.
//
// left and right are inspected only for their lengths; every element access
// goes through the callbacks:
//   - cmpFunc(l, r) compares the lth element of left to the rth element of
//     right. It must return less than 0 if the left element is smaller, 0 if
//     the two are equal, and greater than 0 otherwise.
//   - setLeft(i, j) sets the ith element of left to the jth element of left.
//   - setRight(i, j) sets the ith element of right to the jth element of
//     right.
//
// The surviving elements are compacted, in their original order, to the front
// of each input via setLeft and setRight. The function returns how many
// elements survive on each side; the inputs' own lengths are not changed, so
// the caller must truncate them to the returned lengths.
//
// A nil left or right is treated as an empty input. Any non-nil value must be
// of a kind accepted by reflect.Value.Len (such as a slice), otherwise that
// call panics.
func UniquifyAcrossSlices(
	left interface{},
	right interface{},
	cmpFunc func(l, r int) int,
	setLeft func(i, j int),
	setRight func(i, j int),
) (leftLen, rightLen int) {
	// reflect.ValueOf(nil) yields the zero Value, whose Len panics, so a nil
	// interface is handled explicitly as length 0.
	var lLen, rLen int
	if left != nil {
		lLen = reflect.ValueOf(left).Len()
	}
	if right != nil {
		rLen = reflect.ValueOf(right).Len()
	}

	var lIn, lOut int
	var rIn, rOut int

	// Walk both inputs with a merge strategy, keeping two cursors per side.
	// The "in" cursor is the next element to examine; the "out" cursor is the
	// position just past the last element kept so far. Kept elements are
	// copied from "in" to "out", which compacts each side in place without
	// extra space.
	for lIn < lLen || rIn < rLen {
		var cmp int
		switch {
		case lIn == lLen:
			// Left is exhausted: everything remaining on the right is unique.
			cmp = 1
		case rIn == rLen:
			// Right is exhausted: everything remaining on the left is unique.
			cmp = -1
		default:
			cmp = cmpFunc(lIn, rIn)
		}

		switch {
		case cmp < 0:
			// The left element has no counterpart on the right: keep it.
			setLeft(lOut, lIn)
			lIn++
			lOut++
		case cmp > 0:
			// The right element has no counterpart on the left: keep it.
			setRight(rOut, rIn)
			rIn++
			rOut++
		default:
			// The element is present on both sides: drop it from both by
			// advancing the input cursors only. The output cursors now lag
			// behind, so the next kept element is shifted back over it.
			lIn++
			rIn++
		}
	}
	return lOut, rOut
}