github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/cursor/cursor.go (about) 1 package cursor 2 3 import ( 4 "bytes" 5 "encoding/ascii85" 6 "encoding/binary" 7 "fmt" 8 "io" 9 "math" 10 "math/bits" 11 "sync" 12 "unsafe" 13 14 "github.com/FastFilter/xorfilter" 15 "github.com/coyove/sdss/contrib/bitmap" 16 ) 17 18 const compactBFHash = 3 19 20 var ( 21 compactThreshold = []int{128, 256, 512, 1024, 2048, 4096, 8192} 22 compactBytesSize = func() (a []int) { 23 for i := range compactThreshold { 24 bf := compactBFHash 25 capacity := 32 + uint32(math.Ceil(1.23*float64(compactThreshold[i]*bf))) 26 capacity = capacity / 3 * 3 // round it down to a multiple of 3 27 a = append(a, int(capacity)) 28 } 29 return 30 }() 31 ) 32 33 func at(a []int, idx int) int { 34 if idx < len(a) { 35 return a[idx] 36 } 37 return a[len(a)-1] 38 } 39 40 type Cursor struct { 41 PrevMap int64 42 PrevId int64 43 NextMap int64 44 NextId int64 45 46 pendings []uint64 47 compacts []xorfilter.Xor8 48 49 _dedup map[uint64]struct{} 50 _mu sync.RWMutex 51 } 52 53 func New() *Cursor { 54 c := &Cursor{} 55 c._dedup = map[uint64]struct{}{} 56 return c 57 } 58 59 func Parse(buf []byte) (*Cursor, bool) { 60 for i, b := range buf { 61 switch b { 62 case '~': 63 buf[i] = '\\' 64 case '{': 65 buf[i] = '"' 66 case '}': 67 buf[i] = '\'' 68 } 69 } 70 return Read(ascii85.NewDecoder(bytes.NewReader(buf))) 71 } 72 73 func Read(rd io.Reader) (*Cursor, bool) { 74 c := &Cursor{} 75 if err := binary.Read(rd, binary.BigEndian, &c.PrevMap); err != nil { 76 return nil, false 77 } 78 if err := binary.Read(rd, binary.BigEndian, &c.PrevId); err != nil { 79 return nil, false 80 } 81 if err := binary.Read(rd, binary.BigEndian, &c.NextMap); err != nil { 82 return nil, false 83 } 84 if err := binary.Read(rd, binary.BigEndian, &c.NextId); err != nil { 85 return nil, false 86 } 87 88 var pendingsCount uint16 89 if err := binary.Read(rd, binary.BigEndian, &pendingsCount); err != nil { 90 return nil, false 91 } 92 93 c.pendings = make([]uint64, pendingsCount) 94 c._dedup = map[uint64]struct{}{} 95 if err := binary.Read(rd, binary.BigEndian, c.pendings); err != nil { 96 return nil, false 97 } 98 for _, p := range c.pendings { 99 c._dedup[p] = struct{}{} 100 } 101 102 var compactsCount uint16 103 if err := binary.Read(rd, binary.BigEndian, &compactsCount); err != nil { 104 return nil, false 105 } 106 107 c.compacts = make([]xorfilter.Xor8, compactsCount) 108 tmp := make([]byte, compactBytesSize[len(compactBytesSize)-1]+8) 109 for i := range c.compacts { 110 sz := at(compactBytesSize, i) 111 tmp = tmp[:sz+8] 112 if err := binary.Read(rd, binary.BigEndian, tmp); err != nil { 113 return nil, false 114 } 115 c.compacts[i].BlockLength = uint32(sz) / 3 116 c.compacts[i].Seed = binary.BigEndian.Uint64(tmp[:8]) 117 c.compacts[i].Fingerprints = append([]byte{}, tmp[8:]...) 118 } 119 return c, true 120 } 121 122 func (c *Cursor) clearDedup() { 123 for k := range c._dedup { 124 delete(c._dedup, k) 125 } 126 } 127 128 func (c *Cursor) Add(key bitmap.Key) bool { 129 c._mu.Lock() 130 defer c._mu.Unlock() 131 132 h := hashCode(key) 133 _, exist := c.contains(h, expandHash(h)) 134 if exist { 135 return false 136 } 137 138 c.pendings = append(c.pendings, h) 139 c._dedup[h] = struct{}{} 140 141 if len(c.pendings) == at(compactThreshold, len(c.compacts)) { 142 bf := compactBFHash 143 tmp := make([]uint64, 0, len(c.pendings)*bf) 144 c.clearDedup() 145 for _, p := range c.pendings { 146 h := expandHash(p) 147 for i := 0; i < bf; i++ { 148 for { 149 if _, ok := c._dedup[h[i]]; ok { 150 h[i]++ 151 } else { 152 break 153 } 154 } 155 tmp = append(tmp, h[i]) 156 c._dedup[h[i]] = struct{}{} 157 } 158 } 159 xf, _ := xorfilter.Populate(tmp) 160 c.pendings = c.pendings[:0] 161 c.compacts = append(c.compacts, *xf) 162 c.clearDedup() 163 } 164 return true 165 } 166 167 func (c *Cursor) Contains(key bitmap.Key) bool { 168 c._mu.RLock() 169 defer c._mu.RUnlock() 170 h := hashCode(key) 171 _, ok := c.contains(h, expandHash(h)) 172 return ok 173 } 174 175 func (c *Cursor) contains(h uint64, bfh [compactBFHash]uint64) (int, bool) { 176 if _, ok := c._dedup[h]; ok { 177 return -1, true 178 } 179 180 NEXT: 181 for i, cp := range c.compacts { 182 bf := compactBFHash // at(compactBFHash, i) 183 for i := 0; i < bf; i++ { 184 if !cp.Contains(bfh[i]) { 185 continue NEXT 186 } 187 } 188 return i, true 189 } 190 return -1, false 191 } 192 193 func (c *Cursor) GoString() string { 194 x := fmt.Sprintf("next: %x-%x, pendings: %d", c.NextMap, c.NextId, len(c.pendings)) 195 return x 196 } 197 198 func (c *Cursor) MarshalBinary() []byte { 199 out := &bytes.Buffer{} 200 binary.Write(out, binary.BigEndian, c.PrevMap) 201 binary.Write(out, binary.BigEndian, c.PrevId) 202 binary.Write(out, binary.BigEndian, c.NextMap) 203 binary.Write(out, binary.BigEndian, c.NextId) 204 binary.Write(out, binary.BigEndian, uint16(len(c.pendings))) 205 binary.Write(out, binary.BigEndian, c.pendings) 206 binary.Write(out, binary.BigEndian, uint16(len(c.compacts))) 207 for _, cp := range c.compacts { 208 binary.Write(out, binary.BigEndian, cp.Seed) 209 binary.Write(out, binary.BigEndian, cp.Fingerprints) 210 } 211 return out.Bytes() 212 } 213 214 func (c *Cursor) String() string { 215 buf := &bytes.Buffer{} 216 w := ascii85.NewEncoder(buf) 217 w.Write(c.MarshalBinary()) 218 w.Close() 219 for i, b := range buf.Bytes() { 220 switch b { 221 case '\\': 222 buf.Bytes()[i] = '~' 223 case '"': 224 buf.Bytes()[i] = '{' 225 case '\'': 226 buf.Bytes()[i] = '}' 227 } 228 } 229 return buf.String() 230 } 231 232 func hashCode(k bitmap.Key) uint64 { 233 a := *(*[2]uint64)(unsafe.Pointer(&k)) 234 return hash2(a[0], a[1]) 235 } 236 237 func hash2(a, b uint64) uint64 { 238 const ( 239 offset64 = 14695981039346656037 240 prime64 = 1099511628211 241 ) 242 h := uint64(offset64) 243 h ^= a 244 h *= prime64 245 h ^= b 246 h *= prime64 247 return h 248 } 249 250 func expandHash(h uint64) (a [compactBFHash]uint64) { 251 a[0] = h 252 a[1] = ^h 253 a[2] = bits.ReverseBytes64(h) 254 return 255 }