github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/cursor/cursor.go (about)

     1  package cursor
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/ascii85"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"io"
     9  	"math"
    10  	"math/bits"
    11  	"sync"
    12  	"unsafe"
    13  
    14  	"github.com/FastFilter/xorfilter"
    15  	"github.com/coyove/sdss/contrib/bitmap"
    16  )
    17  
    18  const compactBFHash = 3
    19  
    20  var (
    21  	compactThreshold = []int{128, 256, 512, 1024, 2048, 4096, 8192}
    22  	compactBytesSize = func() (a []int) {
    23  		for i := range compactThreshold {
    24  			bf := compactBFHash
    25  			capacity := 32 + uint32(math.Ceil(1.23*float64(compactThreshold[i]*bf)))
    26  			capacity = capacity / 3 * 3 // round it down to a multiple of 3
    27  			a = append(a, int(capacity))
    28  		}
    29  		return
    30  	}()
    31  )
    32  
    33  func at(a []int, idx int) int {
    34  	if idx < len(a) {
    35  		return a[idx]
    36  	}
    37  	return a[len(a)-1]
    38  }
    39  
// Cursor carries four exported position fields together with a record of
// the keys that have been added to it. Recently added keys are kept exactly
// as hashes in pendings; once enough accumulate they are folded into an
// xor filter appended to compacts (see Add).
type Cursor struct {
	// Position fields. They are serialized first, in this order, as
	// big-endian int64 values by MarshalBinary.
	// NOTE(review): their exact meaning is defined by the callers, not by
	// this file.
	PrevMap int64
	PrevId  int64
	NextMap int64
	NextId  int64

	// pendings holds the hashes of keys added since the last compaction.
	pendings []uint64
	// compacts holds one filter per completed compaction, oldest first.
	compacts []xorfilter.Xor8

	// _dedup is the set view of pendings used for exact membership checks;
	// Add also reuses it as scratch space while building a filter.
	_dedup map[uint64]struct{}
	// _mu is taken by Add (write lock) and Contains (read lock).
	_mu sync.RWMutex
}
    52  
    53  func New() *Cursor {
    54  	c := &Cursor{}
    55  	c._dedup = map[uint64]struct{}{}
    56  	return c
    57  }
    58  
    59  func Parse(buf []byte) (*Cursor, bool) {
    60  	for i, b := range buf {
    61  		switch b {
    62  		case '~':
    63  			buf[i] = '\\'
    64  		case '{':
    65  			buf[i] = '"'
    66  		case '}':
    67  			buf[i] = '\''
    68  		}
    69  	}
    70  	return Read(ascii85.NewDecoder(bytes.NewReader(buf)))
    71  }
    72  
    73  func Read(rd io.Reader) (*Cursor, bool) {
    74  	c := &Cursor{}
    75  	if err := binary.Read(rd, binary.BigEndian, &c.PrevMap); err != nil {
    76  		return nil, false
    77  	}
    78  	if err := binary.Read(rd, binary.BigEndian, &c.PrevId); err != nil {
    79  		return nil, false
    80  	}
    81  	if err := binary.Read(rd, binary.BigEndian, &c.NextMap); err != nil {
    82  		return nil, false
    83  	}
    84  	if err := binary.Read(rd, binary.BigEndian, &c.NextId); err != nil {
    85  		return nil, false
    86  	}
    87  
    88  	var pendingsCount uint16
    89  	if err := binary.Read(rd, binary.BigEndian, &pendingsCount); err != nil {
    90  		return nil, false
    91  	}
    92  
    93  	c.pendings = make([]uint64, pendingsCount)
    94  	c._dedup = map[uint64]struct{}{}
    95  	if err := binary.Read(rd, binary.BigEndian, c.pendings); err != nil {
    96  		return nil, false
    97  	}
    98  	for _, p := range c.pendings {
    99  		c._dedup[p] = struct{}{}
   100  	}
   101  
   102  	var compactsCount uint16
   103  	if err := binary.Read(rd, binary.BigEndian, &compactsCount); err != nil {
   104  		return nil, false
   105  	}
   106  
   107  	c.compacts = make([]xorfilter.Xor8, compactsCount)
   108  	tmp := make([]byte, compactBytesSize[len(compactBytesSize)-1]+8)
   109  	for i := range c.compacts {
   110  		sz := at(compactBytesSize, i)
   111  		tmp = tmp[:sz+8]
   112  		if err := binary.Read(rd, binary.BigEndian, tmp); err != nil {
   113  			return nil, false
   114  		}
   115  		c.compacts[i].BlockLength = uint32(sz) / 3
   116  		c.compacts[i].Seed = binary.BigEndian.Uint64(tmp[:8])
   117  		c.compacts[i].Fingerprints = append([]byte{}, tmp[8:]...)
   118  	}
   119  	return c, true
   120  }
   121  
   122  func (c *Cursor) clearDedup() {
   123  	for k := range c._dedup {
   124  		delete(c._dedup, k)
   125  	}
   126  }
   127  
   128  func (c *Cursor) Add(key bitmap.Key) bool {
   129  	c._mu.Lock()
   130  	defer c._mu.Unlock()
   131  
   132  	h := hashCode(key)
   133  	_, exist := c.contains(h, expandHash(h))
   134  	if exist {
   135  		return false
   136  	}
   137  
   138  	c.pendings = append(c.pendings, h)
   139  	c._dedup[h] = struct{}{}
   140  
   141  	if len(c.pendings) == at(compactThreshold, len(c.compacts)) {
   142  		bf := compactBFHash
   143  		tmp := make([]uint64, 0, len(c.pendings)*bf)
   144  		c.clearDedup()
   145  		for _, p := range c.pendings {
   146  			h := expandHash(p)
   147  			for i := 0; i < bf; i++ {
   148  				for {
   149  					if _, ok := c._dedup[h[i]]; ok {
   150  						h[i]++
   151  					} else {
   152  						break
   153  					}
   154  				}
   155  				tmp = append(tmp, h[i])
   156  				c._dedup[h[i]] = struct{}{}
   157  			}
   158  		}
   159  		xf, _ := xorfilter.Populate(tmp)
   160  		c.pendings = c.pendings[:0]
   161  		c.compacts = append(c.compacts, *xf)
   162  		c.clearDedup()
   163  	}
   164  	return true
   165  }
   166  
   167  func (c *Cursor) Contains(key bitmap.Key) bool {
   168  	c._mu.RLock()
   169  	defer c._mu.RUnlock()
   170  	h := hashCode(key)
   171  	_, ok := c.contains(h, expandHash(h))
   172  	return ok
   173  }
   174  
   175  func (c *Cursor) contains(h uint64, bfh [compactBFHash]uint64) (int, bool) {
   176  	if _, ok := c._dedup[h]; ok {
   177  		return -1, true
   178  	}
   179  
   180  NEXT:
   181  	for i, cp := range c.compacts {
   182  		bf := compactBFHash //  at(compactBFHash, i)
   183  		for i := 0; i < bf; i++ {
   184  			if !cp.Contains(bfh[i]) {
   185  				continue NEXT
   186  			}
   187  		}
   188  		return i, true
   189  	}
   190  	return -1, false
   191  }
   192  
   193  func (c *Cursor) GoString() string {
   194  	x := fmt.Sprintf("next: %x-%x, pendings: %d", c.NextMap, c.NextId, len(c.pendings))
   195  	return x
   196  }
   197  
   198  func (c *Cursor) MarshalBinary() []byte {
   199  	out := &bytes.Buffer{}
   200  	binary.Write(out, binary.BigEndian, c.PrevMap)
   201  	binary.Write(out, binary.BigEndian, c.PrevId)
   202  	binary.Write(out, binary.BigEndian, c.NextMap)
   203  	binary.Write(out, binary.BigEndian, c.NextId)
   204  	binary.Write(out, binary.BigEndian, uint16(len(c.pendings)))
   205  	binary.Write(out, binary.BigEndian, c.pendings)
   206  	binary.Write(out, binary.BigEndian, uint16(len(c.compacts)))
   207  	for _, cp := range c.compacts {
   208  		binary.Write(out, binary.BigEndian, cp.Seed)
   209  		binary.Write(out, binary.BigEndian, cp.Fingerprints)
   210  	}
   211  	return out.Bytes()
   212  }
   213  
   214  func (c *Cursor) String() string {
   215  	buf := &bytes.Buffer{}
   216  	w := ascii85.NewEncoder(buf)
   217  	w.Write(c.MarshalBinary())
   218  	w.Close()
   219  	for i, b := range buf.Bytes() {
   220  		switch b {
   221  		case '\\':
   222  			buf.Bytes()[i] = '~'
   223  		case '"':
   224  			buf.Bytes()[i] = '{'
   225  		case '\'':
   226  			buf.Bytes()[i] = '}'
   227  		}
   228  	}
   229  	return buf.String()
   230  }
   231  
// hashCode reduces a bitmap.Key to a single 64-bit hash by viewing the key's
// memory as two uint64 words and mixing them with hash2.
// NOTE(review): assumes bitmap.Key occupies at least 16 bytes; its
// definition is not visible here. If Key is a byte array, the words are
// read in the machine's native byte order. Confirm whether hashes need to
// be portable across architectures.
func hashCode(k bitmap.Key) uint64 {
	// Reinterpret the local copy of k as [2]uint64 without converting it.
	a := *(*[2]uint64)(unsafe.Pointer(&k))
	return hash2(a[0], a[1])
}
   236  
   237  func hash2(a, b uint64) uint64 {
   238  	const (
   239  		offset64 = 14695981039346656037
   240  		prime64  = 1099511628211
   241  	)
   242  	h := uint64(offset64)
   243  	h ^= a
   244  	h *= prime64
   245  	h ^= b
   246  	h *= prime64
   247  	return h
   248  }
   249  
   250  func expandHash(h uint64) (a [compactBFHash]uint64) {
   251  	a[0] = h
   252  	a[1] = ^h
   253  	a[2] = bits.ReverseBytes64(h)
   254  	return
   255  }