github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/deserializer_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"bufio"
    16  	"bytes"
    17  	"encoding/binary"
    18  	"math/rand"
    19  	"testing"
    20  
    21  	"github.com/sirupsen/logrus/hooks/test"
    22  	"github.com/stretchr/testify/assert"
    23  	"github.com/stretchr/testify/require"
    24  )
    25  
    26  func BenchmarkDeserializer2ReadUint64(b *testing.B) {
    27  	b.StopTimer()
    28  
    29  	randUint64 := rand.Uint64()
    30  
    31  	val := make([]byte, 8)
    32  	binary.LittleEndian.PutUint64(val, uint64(randUint64))
    33  	data := bytes.NewReader(val)
    34  	logger, _ := test.NewNullLogger()
    35  	d := NewDeserializer(logger)
    36  	reader := bufio.NewReader(data)
    37  	b.StartTimer()
    38  
    39  	for i := 0; i < b.N; i++ {
    40  		d.readUint64(reader)
    41  	}
    42  }
    43  
    44  func BenchmarkDeserializer2ReadUint16(b *testing.B) {
    45  	b.StopTimer()
    46  
    47  	randUint16 := uint16(rand.Uint32())
    48  
    49  	val := make([]byte, 2)
    50  	binary.LittleEndian.PutUint16(val, randUint16)
    51  	data := bytes.NewReader(val)
    52  	logger, _ := test.NewNullLogger()
    53  	d := NewDeserializer(logger)
    54  	reader := bufio.NewReader(data)
    55  	b.StartTimer()
    56  
    57  	for i := 0; i < b.N; i++ {
    58  		d.readUint16(reader)
    59  	}
    60  }
    61  
    62  func BenchmarkDeserializer2ReadCommitType(b *testing.B) {
    63  	b.StopTimer()
    64  
    65  	commitType := SetEntryPointMaxLevel
    66  
    67  	val := make([]byte, 1)
    68  	val[0] = byte(commitType)
    69  	data := bytes.NewReader(val)
    70  	logger, _ := test.NewNullLogger()
    71  	d := NewDeserializer(logger)
    72  	reader := bufio.NewReader(data)
    73  	b.StartTimer()
    74  
    75  	for i := 0; i < b.N; i++ {
    76  		d.ReadCommitType(reader)
    77  	}
    78  }
    79  
    80  func BenchmarkDeserializer2ReadUint64Slice(b *testing.B) {
    81  	b.StopTimer()
    82  
    83  	uint64Slice := []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
    84  
    85  	val := make([]byte, len(uint64Slice)*8)
    86  	for i, v := range uint64Slice {
    87  		binary.LittleEndian.PutUint64(val[i*8:], uint64(v))
    88  	}
    89  
    90  	data := bytes.NewReader(val)
    91  	logger, _ := test.NewNullLogger()
    92  	d := NewDeserializer(logger)
    93  	reader := bufio.NewReader(data)
    94  	b.StartTimer()
    95  
    96  	for i := 0; i < b.N; i++ {
    97  		d.readUint64Slice(reader, len(uint64Slice))
    98  	}
    99  }
   100  
   101  func TestDeserializer2ReadCommitType(t *testing.T) {
   102  	commitTypes := []HnswCommitType{
   103  		AddNode,
   104  		SetEntryPointMaxLevel,
   105  		AddLinkAtLevel,
   106  		ReplaceLinksAtLevel,
   107  		AddTombstone,
   108  		RemoveTombstone,
   109  		ClearLinks,
   110  		DeleteNode,
   111  		ResetIndex,
   112  		AddPQ,
   113  	}
   114  	for _, commitType := range commitTypes {
   115  		b := make([]byte, 1)
   116  		b[0] = byte(commitType)
   117  		data := bytes.NewReader(b)
   118  		logger, _ := test.NewNullLogger()
   119  		d := NewDeserializer(logger)
   120  		reader := bufio.NewReader(data)
   121  		res, err := d.ReadCommitType(reader)
   122  		if err != nil {
   123  			t.Errorf("Error reading commit type: %v", err)
   124  		}
   125  		if res != commitType {
   126  			t.Errorf("Commit type is not equal")
   127  		}
   128  
   129  	}
   130  }
   131  
   132  func TestDeserializerReadDeleteNode(t *testing.T) {
   133  	nodes := generateDummyVertices(4)
   134  	res := &DeserializationResult{
   135  		Nodes: nodes,
   136  	}
   137  	ids := []uint64{2, 3, 4, 5, 6}
   138  
   139  	for _, id := range ids {
   140  		val := make([]byte, 8)
   141  		binary.LittleEndian.PutUint64(val, id)
   142  		data := bytes.NewReader(val)
   143  		logger, _ := test.NewNullLogger()
   144  		d := NewDeserializer(logger)
   145  		reader := bufio.NewReader(data)
   146  
   147  		err := d.ReadDeleteNode(reader, res)
   148  		if err != nil {
   149  			t.Errorf("Error reading commit type: %v", err)
   150  		}
   151  	}
   152  }
   153  
   154  func TestDeserializerReadClearLinks(t *testing.T) {
   155  	nodes := generateDummyVertices(4)
   156  	res := &DeserializationResult{
   157  		Nodes: nodes,
   158  	}
   159  	ids := []uint64{2, 3, 4, 5, 6}
   160  
   161  	for _, id := range ids {
   162  		val := make([]byte, 8)
   163  		binary.LittleEndian.PutUint64(val, id)
   164  		data := bytes.NewReader(val)
   165  		logger, _ := test.NewNullLogger()
   166  		d := NewDeserializer(logger)
   167  
   168  		reader := bufio.NewReader(data)
   169  
   170  		err := d.ReadClearLinks(reader, res, true)
   171  		if err != nil {
   172  			t.Errorf("Error reading links: %v", err)
   173  		}
   174  	}
   175  }
   176  
   177  func dummyInitialDeserializerState() *DeserializationResult {
   178  	return &DeserializationResult{
   179  		LinksReplaced: make(map[uint64]map[uint16]struct{}),
   180  		Nodes: []*vertex{
   181  			nil,
   182  			nil,
   183  			{
   184  				// This is a lower level than we will read, so this node will require
   185  				// growing
   186  				level: 1,
   187  			},
   188  			{
   189  				// This is a lower level than we will read, so this node will require
   190  				// growing
   191  				level:       8,
   192  				connections: make([][]uint64, 16),
   193  			},
   194  		},
   195  	}
   196  }
   197  
   198  func TestDeserializerReadNode(t *testing.T) {
   199  	res := dummyInitialDeserializerState()
   200  	ids := []uint64{2, 3, 4, 5, 6}
   201  
   202  	for _, id := range ids {
   203  		val := make([]byte, 10)
   204  		level := uint16(id * 2)
   205  		binary.LittleEndian.PutUint64(val[:8], id)
   206  		binary.LittleEndian.PutUint16(val[8:10], level)
   207  		data := bytes.NewReader(val)
   208  		logger, _ := test.NewNullLogger()
   209  		d := NewDeserializer(logger)
   210  
   211  		reader := bufio.NewReader(data)
   212  
   213  		err := d.ReadNode(reader, res)
   214  		require.Nil(t, err)
   215  		require.NotNil(t, res.Nodes[id])
   216  		assert.Equal(t, int(level), res.Nodes[id].level)
   217  	}
   218  }
   219  
   220  func TestDeserializerReadEP(t *testing.T) {
   221  	ids := []uint64{2, 3, 4, 5, 6}
   222  
   223  	for _, id := range ids {
   224  		val := make([]byte, 10)
   225  		level := uint16(id * 2)
   226  		binary.LittleEndian.PutUint64(val[:8], id)
   227  		binary.LittleEndian.PutUint16(val[8:10], level)
   228  		data := bytes.NewReader(val)
   229  		logger, _ := test.NewNullLogger()
   230  		d := NewDeserializer(logger)
   231  
   232  		reader := bufio.NewReader(data)
   233  
   234  		ep, l, err := d.ReadEP(reader)
   235  		require.Nil(t, err)
   236  		assert.Equal(t, id, ep)
   237  		assert.Equal(t, level, l)
   238  	}
   239  }
   240  
   241  func TestDeserializerReadLink(t *testing.T) {
   242  	res := dummyInitialDeserializerState()
   243  	ids := []uint64{2, 3, 4, 5, 6}
   244  
   245  	for _, id := range ids {
   246  		level := uint16(id * 2)
   247  		target := id * 3
   248  		val := make([]byte, 18)
   249  		binary.LittleEndian.PutUint64(val[:8], id)
   250  		binary.LittleEndian.PutUint16(val[8:10], level)
   251  		binary.LittleEndian.PutUint64(val[10:18], target)
   252  		data := bytes.NewReader(val)
   253  		logger, _ := test.NewNullLogger()
   254  		d := NewDeserializer(logger)
   255  
   256  		reader := bufio.NewReader(data)
   257  
   258  		err := d.ReadLink(reader, res)
   259  		require.Nil(t, err)
   260  		require.NotNil(t, res.Nodes[id])
   261  		lastAddedConnection := res.Nodes[id].connections[level][len(res.Nodes[id].connections[level])-1]
   262  		assert.Equal(t, target, lastAddedConnection)
   263  	}
   264  }
   265  
   266  func TestDeserializerReadLinks(t *testing.T) {
   267  	res := dummyInitialDeserializerState()
   268  	ids := []uint64{2, 3, 4, 5, 6}
   269  
   270  	for _, id := range ids {
   271  		level := uint16(id * 2)
   272  		connLen := uint16(id * 4)
   273  		val := make([]byte, 12+connLen*8)
   274  		binary.LittleEndian.PutUint64(val[:8], id)
   275  		binary.LittleEndian.PutUint16(val[8:10], level)
   276  		binary.LittleEndian.PutUint16(val[10:12], connLen)
   277  		for i := 0; i < int(connLen); i++ {
   278  			target := id + uint64(i)
   279  			binary.LittleEndian.PutUint64(val[12+(i*8):12+(i*8+8)], target)
   280  		}
   281  		data := bytes.NewReader(val)
   282  		logger, _ := test.NewNullLogger()
   283  		d := NewDeserializer(logger)
   284  
   285  		reader := bufio.NewReader(data)
   286  
   287  		_, err := d.ReadLinks(reader, res, true)
   288  		require.Nil(t, err)
   289  		require.NotNil(t, res.Nodes[id])
   290  		lastAddedConnection := res.Nodes[id].connections[level][len(res.Nodes[id].connections[level])-1]
   291  		assert.Equal(t, id+uint64(connLen)-1, lastAddedConnection)
   292  	}
   293  }
   294  
   295  func TestDeserializerReadAddLinks(t *testing.T) {
   296  	res := dummyInitialDeserializerState()
   297  	ids := []uint64{2, 3, 4, 5, 6}
   298  
   299  	for _, id := range ids {
   300  		level := uint16(id * 2)
   301  		connLen := uint16(id * 4)
   302  		val := make([]byte, 12+connLen*8)
   303  		binary.LittleEndian.PutUint64(val[:8], id)
   304  		binary.LittleEndian.PutUint16(val[8:10], level)
   305  		binary.LittleEndian.PutUint16(val[10:12], connLen)
   306  		for i := 0; i < int(connLen); i++ {
   307  			target := id + uint64(i)
   308  			binary.LittleEndian.PutUint64(val[12+(i*8):12+(i*8+8)], target)
   309  		}
   310  		data := bytes.NewReader(val)
   311  		logger, _ := test.NewNullLogger()
   312  		d := NewDeserializer(logger)
   313  
   314  		reader := bufio.NewReader(data)
   315  
   316  		_, err := d.ReadAddLinks(reader, res)
   317  		require.Nil(t, err)
   318  		require.NotNil(t, res.Nodes[id])
   319  		lastAddedConnection := res.Nodes[id].connections[level][len(res.Nodes[id].connections[level])-1]
   320  		assert.Equal(t, id+uint64(connLen)-1, lastAddedConnection)
   321  	}
   322  }
   323  
   324  func TestDeserializerAddTombstone(t *testing.T) {
   325  	tombstones := map[uint64]struct{}{}
   326  	ids := []uint64{2, 3, 4, 5, 6}
   327  
   328  	for _, id := range ids {
   329  		val := make([]byte, 8)
   330  		binary.LittleEndian.PutUint64(val[:8], id)
   331  		data := bytes.NewReader(val)
   332  		logger, _ := test.NewNullLogger()
   333  		d := NewDeserializer(logger)
   334  
   335  		reader := bufio.NewReader(data)
   336  
   337  		err := d.ReadAddTombstone(reader, tombstones)
   338  		require.Nil(t, err)
   339  	}
   340  
   341  	expected := map[uint64]struct{}{
   342  		2: {},
   343  		3: {},
   344  		4: {},
   345  		5: {},
   346  		6: {},
   347  	}
   348  
   349  	assert.Equal(t, expected, tombstones)
   350  }
   351  
   352  func TestDeserializerRemoveTombstone(t *testing.T) {
   353  	tombstones := map[uint64]struct{}{
   354  		1: {},
   355  		2: {},
   356  		3: {},
   357  		4: {},
   358  		5: {},
   359  	}
   360  	ids := []uint64{2, 3, 4, 5, 6}
   361  
   362  	for _, id := range ids {
   363  		val := make([]byte, 8)
   364  		binary.LittleEndian.PutUint64(val[:8], id)
   365  		data := bytes.NewReader(val)
   366  		logger, _ := test.NewNullLogger()
   367  		d := NewDeserializer(logger)
   368  
   369  		reader := bufio.NewReader(data)
   370  
   371  		err := d.ReadRemoveTombstone(reader, tombstones)
   372  		require.Nil(t, err)
   373  	}
   374  
   375  	expected := map[uint64]struct{}{
   376  		1: {},
   377  	}
   378  
   379  	assert.Equal(t, expected, tombstones)
   380  }
   381  
   382  func TestDeserializerClearLinksAtLevel(t *testing.T) {
   383  	res := &DeserializationResult{
   384  		LinksReplaced: make(map[uint64]map[uint16]struct{}),
   385  		Nodes: []*vertex{
   386  			nil,
   387  			nil,
   388  			{
   389  				// This is a lower level than we will read, so this node will require
   390  				// growing
   391  				level: 1,
   392  			},
   393  			{
   394  				// This is a lower level than we will read, so this node will require
   395  				// growing
   396  				level:       4,
   397  				connections: make([][]uint64, 4),
   398  			},
   399  			nil,
   400  			nil,
   401  		},
   402  	}
   403  	ids := []uint64{2, 3, 4, 5, 6}
   404  
   405  	for _, id := range ids {
   406  		level := uint16(id * 2)
   407  		val := make([]byte, 10)
   408  		binary.LittleEndian.PutUint64(val[:8], id)
   409  		binary.LittleEndian.PutUint16(val[8:10], level)
   410  		data := bytes.NewReader(val)
   411  		logger, _ := test.NewNullLogger()
   412  		d := NewDeserializer(logger)
   413  
   414  		reader := bufio.NewReader(data)
   415  
   416  		err := d.ReadClearLinksAtLevel(reader, res, true)
   417  		require.Nil(t, err)
   418  	}
   419  }