github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/condensor_mmap_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "os" 16 "strings" 17 "testing" 18 19 "github.com/sirupsen/logrus/hooks/test" 20 "github.com/stretchr/testify/assert" 21 "github.com/stretchr/testify/require" 22 "github.com/weaviate/weaviate/entities/cyclemanager" 23 ) 24 25 func TestMmapCondensor(t *testing.T) { 26 t.Skip() // TODO 27 28 rootPath := t.TempDir() 29 30 logger, _ := test.NewNullLogger() 31 uncondensed, err := NewCommitLogger(rootPath, "uncondensed", logger, 32 cyclemanager.NewCallbackGroupNoop()) 33 require.Nil(t, err) 34 35 perfect, err := NewCommitLogger(rootPath, "perfect", logger, 36 cyclemanager.NewCallbackGroupNoop()) 37 require.Nil(t, err) 38 39 t.Run("add redundant data to the original log", func(t *testing.T) { 40 uncondensed.AddNode(&vertex{id: 0, level: 3}) 41 uncondensed.AddNode(&vertex{id: 1, level: 3}) 42 uncondensed.AddNode(&vertex{id: 2, level: 3}) 43 uncondensed.AddNode(&vertex{id: 3, level: 3}) 44 45 // below are some pointless connection replacements, we expect that most of 46 // these will be gone after condensing, this gives us a good way of testing 47 // whether they're really gone 48 for level := 0; level <= 3; level++ { 49 uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1, 2, 3}) 50 uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1, 2}) 51 uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1}) 52 uncondensed.ReplaceLinksAtLevel(0, level, []uint64{2}) 53 uncondensed.ReplaceLinksAtLevel(0, level, []uint64{3}) 54 uncondensed.ReplaceLinksAtLevel(0, level, []uint64{2, 3}) 55 uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1, 2, 3}) 56 uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0, 2, 3}) 57 uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0, 2}) 58 uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0}) 59 uncondensed.ReplaceLinksAtLevel(1, level, []uint64{2}) 60 uncondensed.ReplaceLinksAtLevel(1, level, []uint64{3}) 61 uncondensed.ReplaceLinksAtLevel(1, level, []uint64{2, 3}) 62 uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0, 2, 3}) 63 uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0, 1, 3}) 64 uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0, 1}) 65 uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0}) 66 uncondensed.ReplaceLinksAtLevel(2, level, []uint64{1}) 67 uncondensed.ReplaceLinksAtLevel(2, level, []uint64{3}) 68 uncondensed.ReplaceLinksAtLevel(2, level, []uint64{1, 3}) 69 uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0, 1, 3}) 70 uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0, 1, 2}) 71 uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0, 1}) 72 uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0}) 73 uncondensed.ReplaceLinksAtLevel(3, level, []uint64{1}) 74 uncondensed.ReplaceLinksAtLevel(3, level, []uint64{2}) 75 uncondensed.ReplaceLinksAtLevel(3, level, []uint64{1, 2}) 76 uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0, 1, 2}) 77 } 78 uncondensed.SetEntryPointWithMaxLayer(3, 3) 79 uncondensed.AddTombstone(2) 80 81 require.Nil(t, uncondensed.Flush()) 82 }) 83 84 t.Run("create a hypothetical perfect log", func(t *testing.T) { 85 perfect.AddNode(&vertex{id: 0, level: 3}) 86 perfect.AddNode(&vertex{id: 1, level: 3}) 87 perfect.AddNode(&vertex{id: 2, level: 3}) 88 perfect.AddNode(&vertex{id: 3, level: 3}) 89 90 // below are some pointless connection replacements, we expect that most of 91 // these will be gone after condensing, this gives us a good way of testing 92 // whether they're really gone 93 for level := 0; level <= 3; level++ { 94 perfect.ReplaceLinksAtLevel(0, level, []uint64{1, 2, 3}) 95 perfect.ReplaceLinksAtLevel(1, level, []uint64{0, 2, 3}) 96 perfect.ReplaceLinksAtLevel(2, level, []uint64{0, 1, 3}) 97 perfect.ReplaceLinksAtLevel(3, level, []uint64{0, 1, 2}) 98 } 99 perfect.SetEntryPointWithMaxLayer(3, 3) 100 perfect.AddTombstone(2) 101 102 require.Nil(t, perfect.Flush()) 103 }) 104 105 t.Run("condense the original and verify against the perfect one", func(t *testing.T) { 106 input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed")) 107 require.Nil(t, err) 108 require.True(t, ok) 109 110 err = NewMmapCondensor(3).Do(commitLogFileName(rootPath, "uncondensed", input)) 111 require.Nil(t, err) 112 113 control, ok, err := getCurrentCommitLogFileName( 114 commitLogDirectory(rootPath, "perfect")) 115 require.Nil(t, err) 116 require.True(t, ok) 117 118 actual, ok, err := getCurrentCommitLogFileName( 119 commitLogDirectory(rootPath, "uncondensed")) 120 require.Nil(t, err) 121 require.True(t, ok) 122 123 assert.True(t, strings.HasSuffix(actual, ".condensed"), 124 "commit log is now saved as condensed") 125 126 controlStat, err := os.Stat(commitLogFileName(rootPath, "perfect", control)) 127 require.Nil(t, err) 128 129 actualStat, err := os.Stat(commitLogFileName(rootPath, "uncondensed", actual)) 130 require.Nil(t, err) 131 132 assert.Equal(t, controlStat.Size(), actualStat.Size()) 133 134 // dumpIndexFromCommitLog(t, commitLogFileName(rootPath, "uncondensed", actual)) 135 // dumpIndexFromCommitLog(t, commitLogFileName(rootPath, "perfect", control)) 136 }) 137 } 138 139 // func TestCondensorWithoutEntrypoint(t *testing.T) { 140 // rand.Seed(time.Now().UnixNano()) 141 // rootPath := t.TempDir() 142 143 // logger, _ := test.NewNullLogger() 144 // uncondensed, err := NewCommitLogger(rootPath, "uncondensed", logger, 145 // cyclemanager.NewCallbackGroupNoop()) 146 // require.Nil(t, err) 147 148 // t.Run("add data, but do not set an entrypoint", func(t *testing.T) { 149 // uncondensed.AddNode(&vertex{id: 0, level: 3}) 150 151 // require.Nil(t, uncondensed.Flush()) 152 // }) 153 154 // t.Run("condense the original and verify it doesn't overwrite the EP", func(t *testing.T) { 155 // input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed")) 156 // require.Nil(t, err) 157 // require.True(t, ok) 158 159 // err = NewMemoryCondensor2(logger).Do(commitLogFileName(rootPath, "uncondensed", input)) 160 // require.Nil(t, err) 161 162 // actual, ok, err := getCurrentCommitLogFileName( 163 // commitLogDirectory(rootPath, "uncondensed")) 164 // require.Nil(t, err) 165 // require.True(t, ok) 166 167 // assert.True(t, strings.HasSuffix(actual, ".condensed"), 168 // "commit log is now saved as condensed") 169 170 // initialState := DeserializationResult{ 171 // Nodes: nil, 172 // Entrypoint: 17, 173 // Level: 3, 174 // } 175 // fd, err := os.Open(commitLogFileName(rootPath, "uncondensed", actual)) 176 // require.Nil(t, err) 177 178 // bufr := bufio.NewReader(fd) 179 // res, err := NewDeserializer(logger).Do(bufr, &initialState) 180 // require.Nil(t, err) 181 182 // assert.Contains(t, res.Nodes, &vertex{id: 0, level: 3, connections: map[int][]uint64{}}) 183 // assert.Equal(t, uint64(17), res.Entrypoint) 184 // assert.Equal(t, uint16(3), res.Level) 185 186 // }) 187 // }