github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/condensor.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "bufio" 16 "encoding/binary" 17 "fmt" 18 "math" 19 "os" 20 21 "github.com/pkg/errors" 22 "github.com/sirupsen/logrus" 23 "github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers" 24 "github.com/weaviate/weaviate/entities/errorcompounder" 25 ) 26 27 type MemoryCondensor struct { 28 newLogFile *os.File 29 newLog *bufWriter 30 logger logrus.FieldLogger 31 } 32 33 func (c *MemoryCondensor) Do(fileName string) error { 34 fd, err := os.Open(fileName) 35 if err != nil { 36 return errors.Wrap(err, "open commit log to be condensed") 37 } 38 defer fd.Close() 39 fdBuf := bufio.NewReaderSize(fd, 256*1024) 40 41 res, _, err := NewDeserializer(c.logger).Do(fdBuf, nil, true) 42 if err != nil { 43 return errors.Wrap(err, "read commit log to be condensed") 44 } 45 46 newLogFile, err := os.OpenFile(fmt.Sprintf("%s.condensed", fileName), 47 os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o666) 48 if err != nil { 49 return errors.Wrap(err, "open new commit log file for writing") 50 } 51 52 c.newLogFile = newLogFile 53 54 c.newLog = NewWriterSize(c.newLogFile, 1*1024*1024) 55 56 if res.Compressed { 57 if err := c.AddPQ(res.PQData); err != nil { 58 return fmt.Errorf("write pq data: %w", err) 59 } 60 } 61 62 for _, node := range res.Nodes { 63 if node == nil { 64 // nil nodes occur when we've grown, but not inserted anything yet 65 continue 66 } 67 68 if node.level > 0 { 69 // nodes are implicitly added when they are first linked, if the level is 70 // not zero we know this node was new. If the level is zero it doesn't 71 // matter if it gets added explicitly or implicitly 72 if err := c.AddNode(node); err != nil { 73 return errors.Wrapf(err, "write node %d to commit log", node.id) 74 } 75 } 76 77 for level, links := range node.connections { 78 if res.ReplaceLinks(node.id, uint16(level)) { 79 if err := c.SetLinksAtLevel(node.id, level, links); err != nil { 80 return errors.Wrapf(err, 81 "write links for node %d at level %d to commit log", node.id, level) 82 } 83 } else { 84 if err := c.AddLinksAtLevel(node.id, uint16(level), links); err != nil { 85 return errors.Wrapf(err, 86 "write links for node %d at level %d to commit log", node.id, level) 87 } 88 } 89 } 90 } 91 92 if res.EntrypointChanged { 93 if err := c.SetEntryPointWithMaxLayer(res.Entrypoint, 94 int(res.Level)); err != nil { 95 return errors.Wrap(err, "write entrypoint to commit log") 96 } 97 } 98 99 for ts := range res.Tombstones { 100 if err := c.AddTombstone(ts); err != nil { 101 return errors.Wrapf(err, 102 "write tombstone for node %d to commit log", ts) 103 } 104 } 105 106 if err := c.newLog.Flush(); err != nil { 107 return errors.Wrap(err, "close new commit log") 108 } 109 110 if err := c.newLogFile.Close(); err != nil { 111 return errors.Wrap(err, "close new commit log") 112 } 113 114 if err := os.Remove(fileName); err != nil { 115 return errors.Wrap(err, "cleanup old (uncondensed) commit log") 116 } 117 118 return nil 119 } 120 121 func (c *MemoryCondensor) writeUint64(w *bufWriter, in uint64) error { 122 toWrite := make([]byte, 8) 123 binary.LittleEndian.PutUint64(toWrite[0:8], in) 124 _, err := w.Write(toWrite) 125 if err != nil { 126 return err 127 } 128 129 return nil 130 } 131 132 func (c *MemoryCondensor) writeUint16(w *bufWriter, in uint16) error { 133 toWrite := make([]byte, 2) 134 binary.LittleEndian.PutUint16(toWrite[0:2], in) 135 _, err := w.Write(toWrite) 136 if err != nil { 137 return err 138 } 139 140 return nil 141 } 142 143 func (c *MemoryCondensor) writeCommitType(w *bufWriter, in HnswCommitType) error { 144 toWrite := make([]byte, 1) 145 toWrite[0] = byte(in) 146 _, err := w.Write(toWrite) 147 if err != nil { 148 return err 149 } 150 151 return nil 152 } 153 154 func (c *MemoryCondensor) writeUint64Slice(w *bufWriter, in []uint64) error { 155 for _, v := range in { 156 err := c.writeUint64(w, v) 157 if err != nil { 158 return err 159 } 160 } 161 162 return nil 163 } 164 165 // AddNode adds an empty node 166 func (c *MemoryCondensor) AddNode(node *vertex) error { 167 ec := &errorcompounder.ErrorCompounder{} 168 ec.Add(c.writeCommitType(c.newLog, AddNode)) 169 ec.Add(c.writeUint64(c.newLog, node.id)) 170 ec.Add(c.writeUint16(c.newLog, uint16(node.level))) 171 172 return ec.ToError() 173 } 174 175 func (c *MemoryCondensor) SetLinksAtLevel(nodeid uint64, level int, targets []uint64) error { 176 ec := &errorcompounder.ErrorCompounder{} 177 ec.Add(c.writeCommitType(c.newLog, ReplaceLinksAtLevel)) 178 ec.Add(c.writeUint64(c.newLog, nodeid)) 179 ec.Add(c.writeUint16(c.newLog, uint16(level))) 180 181 targetLength := len(targets) 182 if targetLength > math.MaxUint16 { 183 // TODO: investigate why we get such massive connections 184 targetLength = math.MaxUint16 185 c.logger.WithField("action", "condense_commit_log"). 186 WithField("original_length", len(targets)). 187 WithField("maximum_length", targetLength). 188 Warning("condensor length of connections would overflow uint16, cutting off") 189 } 190 ec.Add(c.writeUint16(c.newLog, uint16(targetLength))) 191 ec.Add(c.writeUint64Slice(c.newLog, targets[:targetLength])) 192 193 return ec.ToError() 194 } 195 196 func (c *MemoryCondensor) AddLinksAtLevel(nodeid uint64, level uint16, targets []uint64) error { 197 toWrite := make([]byte, 13+len(targets)*8) 198 toWrite[0] = byte(AddLinksAtLevel) 199 binary.LittleEndian.PutUint64(toWrite[1:9], nodeid) 200 binary.LittleEndian.PutUint16(toWrite[9:11], uint16(level)) 201 binary.LittleEndian.PutUint16(toWrite[11:13], uint16(len(targets))) 202 for i, target := range targets { 203 offsetStart := 13 + i*8 204 offsetEnd := offsetStart + 8 205 binary.LittleEndian.PutUint64(toWrite[offsetStart:offsetEnd], target) 206 } 207 _, err := c.newLog.Write(toWrite) 208 return err 209 } 210 211 func (c *MemoryCondensor) AddLinkAtLevel(nodeid uint64, level uint16, target uint64) error { 212 ec := &errorcompounder.ErrorCompounder{} 213 ec.Add(c.writeCommitType(c.newLog, AddLinkAtLevel)) 214 ec.Add(c.writeUint64(c.newLog, nodeid)) 215 ec.Add(c.writeUint16(c.newLog, uint16(level))) 216 ec.Add(c.writeUint64(c.newLog, target)) 217 218 return ec.ToError() 219 } 220 221 func (c *MemoryCondensor) SetEntryPointWithMaxLayer(id uint64, level int) error { 222 ec := &errorcompounder.ErrorCompounder{} 223 ec.Add(c.writeCommitType(c.newLog, SetEntryPointMaxLevel)) 224 ec.Add(c.writeUint64(c.newLog, id)) 225 ec.Add(c.writeUint16(c.newLog, uint16(level))) 226 227 return ec.ToError() 228 } 229 230 func (c *MemoryCondensor) AddTombstone(nodeid uint64) error { 231 ec := &errorcompounder.ErrorCompounder{} 232 ec.Add(c.writeCommitType(c.newLog, AddTombstone)) 233 ec.Add(c.writeUint64(c.newLog, nodeid)) 234 235 return ec.ToError() 236 } 237 238 func (c *MemoryCondensor) AddPQ(data compressionhelpers.PQData) error { 239 toWrite := make([]byte, 10) 240 toWrite[0] = byte(AddPQ) 241 binary.LittleEndian.PutUint16(toWrite[1:3], data.Dimensions) 242 toWrite[3] = byte(data.EncoderType) 243 binary.LittleEndian.PutUint16(toWrite[4:6], data.Ks) 244 binary.LittleEndian.PutUint16(toWrite[6:8], data.M) 245 toWrite[8] = data.EncoderDistribution 246 if data.UseBitsEncoding { 247 toWrite[9] = 1 248 } else { 249 toWrite[9] = 0 250 } 251 252 for _, encoder := range data.Encoders { 253 toWrite = append(toWrite, encoder.ExposeDataForRestore()...) 254 } 255 _, err := c.newLog.Write(toWrite) 256 return err 257 } 258 259 func NewMemoryCondensor(logger logrus.FieldLogger) *MemoryCondensor { 260 return &MemoryCondensor{logger: logger} 261 }