github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/condensor_mmap_analyzer.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "bufio" 16 "encoding/binary" 17 "io" 18 "os" 19 "sort" 20 21 "github.com/pkg/errors" 22 ) 23 24 type mmapIndex struct { 25 nodes []mmapIndexNode 26 connectionsPerLevel int 27 } 28 29 func (mi *mmapIndex) UpsertNodeMaxLevel(node uint64, level uint16) { 30 n := sort.Search(len(mi.nodes), func(a int) bool { 31 return mi.nodes[a].id >= node 32 }) 33 34 if n < len(mi.nodes) && mi.nodes[n].id == node { 35 // update 36 if mi.nodes[n].maxLevel < level { 37 mi.nodes[n].maxLevel = level 38 } 39 } else { 40 // insert 41 42 // See https://github.com/golang/go/wiki/SliceTricks#insert 43 mi.nodes = append(mi.nodes, mmapIndexNode{}) 44 copy(mi.nodes[n+1:], mi.nodes[n:]) 45 mi.nodes[n].id = node 46 mi.nodes[n].maxLevel = level 47 } 48 } 49 50 func (mi *mmapIndex) DeleteNode(node uint64) { 51 } 52 53 type mmapIndexNode struct { 54 id uint64 55 offset uint64 56 maxLevel uint16 57 } 58 59 func (n mmapIndexNode) Size(connectionsPerLevel int) int { 60 return int(n.maxLevel)*2 + // overhead for uint16 length indicators 61 connectionsPerLevel*int(n.maxLevel+1) // level 0 has 2x connections 62 } 63 64 type MmapCondensorAnalyzer struct { 65 reader *bufio.Reader 66 connectionsPerLevel int 67 index mmapIndex 68 } 69 70 func newMmapCondensorAnalyzer(connectionsPerLevel int) *MmapCondensorAnalyzer { 71 return &MmapCondensorAnalyzer{connectionsPerLevel: connectionsPerLevel} 72 } 73 74 func (a *MmapCondensorAnalyzer) Do(file *os.File) (mmapIndex, error) { 75 a.reader = bufio.NewReaderSize(file, 1024*1024) 76 77 a.index = mmapIndex{ 78 connectionsPerLevel: a.connectionsPerLevel, 79 nodes: make([]mmapIndexNode, 0, 10000), 80 } 81 82 if err := a.loop(); err != nil { 83 return a.index, err 84 } 85 86 return a.index, nil 87 } 88 89 func (a *MmapCondensorAnalyzer) loop() error { 90 for { 91 ct, err := a.ReadCommitType(a.reader) 92 if err != nil { 93 if errors.Is(err, io.EOF) { 94 break 95 } 96 97 return err 98 } 99 100 switch ct { 101 case AddNode: 102 err = a.ReadNode(a.reader) 103 case SetEntryPointMaxLevel: 104 err = a.ReadEP(a.reader) 105 case AddLinkAtLevel: 106 err = a.ReadLink(a.reader) 107 case ReplaceLinksAtLevel: 108 err = a.ReadLinks(a.reader) 109 case AddTombstone: 110 err = a.ReadAddTombstone(a.reader) 111 case RemoveTombstone: 112 err = a.ReadRemoveTombstone(a.reader) 113 case ClearLinks: 114 err = a.ReadClearLinks(a.reader) 115 case DeleteNode: 116 err = a.ReadDeleteNode(a.reader) 117 case ResetIndex: 118 a.index.nodes = make([]mmapIndexNode, 0, 10000) 119 default: 120 err = errors.Errorf("unrecognized commit type %d", ct) 121 } 122 if err != nil { 123 // do not return nil, err, because the err could be a recoverable one 124 return err 125 } 126 } 127 128 return nil 129 } 130 131 func (a *MmapCondensorAnalyzer) ReadNode(r io.Reader) error { 132 id, err := a.readUint64(r) 133 if err != nil { 134 return err 135 } 136 137 level, err := a.readUint16(r) 138 if err != nil { 139 return err 140 } 141 142 a.index.UpsertNodeMaxLevel(id, level) 143 return nil 144 } 145 146 func (a *MmapCondensorAnalyzer) ReadEP(r io.Reader) error { 147 // TODO: is this an issue because of bufio Read vs ReadFull? 148 _, err := io.CopyN(io.Discard, r, 10) 149 return err 150 } 151 152 func (a *MmapCondensorAnalyzer) ReadLink(r io.Reader) error { 153 source, err := a.readUint64(r) 154 if err != nil { 155 return err 156 } 157 158 level, err := a.readUint16(r) 159 if err != nil { 160 return err 161 } 162 163 // TODO: is this an issue because of bufio Read vs ReadFull? 164 _, err = io.CopyN(io.Discard, r, 8) 165 if err != nil { 166 return err 167 } 168 a.index.UpsertNodeMaxLevel(source, level) 169 170 return nil 171 } 172 173 func (a *MmapCondensorAnalyzer) ReadLinks(r io.Reader) error { 174 source, err := a.readUint64(r) 175 if err != nil { 176 return err 177 } 178 179 level, err := a.readUint16(r) 180 if err != nil { 181 return err 182 } 183 184 length, err := a.readUint16(r) 185 if err != nil { 186 return err 187 } 188 189 a.index.UpsertNodeMaxLevel(source, level) 190 191 // TODO: is this an issue because of bufio Read vs ReadFull? 192 _, err = io.CopyN(io.Discard, r, 8*int64(length)) 193 if err != nil { 194 return err 195 } 196 197 return nil 198 } 199 200 func (a *MmapCondensorAnalyzer) ReadAddTombstone(r io.Reader) error { 201 // TODO: is this an issue because of bufio Read vs ReadFull? 202 _, err := io.CopyN(io.Discard, r, 8) 203 return err 204 } 205 206 func (a *MmapCondensorAnalyzer) ReadRemoveTombstone(r io.Reader) error { 207 // TODO: is this an issue because of bufio Read vs ReadFull? 208 _, err := io.CopyN(io.Discard, r, 8) 209 return err 210 } 211 212 func (a *MmapCondensorAnalyzer) ReadClearLinks(r io.Reader) error { 213 // TODO: is this an issue because of bufio Read vs ReadFull? 214 _, err := io.CopyN(io.Discard, r, 8) 215 return err 216 } 217 218 func (a *MmapCondensorAnalyzer) ReadDeleteNode(r io.Reader) error { 219 id, err := a.readUint64(r) 220 if err != nil { 221 return err 222 } 223 224 a.index.DeleteNode(id) 225 return nil 226 } 227 228 func (a *MmapCondensorAnalyzer) readUint64(r io.Reader) (uint64, error) { 229 var value uint64 230 tmpBuf := make([]byte, 8) 231 _, err := io.ReadFull(r, tmpBuf) 232 if err != nil { 233 return 0, errors.Wrap(err, "failed to read uint64") 234 } 235 236 value = binary.LittleEndian.Uint64(tmpBuf) 237 238 return value, nil 239 } 240 241 func (a *MmapCondensorAnalyzer) readUint16(r io.Reader) (uint16, error) { 242 var value uint16 243 tmpBuf := make([]byte, 2) 244 _, err := io.ReadFull(r, tmpBuf) 245 if err != nil { 246 return 0, errors.Wrap(err, "failed to read uint16") 247 } 248 249 value = binary.LittleEndian.Uint16(tmpBuf) 250 251 return value, nil 252 } 253 254 func (a *MmapCondensorAnalyzer) ReadCommitType(r io.Reader) (HnswCommitType, error) { 255 tmpBuf := make([]byte, 1) 256 if _, err := io.ReadFull(r, tmpBuf); err != nil { 257 return 0, errors.Wrap(err, "failed to read commit type") 258 } 259 260 return HnswCommitType(tmpBuf[0]), nil 261 }