github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/condensor_mmap.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "io" 16 "os" 17 18 "github.com/davecgh/go-spew/spew" 19 "github.com/pkg/errors" 20 ) 21 22 type MmapCondensor struct { 23 connectionsPerLevel int 24 } 25 26 func NewMmapCondensor(connectionsPerLevel int) *MmapCondensor { 27 return &MmapCondensor{connectionsPerLevel: connectionsPerLevel} 28 } 29 30 func (c *MmapCondensor) Do(fileName string) error { 31 fd, err := os.Open(fileName) 32 if err != nil { 33 return errors.Wrap(err, "open commit log to be condensed") 34 } 35 defer fd.Close() 36 37 index, err := c.analyze(fd) 38 if err != nil { 39 return errors.Wrap(err, "analyze commit log and build index") 40 } 41 42 index.calculateOffsets() 43 44 // "rewind" file so we can read it again, this time into the mmap file 45 if _, err := fd.Seek(0, io.SeekStart); err != nil { 46 return errors.Wrap(err, "rewind uncondensed") 47 } 48 49 if err := c.read(fd, index, fileName+".scratch.tmp"); err != nil { 50 return errors.Wrap(err, "read uncondensed into mmap file") 51 } 52 53 spew.Dump(index) 54 spew.Dump(index.Size()) 55 return nil 56 } 57 58 func (c *MmapCondensor) analyze(file *os.File) (mmapIndex, error) { 59 return newMmapCondensorAnalyzer(c.connectionsPerLevel).Do(file) 60 } 61 62 func (c *MmapCondensor) read(source *os.File, index mmapIndex, 63 targetName string, 64 ) error { 65 return newMmapCondensorReader().Do(source, index, targetName) 66 } 67 68 func (mi *mmapIndex) calculateOffsets() { 69 for i := range mi.nodes { 70 if i == 0 { 71 // offset for the first element is 0, nothing to do 72 continue 73 } 74 75 // we now have the guarantee that elem i-1 exists 76 mi.nodes[i].offset = mi.nodes[i-1].offset + uint64(mi.nodes[i-1].Size(mi.connectionsPerLevel)) 77 } 78 } 79 80 // Size can only return a useful result if offsets have been calculated prior 81 // to calling Size() 82 func (mi *mmapIndex) Size() int { 83 if len(mi.nodes) == 0 { 84 return -1 85 } 86 87 return int(mi.nodes[len(mi.nodes)-1].offset) + 88 mi.nodes[len(mi.nodes)-1].Size(mi.connectionsPerLevel) 89 }