github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segment_precompute_for_compaction.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "bytes" 16 "fmt" 17 "os" 18 "strings" 19 20 "github.com/edsrzf/mmap-go" 21 "github.com/pkg/errors" 22 "github.com/sirupsen/logrus" 23 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex" 24 ) 25 26 // preComputeSegmentMeta has no side-effects for an already running store. As a 27 // result this can be run without the need to obtain any locks. All files 28 // created will have a .tmp suffix so they don't interfere with existing 29 // segments that might have a similar name. 30 func preComputeSegmentMeta(path string, updatedCountNetAdditions int, 31 logger logrus.FieldLogger, useBloomFilter bool, calcCountNetAdditions bool, 32 ) ([]string, error) { 33 out := []string{path} 34 35 // as a guardrail validate that the segment is considered a .tmp segment. 36 // This way we can be sure that we're not accidentally operating on a live 37 // segment as the segment group completely ignores .tmp segment files 38 if !strings.HasSuffix(path, ".tmp") { 39 return nil, fmt.Errorf("pre computing a segment expects a .tmp segment path") 40 } 41 42 file, err := os.Open(path) 43 if err != nil { 44 return nil, fmt.Errorf("open file: %w", err) 45 } 46 defer file.Close() 47 48 fileInfo, err := file.Stat() 49 if err != nil { 50 return nil, fmt.Errorf("stat file: %w", err) 51 } 52 53 contents, err := mmap.MapRegion(file, int(fileInfo.Size()), mmap.RDONLY, 0, 0) 54 if err != nil { 55 return nil, fmt.Errorf("mmap file: %w", err) 56 } 57 58 defer contents.Unmap() 59 60 header, err := segmentindex.ParseHeader(bytes.NewReader(contents[:segmentindex.HeaderSize])) 61 if err != nil { 62 return nil, fmt.Errorf("parse header: %w", err) 63 } 64 65 switch header.Strategy { 66 case segmentindex.StrategyReplace, segmentindex.StrategySetCollection, 67 segmentindex.StrategyMapCollection, segmentindex.StrategyRoaringSet: 68 default: 69 return nil, fmt.Errorf("unsupported strategy in segment") 70 } 71 72 primaryIndex, err := header.PrimaryIndex(contents) 73 if err != nil { 74 return nil, fmt.Errorf("extract primary index position: %w", err) 75 } 76 77 primaryDiskIndex := segmentindex.NewDiskTree(primaryIndex) 78 79 seg := &segment{ 80 level: header.Level, 81 // trim the .tmp suffix to make sure the naming rules for the files we 82 // pre-compute later on still apply they will in turn be suffixed with 83 // .tmp, but that is supposed to be the end of the file. if we didn't trim 84 // the path here, we would end up with filenames like 85 // segment.tmp.bloom.tmp, whereas we want to end up with segment.bloom.tmp 86 path: strings.TrimSuffix(path, ".tmp"), 87 contents: contents, 88 contentFile: file, 89 version: header.Version, 90 secondaryIndexCount: header.SecondaryIndices, 91 segmentStartPos: header.IndexStart, 92 segmentEndPos: uint64(fileInfo.Size()), 93 strategy: header.Strategy, 94 dataStartPos: segmentindex.HeaderSize, // fixed value that's the same for all strategies 95 dataEndPos: header.IndexStart, 96 index: primaryDiskIndex, 97 logger: logger, 98 useBloomFilter: useBloomFilter, 99 calcCountNetAdditions: calcCountNetAdditions, 100 } 101 102 if seg.secondaryIndexCount > 0 { 103 seg.secondaryIndices = make([]diskIndex, seg.secondaryIndexCount) 104 for i := range seg.secondaryIndices { 105 secondary, err := header.SecondaryIndex(contents, uint16(i)) 106 if err != nil { 107 return nil, errors.Wrapf(err, "get position for secondary index at %d", i) 108 } 109 seg.secondaryIndices[i] = segmentindex.NewDiskTree(secondary) 110 } 111 } 112 113 if seg.useBloomFilter { 114 files, err := seg.precomputeBloomFilters() 115 if err != nil { 116 return nil, err 117 } 118 out = append(out, files...) 119 } 120 if seg.calcCountNetAdditions { 121 files, err := seg.precomputeCountNetAdditions(updatedCountNetAdditions) 122 if err != nil { 123 return nil, err 124 } 125 out = append(out, files...) 126 } 127 128 return out, nil 129 }