// Source: github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segment_net_count_additions.go

//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package lsmkv

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
)

// ErrInvalidChecksum indicates that the read file should not be trusted. For
// any pre-computed data this is a recoverable issue, as the data can simply be
// re-computed at read-time.
var ErrInvalidChecksum = errors.New("invalid checksum")

// existsOnLowerSegmentsFn is a simple function that can be passed at segment
// initialization time to check if any of the keys are truly new or previously
// seen. This can in turn be used to build up the net count additions.
The 34 // reason this is abstract: 35 type existsOnLowerSegmentsFn func(key []byte) (bool, error) 36 37 func (s *segment) countNetPath() string { 38 return countNetPathFromSegmentPath(s.path) 39 } 40 41 func countNetPathFromSegmentPath(segPath string) string { 42 extless := strings.TrimSuffix(segPath, filepath.Ext(segPath)) 43 return fmt.Sprintf("%s.cna", extless) 44 } 45 46 func (s *segment) initCountNetAdditions(exists existsOnLowerSegmentsFn, overwrite bool) error { 47 if s.strategy != segmentindex.StrategyReplace { 48 // replace is the only strategy that supports counting 49 return nil 50 } 51 52 path := s.countNetPath() 53 54 ok, err := fileExists(path) 55 if err != nil { 56 return err 57 } 58 59 if ok { 60 if overwrite { 61 err := os.Remove(path) 62 if err != nil { 63 return fmt.Errorf("delete existing net additions counter %s: %w", path, err) 64 } 65 } else { 66 err = s.loadCountNetFromDisk() 67 if err == nil { 68 return nil 69 } 70 71 if !errors.Is(err, ErrInvalidChecksum) { 72 // not a recoverable error 73 return err 74 } 75 76 // now continue re-calculating 77 } 78 } 79 80 var lastErr error 81 countNet := 0 82 cb := func(key []byte, tombstone bool) { 83 existedOnPrior, err := exists(key) 84 if err != nil { 85 lastErr = err 86 } 87 88 if tombstone && existedOnPrior { 89 countNet-- 90 } 91 92 if !tombstone && !existedOnPrior { 93 countNet++ 94 } 95 } 96 97 extr := newBufferedKeyAndTombstoneExtractor(s.contents, s.dataStartPos, 98 s.dataEndPos, 10e6, s.secondaryIndexCount, cb) 99 100 extr.do() 101 102 s.countNetAdditions = countNet 103 104 if lastErr != nil { 105 return lastErr 106 } 107 108 if err := s.storeCountNetOnDisk(); err != nil { 109 return fmt.Errorf("store count net additions on disk: %w", err) 110 } 111 112 return nil 113 } 114 115 func (s *segment) storeCountNetOnDisk() error { 116 return storeCountNetOnDisk(s.countNetPath(), s.countNetAdditions) 117 } 118 119 func storeCountNetOnDisk(path string, value int) error { 120 buf := new(bytes.Buffer) 
121 122 if err := binary.Write(buf, binary.LittleEndian, uint64(value)); err != nil { 123 return fmt.Errorf("write cna to buf: %w", err) 124 } 125 126 return writeWithChecksum(buf.Bytes(), path) 127 } 128 129 func (s *segment) loadCountNetFromDisk() error { 130 data, err := loadWithChecksum(s.countNetPath(), 12) 131 if err != nil { 132 return err 133 } 134 135 s.countNetAdditions = int(binary.LittleEndian.Uint64(data[0:8])) 136 137 return nil 138 } 139 140 func (s *segment) precomputeCountNetAdditions(updatedCountNetAdditions int) ([]string, error) { 141 if s.strategy != segmentindex.StrategyReplace { 142 // only "replace" has count net additions, so we are done 143 return []string{}, nil 144 } 145 146 cnaPath := fmt.Sprintf("%s.tmp", s.countNetPath()) 147 if err := storeCountNetOnDisk(cnaPath, updatedCountNetAdditions); err != nil { 148 return nil, err 149 } 150 return []string{cnaPath}, nil 151 }