github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/compactor_replace.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "bufio" 16 "bytes" 17 "errors" 18 "fmt" 19 "io" 20 21 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex" 22 "github.com/weaviate/weaviate/entities/lsmkv" 23 ) 24 25 type compactorReplace struct { 26 // c1 is always the older segment, so when there is a conflict c2 wins 27 // (because of the replace strategy) 28 c1 *segmentCursorReplace 29 c2 *segmentCursorReplace 30 31 // the level matching those of the cursors 32 currentLevel uint16 33 // Tells if tombstones or keys without corresponding values 34 // can be removed from merged segment. 35 // (left segment is root (1st) one, keepTombstones is off for bucket) 36 cleanupTombstones bool 37 secondaryIndexCount uint16 38 39 w io.WriteSeeker 40 bufw *bufio.Writer 41 scratchSpacePath string 42 } 43 44 func newCompactorReplace(w io.WriteSeeker, 45 c1, c2 *segmentCursorReplace, level, secondaryIndexCount uint16, 46 scratchSpacePath string, cleanupTombstones bool, 47 ) *compactorReplace { 48 return &compactorReplace{ 49 c1: c1, 50 c2: c2, 51 w: w, 52 bufw: bufio.NewWriterSize(w, 256*1024), 53 currentLevel: level, 54 cleanupTombstones: cleanupTombstones, 55 secondaryIndexCount: secondaryIndexCount, 56 scratchSpacePath: scratchSpacePath, 57 } 58 } 59 60 func (c *compactorReplace) do() error { 61 if err := c.init(); err != nil { 62 return fmt.Errorf("init: %w", err) 63 } 64 65 kis, err := c.writeKeys() 66 if err != nil { 67 return fmt.Errorf("write keys: %w", err) 68 } 69 70 if err := c.writeIndices(kis); err != nil { 71 return fmt.Errorf("write indices: %w", err) 72 } 73 74 // flush buffered, so we can safely seek on underlying writer 75 if err := c.bufw.Flush(); err != nil { 76 return fmt.Errorf("flush buffered: %w", err) 77 } 78 79 var dataEnd uint64 = segmentindex.HeaderSize 80 if len(kis) > 0 { 81 dataEnd = uint64(kis[len(kis)-1].ValueEnd) 82 } 83 84 if err := c.writeHeader(c.currentLevel, 0, c.secondaryIndexCount, dataEnd); err != nil { 85 return fmt.Errorf("write header: %w", err) 86 } 87 88 return nil 89 } 90 91 func (c *compactorReplace) init() error { 92 // write a dummy header, we don't know the contents of the actual header yet, 93 // we will seek to the beginning and overwrite the actual header at the very 94 // end 95 96 if _, err := c.bufw.Write(make([]byte, segmentindex.HeaderSize)); err != nil { 97 return fmt.Errorf("write empty header: %w", err) 98 } 99 100 return nil 101 } 102 103 func (c *compactorReplace) writeKeys() ([]segmentindex.Key, error) { 104 res1, err1 := c.c1.firstWithAllKeys() 105 res2, err2 := c.c2.firstWithAllKeys() 106 107 // the (dummy) header was already written, this is our initial offset 108 offset := segmentindex.HeaderSize 109 110 var kis []segmentindex.Key 111 112 for { 113 if res1.primaryKey == nil && res2.primaryKey == nil { 114 break 115 } 116 if bytes.Equal(res1.primaryKey, res2.primaryKey) { 117 if !(c.cleanupTombstones && errors.Is(err2, lsmkv.Deleted)) { 118 ki, err := c.writeIndividualNode(offset, res2.primaryKey, res2.value, 119 res2.secondaryKeys, errors.Is(err2, lsmkv.Deleted)) 120 if err != nil { 121 return nil, fmt.Errorf("write individual node (equal keys): %w", err) 122 } 123 124 offset = ki.ValueEnd 125 kis = append(kis, ki) 126 } 127 // advance both! 128 res1, err1 = c.c1.nextWithAllKeys() 129 res2, err2 = c.c2.nextWithAllKeys() 130 continue 131 } 132 133 if (res1.primaryKey != nil && bytes.Compare(res1.primaryKey, res2.primaryKey) == -1) || res2.primaryKey == nil { 134 // key 1 is smaller 135 if !(c.cleanupTombstones && errors.Is(err1, lsmkv.Deleted)) { 136 ki, err := c.writeIndividualNode(offset, res1.primaryKey, res1.value, 137 res1.secondaryKeys, errors.Is(err1, lsmkv.Deleted)) 138 if err != nil { 139 return nil, fmt.Errorf("write individual node (res1.primaryKey smaller)") 140 } 141 142 offset = ki.ValueEnd 143 kis = append(kis, ki) 144 } 145 res1, err1 = c.c1.nextWithAllKeys() 146 } else { 147 // key 2 is smaller 148 if !(c.cleanupTombstones && errors.Is(err2, lsmkv.Deleted)) { 149 ki, err := c.writeIndividualNode(offset, res2.primaryKey, res2.value, 150 res2.secondaryKeys, errors.Is(err2, lsmkv.Deleted)) 151 if err != nil { 152 return nil, fmt.Errorf("write individual node (res2.primaryKey smaller): %w", err) 153 } 154 155 offset = ki.ValueEnd 156 kis = append(kis, ki) 157 } 158 res2, err2 = c.c2.nextWithAllKeys() 159 } 160 } 161 162 return kis, nil 163 } 164 165 func (c *compactorReplace) writeIndividualNode(offset int, key, value []byte, 166 secondaryKeys [][]byte, tombstone bool, 167 ) (segmentindex.Key, error) { 168 segNode := segmentReplaceNode{ 169 offset: offset, 170 tombstone: tombstone, 171 value: value, 172 primaryKey: key, 173 secondaryIndexCount: c.secondaryIndexCount, 174 secondaryKeys: secondaryKeys, 175 } 176 177 return segNode.KeyIndexAndWriteTo(c.bufw) 178 } 179 180 func (c *compactorReplace) writeIndices(keys []segmentindex.Key) error { 181 indices := &segmentindex.Indexes{ 182 Keys: keys, 183 SecondaryIndexCount: c.secondaryIndexCount, 184 ScratchSpacePath: c.scratchSpacePath, 185 } 186 187 _, err := indices.WriteTo(c.bufw) 188 return err 189 } 190 191 // writeHeader assumes that everything has been written to the underlying 192 // writer and it is now safe to seek to the beginning and override the initial 193 // header 194 func (c *compactorReplace) writeHeader(level, version, secondaryIndices uint16, 195 startOfIndex uint64, 196 ) error { 197 if _, err := c.w.Seek(0, io.SeekStart); err != nil { 198 return fmt.Errorf("seek to beginning to write header: %w", err) 199 } 200 201 h := &segmentindex.Header{ 202 Level: level, 203 Version: version, 204 SecondaryIndices: secondaryIndices, 205 Strategy: segmentindex.StrategyReplace, 206 IndexStart: startOfIndex, 207 } 208 209 if _, err := h.WriteTo(c.w); err != nil { 210 return err 211 } 212 213 return nil 214 }