//                           _       _
//  __      _____  __ ___   ___  __ _| |_ ___
//  \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//   \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//    \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package roaringset

import (
	"bufio"
	"bytes"
	"fmt"
	"io"

	"github.com/pkg/errors"
	"github.com/weaviate/sroar"
	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
)

// Compactor takes in a left and a right segment and merges them into a single
// segment. The input segments are represented by cursors without their
// respective segmentindexes. A new segmentindex is built from the merged nodes
// without taking the old indexes into account at all.
//
// The left segment must precede the right one in its creation time, as the
// compactor applies latest-takes-precedence rules when there is a conflict.
//
// # Merging independent key/value pairs
//
// The new segment's nodes will be in sorted fashion (this is a requirement for
// the segment index and segment cursors to function). To achieve a sorted end
// result, the Compactor goes over both input cursors simultaneously and always
// works on the smaller of the two keys. After a key/value pair has been added
// to the output only the input cursor that provided the pair is advanced.
//
// # Merging key/value pairs with identical keys
//
// When both segments have a key/value pair with an overlapping key, the value
// has to be merged. The merge logic is not part of the compactor itself.
// Instead it makes use of [BitmapLayers.Merge].
//
// # Exit Criterion
//
// When both cursors no longer return values, all key/value pairs are
// considered compacted. The compactor then deals with metadata.
51 // 52 // # Index and Header metadata 53 // 54 // Only once the key/value pairs have been compacted, will the compactor write 55 // the primary index based on the new key/value payload. Finally, the input 56 // writer is rewinded to be able to write the header metadata at the beginning 57 // of the file. Because of this, the input writer must be an [io.WriteSeeker], 58 // such as [*os.File]. 59 // 60 // The level of the resulting segment is the input level increased by one. 61 // Levels help the "eligible for compaction" cycle to find suitable compaction 62 // pairs. 63 type Compactor struct { 64 left, right *SegmentCursor 65 currentLevel uint16 66 // Tells if deletions or keys without corresponding values 67 // can be removed from merged segment. 68 // (left segment is root (1st) one, keepTombstones is off for bucket) 69 cleanupDeletions bool 70 71 w io.WriteSeeker 72 bufw *bufio.Writer 73 74 scratchSpacePath string 75 } 76 77 // NewCompactor from left (older) and right (newer) seeker. See [Compactor] for 78 // an explanation of what goes on under the hood, and why the input 79 // requirements are the way they are. 80 func NewCompactor(w io.WriteSeeker, 81 left, right *SegmentCursor, level uint16, 82 scratchSpacePath string, cleanupDeletions bool, 83 ) *Compactor { 84 return &Compactor{ 85 left: left, 86 right: right, 87 w: w, 88 bufw: bufio.NewWriterSize(w, 256*1024), 89 currentLevel: level, 90 cleanupDeletions: cleanupDeletions, 91 scratchSpacePath: scratchSpacePath, 92 } 93 } 94 95 // Do starts a compaction. See [Compactor] for an explanation of this process. 
96 func (c *Compactor) Do() error { 97 if err := c.init(); err != nil { 98 return fmt.Errorf("init: %w", err) 99 } 100 101 kis, err := c.writeNodes() 102 if err != nil { 103 return fmt.Errorf("write keys: %w", err) 104 } 105 106 if err := c.writeIndexes(kis); err != nil { 107 return fmt.Errorf("write index: %w", err) 108 } 109 110 // flush buffered, so we can safely seek on underlying writer 111 if err := c.bufw.Flush(); err != nil { 112 return fmt.Errorf("flush buffered: %w", err) 113 } 114 115 var dataEnd uint64 = segmentindex.HeaderSize 116 if len(kis) > 0 { 117 dataEnd = uint64(kis[len(kis)-1].ValueEnd) 118 } 119 120 if err := c.writeHeader(c.currentLevel, 0, 0, 121 dataEnd); err != nil { 122 return fmt.Errorf("write header: %w", err) 123 } 124 125 return nil 126 } 127 128 func (c *Compactor) init() error { 129 // write a dummy header, we don't know the contents of the actual header yet, 130 // we will seek to the beginning and overwrite the actual header at the very 131 // end 132 133 if _, err := c.bufw.Write(make([]byte, segmentindex.HeaderSize)); err != nil { 134 return errors.Wrap(err, "write empty header") 135 } 136 137 return nil 138 } 139 140 // nodeCompactor is a helper type to improve the code structure of merging 141 // nodes in a compaction 142 type nodeCompactor struct { 143 left, right *SegmentCursor 144 keyLeft, keyRight []byte 145 valueLeft, valueRight BitmapLayer 146 output []segmentindex.Key 147 offset int 148 bufw *bufio.Writer 149 150 cleanupDeletions bool 151 emptyBitmap *sroar.Bitmap 152 } 153 154 func (c *Compactor) writeNodes() ([]segmentindex.Key, error) { 155 nc := &nodeCompactor{ 156 left: c.left, 157 right: c.right, 158 bufw: c.bufw, 159 cleanupDeletions: c.cleanupDeletions, 160 emptyBitmap: sroar.NewBitmap(), 161 } 162 163 nc.init() 164 165 if err := nc.loopThroughKeys(); err != nil { 166 return nil, err 167 } 168 169 return nc.output, nil 170 } 171 172 func (c *nodeCompactor) init() { 173 c.keyLeft, c.valueLeft, _ = c.left.First() 
174 c.keyRight, c.valueRight, _ = c.right.First() 175 176 // the (dummy) header was already written, this is our initial offset 177 c.offset = segmentindex.HeaderSize 178 } 179 180 func (c *nodeCompactor) loopThroughKeys() error { 181 for { 182 if c.keyLeft == nil && c.keyRight == nil { 183 return nil 184 } 185 186 if c.keysEqual() { 187 if err := c.mergeIdenticalKeys(); err != nil { 188 return err 189 } 190 } else if c.leftKeySmallerOrRightNotSet() { 191 if err := c.takeLeftKey(); err != nil { 192 return err 193 } 194 } else { 195 if err := c.takeRightKey(); err != nil { 196 return err 197 } 198 } 199 } 200 } 201 202 func (c *nodeCompactor) keysEqual() bool { 203 return bytes.Equal(c.keyLeft, c.keyRight) 204 } 205 206 func (c *nodeCompactor) leftKeySmallerOrRightNotSet() bool { 207 return (c.keyLeft != nil && bytes.Compare(c.keyLeft, c.keyRight) == -1) || c.keyRight == nil 208 } 209 210 func (c *nodeCompactor) mergeIdenticalKeys() error { 211 layers := BitmapLayers{ 212 {Additions: c.valueLeft.Additions, Deletions: c.valueLeft.Deletions}, 213 {Additions: c.valueRight.Additions, Deletions: c.valueRight.Deletions}, 214 } 215 merged, err := layers.Merge() 216 if err != nil { 217 return fmt.Errorf("merge bitmap layers for identical keys: %w", err) 218 } 219 220 if additions, deletions, skip := c.cleanupValues(merged.Additions, merged.Deletions); !skip { 221 sn, err := NewSegmentNode(c.keyRight, additions, deletions) 222 if err != nil { 223 return fmt.Errorf("new segment node for merged key: %w", err) 224 } 225 226 ki, err := sn.KeyIndexAndWriteTo(c.bufw, c.offset) 227 if err != nil { 228 return fmt.Errorf("write individual node (merged key): %w", err) 229 } 230 231 c.offset = ki.ValueEnd 232 c.output = append(c.output, ki) 233 } 234 235 // advance both! 
236 c.keyLeft, c.valueLeft, _ = c.left.Next() 237 c.keyRight, c.valueRight, _ = c.right.Next() 238 return nil 239 } 240 241 func (c *nodeCompactor) takeLeftKey() error { 242 if additions, deletions, skip := c.cleanupValues(c.valueLeft.Additions, c.valueLeft.Deletions); !skip { 243 sn, err := NewSegmentNode(c.keyLeft, additions, deletions) 244 if err != nil { 245 return fmt.Errorf("new segment node for left key: %w", err) 246 } 247 248 ki, err := sn.KeyIndexAndWriteTo(c.bufw, c.offset) 249 if err != nil { 250 return fmt.Errorf("write individual node (left key): %w", err) 251 } 252 253 c.offset = ki.ValueEnd 254 c.output = append(c.output, ki) 255 } 256 257 c.keyLeft, c.valueLeft, _ = c.left.Next() 258 return nil 259 } 260 261 func (c *nodeCompactor) takeRightKey() error { 262 if additions, deletions, skip := c.cleanupValues(c.valueRight.Additions, c.valueRight.Deletions); !skip { 263 sn, err := NewSegmentNode(c.keyRight, additions, deletions) 264 if err != nil { 265 return fmt.Errorf("new segment node for right key: %w", err) 266 } 267 268 ki, err := sn.KeyIndexAndWriteTo(c.bufw, c.offset) 269 if err != nil { 270 return fmt.Errorf("write individual node (right key): %w", err) 271 } 272 273 c.offset = ki.ValueEnd 274 c.output = append(c.output, ki) 275 } 276 277 c.keyRight, c.valueRight, _ = c.right.Next() 278 return nil 279 } 280 281 func (c *nodeCompactor) cleanupValues(additions, deletions *sroar.Bitmap, 282 ) (add, del *sroar.Bitmap, skip bool) { 283 if !c.cleanupDeletions { 284 return additions, deletions, false 285 } 286 if !additions.IsEmpty() { 287 return additions, c.emptyBitmap, false 288 } 289 return nil, nil, true 290 } 291 292 func (c *Compactor) writeIndexes(keys []segmentindex.Key) error { 293 indexes := &segmentindex.Indexes{ 294 Keys: keys, 295 SecondaryIndexCount: 0, 296 ScratchSpacePath: c.scratchSpacePath, 297 } 298 299 _, err := indexes.WriteTo(c.bufw) 300 return err 301 } 302 303 // writeHeader assumes that everything has been written to the 
underlying 304 // writer and it is now safe to seek to the beginning and override the initial 305 // header 306 func (c *Compactor) writeHeader(level, version, secondaryIndices uint16, 307 startOfIndex uint64, 308 ) error { 309 if _, err := c.w.Seek(0, io.SeekStart); err != nil { 310 return errors.Wrap(err, "seek to beginning to write header") 311 } 312 313 h := &segmentindex.Header{ 314 Level: level, 315 Version: version, 316 SecondaryIndices: secondaryIndices, 317 Strategy: segmentindex.StrategyRoaringSet, 318 IndexStart: startOfIndex, 319 } 320 321 if _, err := h.WriteTo(c.w); err != nil { 322 return err 323 } 324 325 return nil 326 }