github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/compactor_set.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "bufio" 16 "bytes" 17 "io" 18 19 "github.com/pkg/errors" 20 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex" 21 ) 22 23 type compactorSet struct { 24 // c1 is always the older segment, so when there is a conflict c2 wins 25 // (because of the replace strategy) 26 c1 *segmentCursorCollection 27 c2 *segmentCursorCollection 28 29 // the level matching those of the cursors 30 currentLevel uint16 31 secondaryIndexCount uint16 32 // Tells if tombstones or keys without corresponding values 33 // can be removed from merged segment. 34 // (left segment is root (1st) one, keepTombstones is off for bucket) 35 cleanupTombstones bool 36 37 w io.WriteSeeker 38 bufw *bufio.Writer 39 40 scratchSpacePath string 41 } 42 43 func newCompactorSetCollection(w io.WriteSeeker, 44 c1, c2 *segmentCursorCollection, level, secondaryIndexCount uint16, 45 scratchSpacePath string, cleanupTombstones bool, 46 ) *compactorSet { 47 return &compactorSet{ 48 c1: c1, 49 c2: c2, 50 w: w, 51 bufw: bufio.NewWriterSize(w, 256*1024), 52 currentLevel: level, 53 cleanupTombstones: cleanupTombstones, 54 secondaryIndexCount: secondaryIndexCount, 55 scratchSpacePath: scratchSpacePath, 56 } 57 } 58 59 func (c *compactorSet) do() error { 60 if err := c.init(); err != nil { 61 return errors.Wrap(err, "init") 62 } 63 64 kis, err := c.writeKeys() 65 if err != nil { 66 return errors.Wrap(err, "write keys") 67 } 68 69 if err := c.writeIndices(kis); err != nil { 70 return errors.Wrap(err, "write index") 71 } 72 73 // flush buffered, so we can safely seek on underlying writer 74 if err := c.bufw.Flush(); err != nil { 75 return errors.Wrap(err, "flush buffered") 76 } 77 78 var dataEnd uint64 = segmentindex.HeaderSize 79 if len(kis) > 0 { 80 dataEnd = uint64(kis[len(kis)-1].ValueEnd) 81 } 82 83 if err := c.writeHeader(c.currentLevel, 0, c.secondaryIndexCount, 84 dataEnd); err != nil { 85 return errors.Wrap(err, "write header") 86 } 87 88 return nil 89 } 90 91 func (c *compactorSet) init() error { 92 // write a dummy header, we don't know the contents of the actual header yet, 93 // we will seek to the beginning and overwrite the actual header at the very 94 // end 95 96 if _, err := c.bufw.Write(make([]byte, segmentindex.HeaderSize)); err != nil { 97 return errors.Wrap(err, "write empty header") 98 } 99 100 return nil 101 } 102 103 func (c *compactorSet) writeKeys() ([]segmentindex.Key, error) { 104 key1, value1, _ := c.c1.first() 105 key2, value2, _ := c.c2.first() 106 107 // the (dummy) header was already written, this is our initial offset 108 offset := segmentindex.HeaderSize 109 110 var kis []segmentindex.Key 111 112 for { 113 if key1 == nil && key2 == nil { 114 break 115 } 116 if bytes.Equal(key1, key2) { 117 values := append(value1, value2...) 118 valuesMerged := newSetDecoder().DoPartial(values) 119 if values, skip := c.cleanupValues(valuesMerged); !skip { 120 ki, err := c.writeIndividualNode(offset, key2, values) 121 if err != nil { 122 return nil, errors.Wrap(err, "write individual node (equal keys)") 123 } 124 125 offset = ki.ValueEnd 126 kis = append(kis, ki) 127 } 128 // advance both! 129 key1, value1, _ = c.c1.next() 130 key2, value2, _ = c.c2.next() 131 continue 132 } 133 134 if (key1 != nil && bytes.Compare(key1, key2) == -1) || key2 == nil { 135 // key 1 is smaller 136 if values, skip := c.cleanupValues(value1); !skip { 137 ki, err := c.writeIndividualNode(offset, key1, values) 138 if err != nil { 139 return nil, errors.Wrap(err, "write individual node (key1 smaller)") 140 } 141 142 offset = ki.ValueEnd 143 kis = append(kis, ki) 144 } 145 key1, value1, _ = c.c1.next() 146 } else { 147 // key 2 is smaller 148 if values, skip := c.cleanupValues(value2); !skip { 149 ki, err := c.writeIndividualNode(offset, key2, values) 150 if err != nil { 151 return nil, errors.Wrap(err, "write individual node (key2 smaller)") 152 } 153 154 offset = ki.ValueEnd 155 kis = append(kis, ki) 156 } 157 key2, value2, _ = c.c2.next() 158 } 159 } 160 161 return kis, nil 162 } 163 164 func (c *compactorSet) writeIndividualNode(offset int, key []byte, 165 values []value, 166 ) (segmentindex.Key, error) { 167 return (&segmentCollectionNode{ 168 values: values, 169 primaryKey: key, 170 offset: offset, 171 }).KeyIndexAndWriteTo(c.bufw) 172 } 173 174 func (c *compactorSet) writeIndices(keys []segmentindex.Key) error { 175 indices := &segmentindex.Indexes{ 176 Keys: keys, 177 SecondaryIndexCount: c.secondaryIndexCount, 178 ScratchSpacePath: c.scratchSpacePath, 179 } 180 181 _, err := indices.WriteTo(c.bufw) 182 return err 183 } 184 185 // writeHeader assumes that everything has been written to the underlying 186 // writer and it is now safe to seek to the beginning and override the initial 187 // header 188 func (c *compactorSet) writeHeader(level, version, secondaryIndices uint16, 189 startOfIndex uint64, 190 ) error { 191 if _, err := c.w.Seek(0, io.SeekStart); err != nil { 192 return errors.Wrap(err, "seek to beginning to write header") 193 } 194 195 h := &segmentindex.Header{ 196 Level: level, 197 Version: version, 198 SecondaryIndices: secondaryIndices, 199 Strategy: segmentindex.StrategySetCollection, 200 IndexStart: startOfIndex, 201 } 202 203 if _, err := h.WriteTo(c.w); err != nil { 204 return err 205 } 206 207 return nil 208 } 209 210 // Removes values with tombstone set from input slice. Output slice may be smaller than input one. 211 // Returned skip of true means there are no values left (key can be omitted in segment) 212 // WARN: method can alter input slice by swapping its elements and reducing length (not capacity) 213 func (c *compactorSet) cleanupValues(values []value) (vals []value, skip bool) { 214 if !c.cleanupTombstones { 215 return values, false 216 } 217 218 // Reuse input slice not to allocate new memory 219 // Rearrange slice in a way that tombstoned values are moved to the end 220 // and reduce slice's length. 221 last := 0 222 for i := 0; i < len(values); i++ { 223 if !values[i].tombstone { 224 // Swap both elements instead overwritting `last` by `i`. 225 // Overwrite would result in `values[last].value` pointing to the same slice 226 // as `values[i].value`. 227 // If `values` slice is reused by multiple nodes (as it happens for map cursors 228 // `segmentCursorCollectionReusable` using `segmentCollectionNode` as buffer) 229 // populating values[i].value would overwrite values[last].value 230 // Swaps makes sure values[i].value and values[last].value point to different slices 231 values[last], values[i] = values[i], values[last] 232 last++ 233 } 234 } 235 236 if last == 0 { 237 return nil, true 238 } 239 return values[:last], false 240 }