// Source: kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/pipeline/beamio/leveldb.go

/*
 * Copyright 2018 The Kythe Authors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package beamio

import (
	"bytes"
	"context"
	"encoding/binary"
	"fmt"
	"io"
	"path/filepath"
	"reflect"
	"sort"
	"strings"
	"time"

	"kythe.io/kythe/go/util/log"

	"github.com/apache/beam/sdks/go/pkg/beam"
	"github.com/apache/beam/sdks/go/pkg/beam/io/filesystem"
	"github.com/apache/beam/sdks/go/pkg/beam/transforms/stats"
	"github.com/syndtr/goleveldb/leveldb/comparer"
	"github.com/syndtr/goleveldb/leveldb/journal"
	"github.com/syndtr/goleveldb/leveldb/opt"
	"github.com/syndtr/goleveldb/leveldb/table"
)

// init registers the writeManifest and writeTable DoFn types and the
// keyByKey and distinctCombine functions with Beam.
// NOTE(review): presumably required so that runners can serialize them for
// remote execution -- confirm against the Beam Go SDK registration docs.
func init() {
	beam.RegisterType(reflect.TypeOf((*writeManifest)(nil)).Elem())
	beam.RegisterType(reflect.TypeOf((*writeTable)(nil)).Elem())
	beam.RegisterFunction(keyByKey)
	beam.RegisterFunction(distinctCombine)
}

// WriteLevelDB writes a set of PCollections containing KVs to a new LevelDB at
// the given path. Each KV is serialized and stored as a single LevelDB
// key-value entry according to their enclosing PCollection's beam.Coder. Each
// table may have different KV types. Keys must be unique across all
// PCollections.
54 func WriteLevelDB(s beam.Scope, path string, opts stats.Opts, tables ...beam.PCollection) { 55 filesystem.ValidateScheme(path) 56 s = s.Scope("WriteLevelDB") 57 58 tableMetadata := writeShards(s, path, opts, tables...) 59 60 // Write all SSTable metadata to the LevelDB's MANIFEST journal. 61 s = s.Scope("Manifest") 62 beam.ParDo(s, &writeManifest{Path: path}, beam.GroupByKey(s, beam.AddFixedKey(s, tableMetadata))) 63 } 64 65 func writeShards(s beam.Scope, path string, opts stats.Opts, tables ...beam.PCollection) beam.PCollection { 66 s = s.Scope("Shards") 67 68 encoded := EncodeKeyValues(s, tables...) 69 70 // Group each key-value by a shard number based on its key's byte encoding. 71 shards := beam.GroupByKey(s, ComputeShards(s, makeDistinct(s, encoded), opts)) 72 73 // Write each shard to a separate SSTable. The resulting PCollection contains 74 // each SSTable's metadata (*tableMetadata). 75 return beam.ParDo(s, &writeTable{path}, shards) 76 } 77 78 func keyByKey(kv KeyValue) ([]byte, KeyValue) { 79 return kv.Key, kv 80 } 81 82 func makeDistinct(s beam.Scope, kvs beam.PCollection) beam.PCollection { 83 return beam.DropKey(s, beam.CombinePerKey(s, distinctCombine, beam.ParDo(s, keyByKey, kvs))) 84 } 85 86 func distinctCombine(ctx context.Context, accum, other KeyValue) KeyValue { 87 if accum.Key == nil { 88 return other 89 } 90 duplicateLevelDBKeysCounter.Inc(ctx, 1) 91 if !bytes.Equal(accum.Value, other.Value) { 92 conflictingLevelDBValuesCounter.Inc(ctx, 1) 93 } 94 return accum 95 } 96 97 type writeManifest struct{ Path string } 98 99 type fsFile struct { 100 io.WriteCloser 101 fs filesystem.Interface 102 } 103 104 // Close implements part of the io.WriteCloser interface. It closes both the 105 // file and underlying filesystem. 
106 func (f *fsFile) Close() error { 107 fErr := f.WriteCloser.Close() 108 fsErr := f.fs.Close() 109 if fErr != nil { 110 return fErr 111 } 112 return fsErr 113 } 114 115 func openWrite(ctx context.Context, path string) (io.WriteCloser, error) { 116 fs, err := filesystem.New(ctx, path) 117 if err != nil { 118 return nil, err 119 } 120 f, err := fs.OpenWrite(ctx, path) 121 if err != nil { 122 return nil, err 123 } 124 return &fsFile{f, fs}, nil 125 } 126 127 // Constants used as IDs for LevelDB journal entries. 128 const ( 129 manifestCompararerNum = 1 130 manifestCurrentJournalNum = 2 131 manifestNextFileNum = 3 132 manifestLastCompactionNum = 4 133 manifestAddedTableNum = 7 134 ) 135 136 // ProcessElement combines all tableMetadata into LevelDB's journal format and 137 // writes the database's CURRENT manifest file. It returns the maximum shard 138 // number processed. 139 func (w *writeManifest) ProcessElement(ctx context.Context, _ beam.T, e func(*tableMetadata) bool) (int, error) { 140 const manifestName = "MANIFEST-000000" 141 defer func(start time.Time) { log.InfoContextf(ctx, "Manifest written in %s", time.Since(start)) }(time.Now()) 142 143 // Write the CURRENT manifest to the 0'th LevelDB file. 
144 f, err := openWrite(ctx, schemePreservingPathJoin(w.Path, manifestName)) 145 if err != nil { 146 return 0, err 147 } 148 149 journals := journal.NewWriter(f) 150 j, err := journals.Next() 151 if err != nil { 152 return 0, err 153 } 154 155 // Comparer 156 putUvarint(j, manifestCompararerNum) 157 putBytes(j, []byte(keyComparer{}.Name())) 158 159 // Current journal 160 putUvarint(j, manifestCurrentJournalNum) 161 putUvarint(j, 0) // MANIFEST-000000 162 163 // Added table entry 164 var maxShard, maxSeq int 165 var md tableMetadata 166 for e(&md) { 167 putUvarint(j, manifestAddedTableNum) 168 putUvarint(j, 0) // all SSTables are level-0 169 putUvarint(j, uint64(md.Shard)) 170 putUvarint(j, uint64(md.Size)) 171 putBytes(j, md.First) 172 putBytes(j, md.Last) 173 174 // Keep track of the last shard num and maximum sequence number. 175 if md.Shard > maxShard { 176 maxShard = md.Shard 177 } 178 if md.Seq > maxSeq { 179 maxSeq = md.Seq 180 } 181 } 182 183 // Next available file entry 184 putUvarint(j, manifestNextFileNum) 185 putUvarint(j, uint64(maxShard+1)) 186 187 // Last compaction sequence 188 putUvarint(j, manifestLastCompactionNum) 189 putUvarint(j, uint64(maxSeq)) 190 191 if err := journals.Close(); err != nil { 192 return 0, err 193 } else if err := f.Close(); err != nil { 194 return 0, err 195 } 196 197 // Write the CURRENT pointer to the freshly written manifest file. 198 currentFile, err := openWrite(ctx, schemePreservingPathJoin(w.Path, "CURRENT")) 199 if err != nil { 200 return 0, err 201 } else if _, err := io.WriteString(currentFile, manifestName+"\n"); err != nil { 202 return 0, err 203 } else if err := currentFile.Close(); err != nil { 204 return 0, err 205 } 206 207 return maxShard, nil 208 } 209 210 // putUvarint writes x as a varint to w. 
211 func putUvarint(w io.Writer, x uint64) error { 212 buf := make([]byte, binary.MaxVarintLen64) 213 n := binary.PutUvarint(buf, x) 214 _, err := w.Write(buf[:n]) 215 return err 216 } 217 218 // putBytes writes a varint-prefixed buffer to w. 219 func putBytes(w io.Writer, b []byte) error { 220 if err := putUvarint(w, uint64(len(b))); err != nil { 221 return err 222 } 223 _, err := w.Write(b) 224 return err 225 } 226 227 type writeTable struct{ Path string } 228 229 // tableMetadata represents a single SSTable within a LevelDB. Each SSTable 230 // written by the LevelDB sink is a level-0 table (meaning that its key ranges 231 // can overlap with another SSTable's). 232 type tableMetadata struct { 233 // Shard is the table's identifying number. 234 Shard int 235 236 // First/Last are the first and last keys in the table. 237 First, Last []byte 238 239 // Size is the byte size of the encoded table. 240 Size int 241 242 // Seq is the last used sequence number in the table. 243 Seq int 244 } 245 246 var ( 247 duplicateLevelDBKeysCounter = beam.NewCounter("kythe.beamio.leveldb", "duplicate-keys") 248 conflictingLevelDBValuesCounter = beam.NewCounter("kythe.beamio.leveldb", "conflicting-values") 249 ) 250 251 const schemaSeparator = "://" 252 253 // schemePreservingPathJoin is like filepath.Join, but doesn't collapse 254 // the double-slash in the schema prefix, if any. 255 func schemePreservingPathJoin(p, f string) string { 256 parts := strings.SplitN(p, schemaSeparator, 2) 257 if len(parts) == 2 { 258 return parts[0] + schemaSeparator + filepath.Join(parts[1], f) 259 } 260 return filepath.Join(p, f) 261 } 262 263 // ProcessElement writes a set of KeyValues to the an SSTable per shard. Shards 264 // should be small enough to fit into memory so that they can be sorted. 
265 // TODO(BEAM-4405): use SortValues extension to remove in-memory requirement 266 func (w *writeTable) ProcessElement(ctx context.Context, shard int, kvIter func(*KeyValue) bool, emit func(tableMetadata)) error { 267 opts := &opt.Options{ 268 BlockSize: 5 * opt.MiB, 269 Comparer: keyComparer{}, 270 } 271 272 var totalElements int 273 defer func(start time.Time) { 274 log.InfoContextf(ctx, "Shard %04d: %s (size: %d)", shard, time.Since(start), totalElements) 275 }(time.Now()) 276 md := tableMetadata{Shard: shard + 1} 277 278 var els []KeyValue 279 var kv KeyValue 280 for kvIter(&kv) { 281 els = append(els, kv) 282 } 283 sort.Slice(els, func(i, j int) bool { 284 return bytes.Compare(els[i].Key, els[j].Key) < 0 285 }) 286 287 // Remove duplicate keys 288 j := 1 289 for i := 1; i < len(els); i++ { 290 if bytes.Equal(els[j-1].Key, els[i].Key) { 291 if !bytes.Equal(els[j-1].Value, els[i].Value) { 292 conflictingLevelDBValuesCounter.Inc(ctx, 1) 293 } 294 duplicateLevelDBKeysCounter.Inc(ctx, 1) 295 } else { 296 els[j] = els[i] 297 j++ 298 } 299 } 300 els = els[:j] 301 302 // Encode keys for LevelDB 303 for i := 0; i < len(els); i++ { 304 md.Seq++ 305 els[i].Key = makeLevelDBKey(uint64(md.Seq), els[i].Key) 306 } 307 308 totalElements = len(els) 309 md.First = els[0].Key 310 md.Last = els[len(els)-1].Key 311 312 // Write each sorted key-value to an SSTable. 313 f, err := openWrite(ctx, schemePreservingPathJoin(w.Path, fmt.Sprintf("%06d.ldb", md.Shard))) 314 if err != nil { 315 return err 316 } 317 wr := table.NewWriter(f, opts) 318 for _, kv := range els { 319 if err := wr.Append(kv.Key, kv.Value); err != nil { 320 return err 321 } 322 } 323 if err := wr.Close(); err != nil { 324 return err 325 } else if err := f.Close(); err != nil { 326 return err 327 } 328 md.Size = wr.BytesLen() 329 330 emit(md) 331 return nil 332 } 333 334 const keySuffixSize = 8 335 336 // makeLevelDBKey constructs an internal LevelDB key from a user key. 
seq is 337 // the sequence number for the key-value entry within the LevelDB. 338 func makeLevelDBKey(seq uint64, key []byte) []byte { 339 const typ = 1 // value (vs. deletion) 340 k := make([]byte, len(key)+keySuffixSize) 341 copy(k, key) 342 binary.LittleEndian.PutUint64(k[len(key):], (seq<<keySuffixSize)|typ) 343 return k 344 } 345 346 // parseLevelDBKey returns the user key and the sequence number (and value type) 347 // from an internal LevelDB key. 348 func parseLevelDBKey(key []byte) (ukey []byte, seqNum uint64) { 349 return key[:len(key)-keySuffixSize], binary.LittleEndian.Uint64(key[len(key)-keySuffixSize:]) 350 } 351 352 // keyComparer compares internal (ukey, seqNum) LevelDB keys. 353 type keyComparer struct{} 354 355 // Name implements part of the comparer.Comparer interface. 356 func (keyComparer) Name() string { return "leveldb.BytewiseComparator" } 357 358 // Compare implements part of the comparer.Comparer interface. 359 func (keyComparer) Compare(a, b []byte) int { 360 ak, an := parseLevelDBKey(a) 361 bk, bn := parseLevelDBKey(b) 362 c := bytes.Compare(ak, bk) 363 if c == 0 { 364 return int(bn - an) 365 } 366 return c 367 } 368 369 // Separator implements part of the comparer.Comparer interface. 370 func (keyComparer) Separator(dst, a, b []byte) []byte { 371 ak, _ := parseLevelDBKey(a) 372 bk, _ := parseLevelDBKey(b) 373 dst = comparer.DefaultComparer.Separator(dst, ak, bk) 374 if dst != nil && len(dst) < len(ak) && bytes.Compare(ak, dst) < 0 { 375 return append(dst, maxKeyNumSuffix...) 376 } 377 return nil 378 } 379 380 // Successor implements part of the comparer.Comparer interface. 381 func (keyComparer) Successor(dst, k []byte) []byte { 382 k, _ = parseLevelDBKey(k) 383 dst = comparer.DefaultComparer.Successor(dst, k) 384 if dst != nil && len(dst) < len(k) && bytes.Compare(k, dst) < 0 { 385 return append(dst, maxKeyNumSuffix...) 
386 } 387 return nil 388 } 389 390 // maxKeyNumSuffix is maximum possible sequence number (and value type) for an 391 // internal LevelDB key. 392 var maxKeyNumSuffix = bytes.Repeat([]byte{0xFF}, keySuffixSize)