kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/pipeline/beamio/leveldb.go (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package beamio
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/binary"
    23  	"fmt"
    24  	"io"
    25  	"path/filepath"
    26  	"reflect"
    27  	"sort"
    28  	"strings"
    29  	"time"
    30  
    31  	"kythe.io/kythe/go/util/log"
    32  
    33  	"github.com/apache/beam/sdks/go/pkg/beam"
    34  	"github.com/apache/beam/sdks/go/pkg/beam/io/filesystem"
    35  	"github.com/apache/beam/sdks/go/pkg/beam/transforms/stats"
    36  	"github.com/syndtr/goleveldb/leveldb/comparer"
    37  	"github.com/syndtr/goleveldb/leveldb/journal"
    38  	"github.com/syndtr/goleveldb/leveldb/opt"
    39  	"github.com/syndtr/goleveldb/leveldb/table"
    40  )
    41  
    42  func init() {
    43  	beam.RegisterType(reflect.TypeOf((*writeManifest)(nil)).Elem())
    44  	beam.RegisterType(reflect.TypeOf((*writeTable)(nil)).Elem())
    45  	beam.RegisterFunction(keyByKey)
    46  	beam.RegisterFunction(distinctCombine)
    47  }
    48  
    49  // WriteLevelDB writes a set of PCollections containing KVs to a new LevelDB at
    50  // the given path.  Each KV is serialized and stored as a single LevelDB
    51  // key-value entry according to their enclosing PCollection's beam.Coder.  Each
    52  // table may have different KV types.  Keys must be unique across all
    53  // PCollections.
    54  func WriteLevelDB(s beam.Scope, path string, opts stats.Opts, tables ...beam.PCollection) {
    55  	filesystem.ValidateScheme(path)
    56  	s = s.Scope("WriteLevelDB")
    57  
    58  	tableMetadata := writeShards(s, path, opts, tables...)
    59  
    60  	// Write all SSTable metadata to the LevelDB's MANIFEST journal.
    61  	s = s.Scope("Manifest")
    62  	beam.ParDo(s, &writeManifest{Path: path}, beam.GroupByKey(s, beam.AddFixedKey(s, tableMetadata)))
    63  }
    64  
    65  func writeShards(s beam.Scope, path string, opts stats.Opts, tables ...beam.PCollection) beam.PCollection {
    66  	s = s.Scope("Shards")
    67  
    68  	encoded := EncodeKeyValues(s, tables...)
    69  
    70  	// Group each key-value by a shard number based on its key's byte encoding.
    71  	shards := beam.GroupByKey(s, ComputeShards(s, makeDistinct(s, encoded), opts))
    72  
    73  	// Write each shard to a separate SSTable.  The resulting PCollection contains
    74  	// each SSTable's metadata (*tableMetadata).
    75  	return beam.ParDo(s, &writeTable{path}, shards)
    76  }
    77  
    78  func keyByKey(kv KeyValue) ([]byte, KeyValue) {
    79  	return kv.Key, kv
    80  }
    81  
    82  func makeDistinct(s beam.Scope, kvs beam.PCollection) beam.PCollection {
    83  	return beam.DropKey(s, beam.CombinePerKey(s, distinctCombine, beam.ParDo(s, keyByKey, kvs)))
    84  }
    85  
    86  func distinctCombine(ctx context.Context, accum, other KeyValue) KeyValue {
    87  	if accum.Key == nil {
    88  		return other
    89  	}
    90  	duplicateLevelDBKeysCounter.Inc(ctx, 1)
    91  	if !bytes.Equal(accum.Value, other.Value) {
    92  		conflictingLevelDBValuesCounter.Inc(ctx, 1)
    93  	}
    94  	return accum
    95  }
    96  
    97  type writeManifest struct{ Path string }
    98  
    99  type fsFile struct {
   100  	io.WriteCloser
   101  	fs filesystem.Interface
   102  }
   103  
   104  // Close implements part of the io.WriteCloser interface.  It closes both the
   105  // file and underlying filesystem.
   106  func (f *fsFile) Close() error {
   107  	fErr := f.WriteCloser.Close()
   108  	fsErr := f.fs.Close()
   109  	if fErr != nil {
   110  		return fErr
   111  	}
   112  	return fsErr
   113  }
   114  
   115  func openWrite(ctx context.Context, path string) (io.WriteCloser, error) {
   116  	fs, err := filesystem.New(ctx, path)
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  	f, err := fs.OpenWrite(ctx, path)
   121  	if err != nil {
   122  		return nil, err
   123  	}
   124  	return &fsFile{f, fs}, nil
   125  }
   126  
// Constants used as IDs for LevelDB journal entries.  In this file each ID is
// written as a uvarint tag immediately before the entry's payload.
const (
	// Tag for the comparer entry; the payload is the comparer's name.
	manifestCompararerNum = 1
	// Tag for the current journal entry; the payload is a file number.
	manifestCurrentJournalNum = 2
	// Tag for the next available file number entry.
	manifestNextFileNum = 3
	// Tag whose payload is the maximum sequence number seen across all tables.
	// NOTE(review): LevelDB's own record tag 4 appears to be "last sequence"
	// rather than a compaction pointer — confirm the name is intentional.
	manifestLastCompactionNum = 4
	// Tag for an added-table entry; the payload is the table's level, file
	// number, size, and first/last keys.
	manifestAddedTableNum = 7
)
   135  
   136  // ProcessElement combines all tableMetadata into LevelDB's journal format and
   137  // writes the database's CURRENT manifest file.  It returns the maximum shard
   138  // number processed.
   139  func (w *writeManifest) ProcessElement(ctx context.Context, _ beam.T, e func(*tableMetadata) bool) (int, error) {
   140  	const manifestName = "MANIFEST-000000"
   141  	defer func(start time.Time) { log.InfoContextf(ctx, "Manifest written in %s", time.Since(start)) }(time.Now())
   142  
   143  	// Write the CURRENT manifest to the 0'th LevelDB file.
   144  	f, err := openWrite(ctx, schemePreservingPathJoin(w.Path, manifestName))
   145  	if err != nil {
   146  		return 0, err
   147  	}
   148  
   149  	journals := journal.NewWriter(f)
   150  	j, err := journals.Next()
   151  	if err != nil {
   152  		return 0, err
   153  	}
   154  
   155  	// Comparer
   156  	putUvarint(j, manifestCompararerNum)
   157  	putBytes(j, []byte(keyComparer{}.Name()))
   158  
   159  	// Current journal
   160  	putUvarint(j, manifestCurrentJournalNum)
   161  	putUvarint(j, 0) // MANIFEST-000000
   162  
   163  	// Added table entry
   164  	var maxShard, maxSeq int
   165  	var md tableMetadata
   166  	for e(&md) {
   167  		putUvarint(j, manifestAddedTableNum)
   168  		putUvarint(j, 0) // all SSTables are level-0
   169  		putUvarint(j, uint64(md.Shard))
   170  		putUvarint(j, uint64(md.Size))
   171  		putBytes(j, md.First)
   172  		putBytes(j, md.Last)
   173  
   174  		// Keep track of the last shard num and maximum sequence number.
   175  		if md.Shard > maxShard {
   176  			maxShard = md.Shard
   177  		}
   178  		if md.Seq > maxSeq {
   179  			maxSeq = md.Seq
   180  		}
   181  	}
   182  
   183  	// Next available file entry
   184  	putUvarint(j, manifestNextFileNum)
   185  	putUvarint(j, uint64(maxShard+1))
   186  
   187  	// Last compaction sequence
   188  	putUvarint(j, manifestLastCompactionNum)
   189  	putUvarint(j, uint64(maxSeq))
   190  
   191  	if err := journals.Close(); err != nil {
   192  		return 0, err
   193  	} else if err := f.Close(); err != nil {
   194  		return 0, err
   195  	}
   196  
   197  	// Write the CURRENT pointer to the freshly written manifest file.
   198  	currentFile, err := openWrite(ctx, schemePreservingPathJoin(w.Path, "CURRENT"))
   199  	if err != nil {
   200  		return 0, err
   201  	} else if _, err := io.WriteString(currentFile, manifestName+"\n"); err != nil {
   202  		return 0, err
   203  	} else if err := currentFile.Close(); err != nil {
   204  		return 0, err
   205  	}
   206  
   207  	return maxShard, nil
   208  }
   209  
   210  // putUvarint writes x as a varint to w.
   211  func putUvarint(w io.Writer, x uint64) error {
   212  	buf := make([]byte, binary.MaxVarintLen64)
   213  	n := binary.PutUvarint(buf, x)
   214  	_, err := w.Write(buf[:n])
   215  	return err
   216  }
   217  
   218  // putBytes writes a varint-prefixed buffer to w.
   219  func putBytes(w io.Writer, b []byte) error {
   220  	if err := putUvarint(w, uint64(len(b))); err != nil {
   221  		return err
   222  	}
   223  	_, err := w.Write(b)
   224  	return err
   225  }
   226  
   227  type writeTable struct{ Path string }
   228  
   229  // tableMetadata represents a single SSTable within a LevelDB.  Each SSTable
   230  // written by the LevelDB sink is a level-0 table (meaning that its key ranges
   231  // can overlap with another SSTable's).
   232  type tableMetadata struct {
   233  	// Shard is the table's identifying number.
   234  	Shard int
   235  
   236  	// First/Last are the first and last keys in the table.
   237  	First, Last []byte
   238  
   239  	// Size is the byte size of the encoded table.
   240  	Size int
   241  
   242  	// Seq is the last used sequence number in the table.
   243  	Seq int
   244  }
   245  
// Beam counters reported under the "kythe.beamio.leveldb" namespace.
var (
	// duplicateLevelDBKeysCounter is incremented for each entry dropped because
	// another entry with the same key was already kept.
	duplicateLevelDBKeysCounter = beam.NewCounter("kythe.beamio.leveldb", "duplicate-keys")
	// conflictingLevelDBValuesCounter is incremented for each dropped duplicate
	// whose value differs from the value of the entry that was kept.
	conflictingLevelDBValuesCounter = beam.NewCounter("kythe.beamio.leveldb", "conflicting-values")
)
   250  
   251  const schemaSeparator = "://"
   252  
   253  // schemePreservingPathJoin is like filepath.Join, but doesn't collapse
   254  // the double-slash in the schema prefix, if any.
   255  func schemePreservingPathJoin(p, f string) string {
   256  	parts := strings.SplitN(p, schemaSeparator, 2)
   257  	if len(parts) == 2 {
   258  		return parts[0] + schemaSeparator + filepath.Join(parts[1], f)
   259  	}
   260  	return filepath.Join(p, f)
   261  }
   262  
   263  // ProcessElement writes a set of KeyValues to the an SSTable per shard.  Shards
   264  // should be small enough to fit into memory so that they can be sorted.
   265  // TODO(BEAM-4405): use SortValues extension to remove in-memory requirement
   266  func (w *writeTable) ProcessElement(ctx context.Context, shard int, kvIter func(*KeyValue) bool, emit func(tableMetadata)) error {
   267  	opts := &opt.Options{
   268  		BlockSize: 5 * opt.MiB,
   269  		Comparer:  keyComparer{},
   270  	}
   271  
   272  	var totalElements int
   273  	defer func(start time.Time) {
   274  		log.InfoContextf(ctx, "Shard %04d: %s (size: %d)", shard, time.Since(start), totalElements)
   275  	}(time.Now())
   276  	md := tableMetadata{Shard: shard + 1}
   277  
   278  	var els []KeyValue
   279  	var kv KeyValue
   280  	for kvIter(&kv) {
   281  		els = append(els, kv)
   282  	}
   283  	sort.Slice(els, func(i, j int) bool {
   284  		return bytes.Compare(els[i].Key, els[j].Key) < 0
   285  	})
   286  
   287  	// Remove duplicate keys
   288  	j := 1
   289  	for i := 1; i < len(els); i++ {
   290  		if bytes.Equal(els[j-1].Key, els[i].Key) {
   291  			if !bytes.Equal(els[j-1].Value, els[i].Value) {
   292  				conflictingLevelDBValuesCounter.Inc(ctx, 1)
   293  			}
   294  			duplicateLevelDBKeysCounter.Inc(ctx, 1)
   295  		} else {
   296  			els[j] = els[i]
   297  			j++
   298  		}
   299  	}
   300  	els = els[:j]
   301  
   302  	// Encode keys for LevelDB
   303  	for i := 0; i < len(els); i++ {
   304  		md.Seq++
   305  		els[i].Key = makeLevelDBKey(uint64(md.Seq), els[i].Key)
   306  	}
   307  
   308  	totalElements = len(els)
   309  	md.First = els[0].Key
   310  	md.Last = els[len(els)-1].Key
   311  
   312  	// Write each sorted key-value to an SSTable.
   313  	f, err := openWrite(ctx, schemePreservingPathJoin(w.Path, fmt.Sprintf("%06d.ldb", md.Shard)))
   314  	if err != nil {
   315  		return err
   316  	}
   317  	wr := table.NewWriter(f, opts)
   318  	for _, kv := range els {
   319  		if err := wr.Append(kv.Key, kv.Value); err != nil {
   320  			return err
   321  		}
   322  	}
   323  	if err := wr.Close(); err != nil {
   324  		return err
   325  	} else if err := f.Close(); err != nil {
   326  		return err
   327  	}
   328  	md.Size = wr.BytesLen()
   329  
   330  	emit(md)
   331  	return nil
   332  }
   333  
   334  const keySuffixSize = 8
   335  
   336  // makeLevelDBKey constructs an internal LevelDB key from a user key.  seq is
   337  // the sequence number for the key-value entry within the LevelDB.
   338  func makeLevelDBKey(seq uint64, key []byte) []byte {
   339  	const typ = 1 // value (vs. deletion)
   340  	k := make([]byte, len(key)+keySuffixSize)
   341  	copy(k, key)
   342  	binary.LittleEndian.PutUint64(k[len(key):], (seq<<keySuffixSize)|typ)
   343  	return k
   344  }
   345  
   346  // parseLevelDBKey returns the user key and the sequence number (and value type)
   347  // from an internal LevelDB key.
   348  func parseLevelDBKey(key []byte) (ukey []byte, seqNum uint64) {
   349  	return key[:len(key)-keySuffixSize], binary.LittleEndian.Uint64(key[len(key)-keySuffixSize:])
   350  }
   351  
   352  // keyComparer compares internal (ukey, seqNum) LevelDB keys.
   353  type keyComparer struct{}
   354  
   355  // Name implements part of the comparer.Comparer interface.
   356  func (keyComparer) Name() string { return "leveldb.BytewiseComparator" }
   357  
   358  // Compare implements part of the comparer.Comparer interface.
   359  func (keyComparer) Compare(a, b []byte) int {
   360  	ak, an := parseLevelDBKey(a)
   361  	bk, bn := parseLevelDBKey(b)
   362  	c := bytes.Compare(ak, bk)
   363  	if c == 0 {
   364  		return int(bn - an)
   365  	}
   366  	return c
   367  }
   368  
   369  // Separator implements part of the comparer.Comparer interface.
   370  func (keyComparer) Separator(dst, a, b []byte) []byte {
   371  	ak, _ := parseLevelDBKey(a)
   372  	bk, _ := parseLevelDBKey(b)
   373  	dst = comparer.DefaultComparer.Separator(dst, ak, bk)
   374  	if dst != nil && len(dst) < len(ak) && bytes.Compare(ak, dst) < 0 {
   375  		return append(dst, maxKeyNumSuffix...)
   376  	}
   377  	return nil
   378  }
   379  
   380  // Successor implements part of the comparer.Comparer interface.
   381  func (keyComparer) Successor(dst, k []byte) []byte {
   382  	k, _ = parseLevelDBKey(k)
   383  	dst = comparer.DefaultComparer.Successor(dst, k)
   384  	if dst != nil && len(dst) < len(k) && bytes.Compare(k, dst) < 0 {
   385  		return append(dst, maxKeyNumSuffix...)
   386  	}
   387  	return nil
   388  }
   389  
// maxKeyNumSuffix is the maximum possible sequence number (and value type) for
// an internal LevelDB key: keySuffixSize bytes, each set to 0xFF.
var maxKeyNumSuffix = bytes.Repeat([]byte{0xFF}, keySuffixSize)