github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/sstable/writer.go (about)

     1  package sstable
     2  
     3  import (
     4  	"context"
     5  	"encoding/hex"
     6  	"fmt"
     7  	"hash"
     8  	"hash/fnv"
     9  	"strconv"
    10  
    11  	"github.com/cockroachdb/pebble/sstable"
    12  	"github.com/treeverse/lakefs/pkg/graveler"
    13  	"github.com/treeverse/lakefs/pkg/graveler/committed"
    14  	"github.com/treeverse/lakefs/pkg/ident"
    15  	"github.com/treeverse/lakefs/pkg/pyramid"
    16  )
    17  
    18  const (
    19  	MetadataFirstKey         = "min_key"
    20  	MetadataLastKey          = "max_key"
    21  	MetadataNumRecordsKey    = "count"
    22  	MetadataEstimatedSizeKey = "estimated_size_bytes"
    23  )
    24  
    25  type DiskWriter struct {
    26  	ctx    context.Context
    27  	w      *sstable.Writer
    28  	props  map[string]string
    29  	tierFS pyramid.FS
    30  	first  committed.Key
    31  	last   committed.Key
    32  	count  int
    33  	hash   hash.Hash
    34  	fh     pyramid.StoredFile
    35  	closed bool
    36  }
    37  
    38  func NewDiskWriter(ctx context.Context, tierFS pyramid.FS, ns committed.Namespace, hash hash.Hash, metadata graveler.Metadata) (*DiskWriter, error) {
    39  	fh, err := tierFS.Create(ctx, string(ns))
    40  	if err != nil {
    41  		return nil, fmt.Errorf("opening file: %w", err)
    42  	}
    43  
    44  	props := make(map[string]string)
    45  	for k, v := range metadata {
    46  		props[k] = v
    47  	}
    48  
    49  	writer := sstable.NewWriter(fh, sstable.WriterOptions{
    50  		Compression:             sstable.SnappyCompression,
    51  		TablePropertyCollectors: []func() sstable.TablePropertyCollector{NewStaticCollector(props)},
    52  	})
    53  
    54  	return &DiskWriter{
    55  		ctx:    ctx,
    56  		w:      writer,
    57  		props:  props,
    58  		fh:     fh,
    59  		tierFS: tierFS,
    60  		hash:   hash,
    61  	}, nil
    62  }
    63  
    64  // SetMetadata associates metadata value (which will be stringified) with key.
    65  // Keys and values are also calculated as part of the resulting range ID
    66  func (dw *DiskWriter) SetMetadata(key, value string) {
    67  	dw.props[key] = value
    68  }
    69  
    70  func (dw *DiskWriter) GetFS() pyramid.FS {
    71  	return dw.tierFS
    72  }
    73  
    74  func (dw *DiskWriter) GetStoredFile() pyramid.StoredFile {
    75  	return dw.fh
    76  }
    77  
    78  func (dw *DiskWriter) WriteRecord(record committed.Record) error {
    79  	if err := dw.w.Set(record.Key, record.Value); err != nil {
    80  		return fmt.Errorf("setting key and value: %w", err)
    81  	}
    82  
    83  	// updating stats
    84  	if dw.count == 0 {
    85  		dw.first = make(committed.Key, len(record.Key))
    86  		copy(dw.first, record.Key)
    87  	}
    88  	dw.last = make(committed.Key, len(record.Key))
    89  	copy(dw.last, record.Key)
    90  	dw.count++
    91  
    92  	if err := dw.writeHashWithLen(record.Key); err != nil {
    93  		return err
    94  	}
    95  	return dw.writeHashWithLen(record.Value)
    96  }
    97  
    98  func (dw *DiskWriter) GetApproximateSize() uint64 {
    99  	return dw.w.EstimatedSize()
   100  }
   101  
   102  func (dw *DiskWriter) writeHashWithLen(buf []byte) error {
   103  	if _, err := dw.hash.Write([]byte(strconv.Itoa(len(buf)))); err != nil {
   104  		return err
   105  	}
   106  	if _, err := dw.hash.Write(buf); err != nil {
   107  		return err
   108  	}
   109  	if _, err := dw.hash.Write([]byte("|")); err != nil {
   110  		return err
   111  	}
   112  	return nil
   113  }
   114  
   115  func (dw *DiskWriter) Abort() error {
   116  	if dw.closed {
   117  		return nil
   118  	}
   119  
   120  	if err := dw.w.Close(); err != nil {
   121  		return fmt.Errorf("sstable file close: %w", err)
   122  	}
   123  
   124  	if err := dw.fh.Abort(dw.ctx); err != nil {
   125  		return fmt.Errorf("sstable file abort: %w", err)
   126  	}
   127  	return nil
   128  }
   129  
   130  func (dw *DiskWriter) Close() (*committed.WriteResult, error) {
   131  	// Before closing, we write all user supplied metadata keys and values to the hash
   132  	// This is done to avoid collisions, especially on empty sstables that might hash to the same value otherwise.
   133  	ident.MarshalStringMap(dw.hash, dw.props)
   134  
   135  	tableHash := dw.hash.Sum(nil)
   136  	sstableID := hex.EncodeToString(tableHash)
   137  
   138  	// Prepare metadata properties for Close to write.  The map was already set in the
   139  	// sstable.Writer constructor and cannot be changed, but we can replace its values
   140  	// before writing it out.
   141  	first := dw.first
   142  	last := dw.last
   143  	estimatedSize := dw.w.EstimatedSize()
   144  	count := dw.count
   145  	dw.SetMetadata(MetadataFirstKey, string(first))
   146  	dw.SetMetadata(MetadataLastKey, string(last))
   147  	dw.SetMetadata(MetadataNumRecordsKey, strconv.Itoa(count))
   148  	dw.SetMetadata(MetadataEstimatedSizeKey, strconv.FormatUint(estimatedSize, 10))
   149  
   150  	if err := dw.w.Close(); err != nil {
   151  		return nil, fmt.Errorf("sstable close (%s): %w", sstableID, err)
   152  	}
   153  
   154  	if err := dw.fh.Store(dw.ctx, sstableID); err != nil {
   155  		return nil, fmt.Errorf("sstable store (%s): %w", sstableID, err)
   156  	}
   157  
   158  	dw.closed = true
   159  
   160  	return &committed.WriteResult{
   161  		RangeID:                 committed.ID(sstableID),
   162  		First:                   first,
   163  		Last:                    last,
   164  		Count:                   count,
   165  		EstimatedRangeSizeBytes: estimatedSize,
   166  	}, nil
   167  }
   168  
   169  // ShouldBreakAtKey returns true if it should break range after the given key
   170  func (dw *DiskWriter) ShouldBreakAtKey(key graveler.Key, params *committed.Params) bool {
   171  	approximateSize := dw.GetApproximateSize()
   172  	if approximateSize < params.MinRangeSizeBytes {
   173  		return false
   174  	}
   175  	if approximateSize >= params.MaxRangeSizeBytes {
   176  		return true
   177  	}
   178  
   179  	h := fnv.New64a()
   180  	// FNV always reads all bytes and never fails; ignore its return values
   181  	_, _ = h.Write(key)
   182  	r := h.Sum64() % uint64(params.RangeSizeEntriesRaggedness)
   183  	return r == 0
   184  }