github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/sstable/writer.go (about) 1 package sstable 2 3 import ( 4 "context" 5 "encoding/hex" 6 "fmt" 7 "hash" 8 "hash/fnv" 9 "strconv" 10 11 "github.com/cockroachdb/pebble/sstable" 12 "github.com/treeverse/lakefs/pkg/graveler" 13 "github.com/treeverse/lakefs/pkg/graveler/committed" 14 "github.com/treeverse/lakefs/pkg/ident" 15 "github.com/treeverse/lakefs/pkg/pyramid" 16 ) 17 18 const ( 19 MetadataFirstKey = "min_key" 20 MetadataLastKey = "max_key" 21 MetadataNumRecordsKey = "count" 22 MetadataEstimatedSizeKey = "estimated_size_bytes" 23 ) 24 25 type DiskWriter struct { 26 ctx context.Context 27 w *sstable.Writer 28 props map[string]string 29 tierFS pyramid.FS 30 first committed.Key 31 last committed.Key 32 count int 33 hash hash.Hash 34 fh pyramid.StoredFile 35 closed bool 36 } 37 38 func NewDiskWriter(ctx context.Context, tierFS pyramid.FS, ns committed.Namespace, hash hash.Hash, metadata graveler.Metadata) (*DiskWriter, error) { 39 fh, err := tierFS.Create(ctx, string(ns)) 40 if err != nil { 41 return nil, fmt.Errorf("opening file: %w", err) 42 } 43 44 props := make(map[string]string) 45 for k, v := range metadata { 46 props[k] = v 47 } 48 49 writer := sstable.NewWriter(fh, sstable.WriterOptions{ 50 Compression: sstable.SnappyCompression, 51 TablePropertyCollectors: []func() sstable.TablePropertyCollector{NewStaticCollector(props)}, 52 }) 53 54 return &DiskWriter{ 55 ctx: ctx, 56 w: writer, 57 props: props, 58 fh: fh, 59 tierFS: tierFS, 60 hash: hash, 61 }, nil 62 } 63 64 // SetMetadata associates metadata value (which will be stringified) with key. 65 // Keys and values are also calculated as part of the resulting range ID 66 func (dw *DiskWriter) SetMetadata(key, value string) { 67 dw.props[key] = value 68 } 69 70 func (dw *DiskWriter) GetFS() pyramid.FS { 71 return dw.tierFS 72 } 73 74 func (dw *DiskWriter) GetStoredFile() pyramid.StoredFile { 75 return dw.fh 76 } 77 78 func (dw *DiskWriter) WriteRecord(record committed.Record) error { 79 if err := dw.w.Set(record.Key, record.Value); err != nil { 80 return fmt.Errorf("setting key and value: %w", err) 81 } 82 83 // updating stats 84 if dw.count == 0 { 85 dw.first = make(committed.Key, len(record.Key)) 86 copy(dw.first, record.Key) 87 } 88 dw.last = make(committed.Key, len(record.Key)) 89 copy(dw.last, record.Key) 90 dw.count++ 91 92 if err := dw.writeHashWithLen(record.Key); err != nil { 93 return err 94 } 95 return dw.writeHashWithLen(record.Value) 96 } 97 98 func (dw *DiskWriter) GetApproximateSize() uint64 { 99 return dw.w.EstimatedSize() 100 } 101 102 func (dw *DiskWriter) writeHashWithLen(buf []byte) error { 103 if _, err := dw.hash.Write([]byte(strconv.Itoa(len(buf)))); err != nil { 104 return err 105 } 106 if _, err := dw.hash.Write(buf); err != nil { 107 return err 108 } 109 if _, err := dw.hash.Write([]byte("|")); err != nil { 110 return err 111 } 112 return nil 113 } 114 115 func (dw *DiskWriter) Abort() error { 116 if dw.closed { 117 return nil 118 } 119 120 if err := dw.w.Close(); err != nil { 121 return fmt.Errorf("sstable file close: %w", err) 122 } 123 124 if err := dw.fh.Abort(dw.ctx); err != nil { 125 return fmt.Errorf("sstable file abort: %w", err) 126 } 127 return nil 128 } 129 130 func (dw *DiskWriter) Close() (*committed.WriteResult, error) { 131 // Before closing, we write all user supplied metadata keys and values to the hash 132 // This is done to avoid collisions, especially on empty sstables that might hash to the same value otherwise. 133 ident.MarshalStringMap(dw.hash, dw.props) 134 135 tableHash := dw.hash.Sum(nil) 136 sstableID := hex.EncodeToString(tableHash) 137 138 // Prepare metadata properties for Close to write. The map was already set in the 139 // sstable.Writer constructor and cannot be changed, but we can replace its values 140 // before writing it out. 141 first := dw.first 142 last := dw.last 143 estimatedSize := dw.w.EstimatedSize() 144 count := dw.count 145 dw.SetMetadata(MetadataFirstKey, string(first)) 146 dw.SetMetadata(MetadataLastKey, string(last)) 147 dw.SetMetadata(MetadataNumRecordsKey, strconv.Itoa(count)) 148 dw.SetMetadata(MetadataEstimatedSizeKey, strconv.FormatUint(estimatedSize, 10)) 149 150 if err := dw.w.Close(); err != nil { 151 return nil, fmt.Errorf("sstable close (%s): %w", sstableID, err) 152 } 153 154 if err := dw.fh.Store(dw.ctx, sstableID); err != nil { 155 return nil, fmt.Errorf("sstable store (%s): %w", sstableID, err) 156 } 157 158 dw.closed = true 159 160 return &committed.WriteResult{ 161 RangeID: committed.ID(sstableID), 162 First: first, 163 Last: last, 164 Count: count, 165 EstimatedRangeSizeBytes: estimatedSize, 166 }, nil 167 } 168 169 // ShouldBreakAtKey returns true if it should break range after the given key 170 func (dw *DiskWriter) ShouldBreakAtKey(key graveler.Key, params *committed.Params) bool { 171 approximateSize := dw.GetApproximateSize() 172 if approximateSize < params.MinRangeSizeBytes { 173 return false 174 } 175 if approximateSize >= params.MaxRangeSizeBytes { 176 return true 177 } 178 179 h := fnv.New64a() 180 // FNV always reads all bytes and never fails; ignore its return values 181 _, _ = h.Write(key) 182 r := h.Sum64() % uint64(params.RangeSizeEntriesRaggedness) 183 return r == 0 184 }