github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/sstable/properties.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package sstable 6 7 import ( 8 "bytes" 9 "encoding/binary" 10 "fmt" 11 "math" 12 "reflect" 13 "sort" 14 "unsafe" 15 16 "github.com/zuoyebang/bitalostable/internal/intern" 17 ) 18 19 const propertiesBlockRestartInterval = math.MaxInt32 20 const propGlobalSeqnumName = "rocksdb.external_sst_file.global_seqno" 21 22 var propTagMap = make(map[string]reflect.StructField) 23 var propBoolTrue = []byte{'1'} 24 var propBoolFalse = []byte{'0'} 25 26 var columnFamilyIDField = func() reflect.StructField { 27 f, ok := reflect.TypeOf(Properties{}).FieldByName("ColumnFamilyID") 28 if !ok { 29 panic("Properties.ColumnFamilyID field not found") 30 } 31 return f 32 }() 33 34 var propOffsetTagMap = make(map[uintptr]string) 35 36 func init() { 37 t := reflect.TypeOf(Properties{}) 38 for i := 0; i < t.NumField(); i++ { 39 f := t.Field(i) 40 if tag := f.Tag.Get("prop"); tag != "" { 41 switch f.Type.Kind() { 42 case reflect.Bool: 43 case reflect.Uint32: 44 case reflect.Uint64: 45 case reflect.String: 46 default: 47 panic(fmt.Sprintf("unsupported property field type: %s %s", f.Name, f.Type)) 48 } 49 propTagMap[tag] = f 50 propOffsetTagMap[f.Offset] = tag 51 } 52 } 53 } 54 55 // Properties holds the sstable property values. The properties are 56 // automatically populated during sstable creation and load from the properties 57 // meta block when an sstable is opened. 58 type Properties struct { 59 // ID of column family for this SST file, corresponding to the CF identified 60 // by column_family_name. 61 ColumnFamilyID uint64 `prop:"rocksdb.column.family.id"` 62 // Name of the column family with which this SST file is associated. Empty if 63 // the column family is unknown. 64 ColumnFamilyName string `prop:"rocksdb.column.family.name"` 65 // The name of the comparer used in this table. 66 ComparerName string `prop:"rocksdb.comparator"` 67 // The compression algorithm used to compress blocks. 68 CompressionName string `prop:"rocksdb.compression"` 69 // The compression options used to compress blocks. 70 CompressionOptions string `prop:"rocksdb.compression_options"` 71 // The time when the SST file was created. Since SST files are immutable, 72 // this is equivalent to last modified time. 73 CreationTime uint64 `prop:"rocksdb.creation.time"` 74 // The total size of all data blocks. 75 DataSize uint64 `prop:"rocksdb.data.size"` 76 // The external sstable version format. Version 2 is the one RocksDB has been 77 // using since 5.13. RocksDB only uses the global sequence number for an 78 // sstable if this property has been set. 79 ExternalFormatVersion uint32 `prop:"rocksdb.external_sst_file.version"` 80 // Actual SST file creation time. 0 means unknown. 81 FileCreationTime uint64 `prop:"rocksdb.file.creation.time"` 82 // The name of the filter policy used in this table. Empty if no filter 83 // policy is used. 84 FilterPolicyName string `prop:"rocksdb.filter.policy"` 85 // The size of filter block. 86 FilterSize uint64 `prop:"rocksdb.filter.size"` 87 // If 0, key is variable length. Otherwise number of bytes for each key. 88 FixedKeyLen uint64 `prop:"rocksdb.fixed.key.length"` 89 // Format version, reserved for backward compatibility. 90 FormatVersion uint64 `prop:"rocksdb.format.version"` 91 // The global sequence number to use for all entries in the table. Present if 92 // the table was created externally and ingested whole. 93 GlobalSeqNum uint64 `prop:"rocksdb.external_sst_file.global_seqno"` 94 // Whether the index key is user key or an internal key. 95 IndexKeyIsUserKey uint64 `prop:"rocksdb.index.key.is.user.key"` 96 // Total number of index partitions if kTwoLevelIndexSearch is used. 97 IndexPartitions uint64 `prop:"rocksdb.index.partitions"` 98 // The size of index block. 99 IndexSize uint64 `prop:"rocksdb.index.size"` 100 // The index type. TODO(peter): add a more detailed description. 101 IndexType uint32 `prop:"rocksdb.block.based.table.index.type"` 102 // Whether delta encoding is used to encode the index values. 103 IndexValueIsDeltaEncoded uint64 `prop:"rocksdb.index.value.is.delta.encoded"` 104 // The name of the merger used in this table. Empty if no merger is used. 105 MergerName string `prop:"rocksdb.merge.operator"` 106 // The number of blocks in this table. 107 NumDataBlocks uint64 `prop:"rocksdb.num.data.blocks"` 108 // The number of deletion entries in this table, including both point and 109 // range deletions. 110 NumDeletions uint64 `prop:"rocksdb.deleted.keys"` 111 // The number of entries in this table. 112 NumEntries uint64 `prop:"rocksdb.num.entries"` 113 // The number of merge operands in the table. 114 NumMergeOperands uint64 `prop:"rocksdb.merge.operands"` 115 // The number of range deletions in this table. 116 NumRangeDeletions uint64 `prop:"rocksdb.num.range-deletions"` 117 // The number of RANGEKEYDELs in this table. 118 NumRangeKeyDels uint64 `prop:"bitalostable.num.range-key-dels"` 119 // The number of RANGEKEYSETs in this table. 120 NumRangeKeySets uint64 `prop:"bitalostable.num.range-key-sets"` 121 // The number of RANGEKEYUNSETs in this table. 122 NumRangeKeyUnsets uint64 `prop:"bitalostable.num.range-key-unsets"` 123 // Timestamp of the earliest key. 0 if unknown. 124 OldestKeyTime uint64 `prop:"rocksdb.oldest.key.time"` 125 // The name of the prefix extractor used in this table. Empty if no prefix 126 // extractor is used. 127 PrefixExtractorName string `prop:"rocksdb.prefix.extractor.name"` 128 // If filtering is enabled, was the filter created on the key prefix. 129 PrefixFiltering bool `prop:"rocksdb.block.based.table.prefix.filtering"` 130 // A comma separated list of names of the property collectors used in this 131 // table. 132 PropertyCollectorNames string `prop:"rocksdb.property.collectors"` 133 // Total raw key size. 134 RawKeySize uint64 `prop:"rocksdb.raw.key.size"` 135 // Total raw rangekey key size. 136 RawRangeKeyKeySize uint64 `prop:"bitalostable.raw.range-key.key.size"` 137 // Total raw rangekey value size. 138 RawRangeKeyValueSize uint64 `prop:"bitalostable.raw.range-key.value.size"` 139 // Total raw value size. 140 RawValueSize uint64 `prop:"rocksdb.raw.value.size"` 141 // Size of the top-level index if kTwoLevelIndexSearch is used. 142 TopLevelIndexSize uint64 `prop:"rocksdb.top-level.index.size"` 143 // User collected properties. 144 UserProperties map[string]string 145 // If filtering is enabled, was the filter created on the whole key. 146 WholeKeyFiltering bool `prop:"rocksdb.block.based.table.whole.key.filtering"` 147 148 // Loaded set indicating which fields have been loaded from disk. Indexed by 149 // the field's byte offset within the struct 150 // (reflect.StructField.Offset). Only set if the properties have been loaded 151 // from a file. Only exported for testing purposes. 152 Loaded map[uintptr]struct{} 153 } 154 155 // NumPointDeletions returns the number of point deletions in this table. 156 func (p *Properties) NumPointDeletions() uint64 { 157 return p.NumDeletions - p.NumRangeDeletions 158 } 159 160 // NumRangeKeys returns a count of the number of range keys in this table. 161 func (p *Properties) NumRangeKeys() uint64 { 162 return p.NumRangeKeyDels + p.NumRangeKeySets + p.NumRangeKeyUnsets 163 } 164 165 func (p *Properties) String() string { 166 var buf bytes.Buffer 167 v := reflect.ValueOf(*p) 168 vt := v.Type() 169 for i := 0; i < v.NumField(); i++ { 170 ft := vt.Field(i) 171 tag := ft.Tag.Get("prop") 172 if tag == "" { 173 continue 174 } 175 176 f := v.Field(i) 177 // TODO(peter): Use f.IsZero() when we can rely on go1.13. 178 if zero := reflect.Zero(f.Type()); zero.Interface() == f.Interface() { 179 // Skip printing of zero values which were not loaded from disk. 180 if _, ok := p.Loaded[ft.Offset]; !ok { 181 continue 182 } 183 } 184 185 fmt.Fprintf(&buf, "%s: ", tag) 186 switch ft.Type.Kind() { 187 case reflect.Bool: 188 fmt.Fprintf(&buf, "%t\n", f.Bool()) 189 case reflect.Uint32: 190 fmt.Fprintf(&buf, "%d\n", f.Uint()) 191 case reflect.Uint64: 192 u := f.Uint() 193 if ft.Offset == columnFamilyIDField.Offset && u == math.MaxInt32 { 194 fmt.Fprintf(&buf, "-\n") 195 } else { 196 fmt.Fprintf(&buf, "%d\n", f.Uint()) 197 } 198 case reflect.String: 199 fmt.Fprintf(&buf, "%s\n", f.String()) 200 default: 201 panic("not reached") 202 } 203 } 204 keys := make([]string, 0, len(p.UserProperties)) 205 for key := range p.UserProperties { 206 keys = append(keys, key) 207 } 208 sort.Strings(keys) 209 for _, key := range keys { 210 fmt.Fprintf(&buf, "%s: %s\n", key, p.UserProperties[key]) 211 } 212 return buf.String() 213 } 214 215 func (p *Properties) load(b block, blockOffset uint64) error { 216 i, err := newRawBlockIter(bytes.Compare, b) 217 if err != nil { 218 return err 219 } 220 p.Loaded = make(map[uintptr]struct{}) 221 v := reflect.ValueOf(p).Elem() 222 for valid := i.First(); valid; valid = i.Next() { 223 tag := intern.Bytes(i.Key().UserKey) 224 if f, ok := propTagMap[tag]; ok { 225 p.Loaded[f.Offset] = struct{}{} 226 field := v.FieldByIndex(f.Index) 227 switch f.Type.Kind() { 228 case reflect.Bool: 229 field.SetBool(bytes.Equal(i.Value(), propBoolTrue)) 230 case reflect.Uint32: 231 field.SetUint(uint64(binary.LittleEndian.Uint32(i.Value()))) 232 case reflect.Uint64: 233 var n uint64 234 if tag == propGlobalSeqnumName { 235 n = binary.LittleEndian.Uint64(i.Value()) 236 } else { 237 n, _ = binary.Uvarint(i.Value()) 238 } 239 field.SetUint(n) 240 case reflect.String: 241 field.SetString(intern.Bytes(i.Value())) 242 default: 243 panic("not reached") 244 } 245 continue 246 } 247 if p.UserProperties == nil { 248 p.UserProperties = make(map[string]string) 249 } 250 p.UserProperties[tag] = string(i.Value()) 251 } 252 return nil 253 } 254 255 func (p *Properties) saveBool(m map[string][]byte, offset uintptr, value bool) { 256 tag := propOffsetTagMap[offset] 257 if value { 258 m[tag] = propBoolTrue 259 } else { 260 m[tag] = propBoolFalse 261 } 262 } 263 264 func (p *Properties) saveUint32(m map[string][]byte, offset uintptr, value uint32) { 265 var buf [4]byte 266 binary.LittleEndian.PutUint32(buf[:], value) 267 m[propOffsetTagMap[offset]] = buf[:] 268 } 269 270 func (p *Properties) saveUint64(m map[string][]byte, offset uintptr, value uint64) { 271 var buf [8]byte 272 binary.LittleEndian.PutUint64(buf[:], value) 273 m[propOffsetTagMap[offset]] = buf[:] 274 } 275 276 func (p *Properties) saveUvarint(m map[string][]byte, offset uintptr, value uint64) { 277 var buf [10]byte 278 n := binary.PutUvarint(buf[:], value) 279 m[propOffsetTagMap[offset]] = buf[:n] 280 } 281 282 func (p *Properties) saveString(m map[string][]byte, offset uintptr, value string) { 283 m[propOffsetTagMap[offset]] = []byte(value) 284 } 285 286 func (p *Properties) save(w *rawBlockWriter) { 287 m := make(map[string][]byte) 288 for k, v := range p.UserProperties { 289 m[k] = []byte(v) 290 } 291 292 p.saveUvarint(m, unsafe.Offsetof(p.ColumnFamilyID), p.ColumnFamilyID) 293 if p.ColumnFamilyName != "" { 294 p.saveString(m, unsafe.Offsetof(p.ColumnFamilyName), p.ColumnFamilyName) 295 } 296 if p.ComparerName != "" { 297 p.saveString(m, unsafe.Offsetof(p.ComparerName), p.ComparerName) 298 } 299 if p.CompressionName != "" { 300 p.saveString(m, unsafe.Offsetof(p.CompressionName), p.CompressionName) 301 } 302 if p.CompressionOptions != "" { 303 p.saveString(m, unsafe.Offsetof(p.CompressionOptions), p.CompressionOptions) 304 } 305 p.saveUvarint(m, unsafe.Offsetof(p.CreationTime), p.CreationTime) 306 p.saveUvarint(m, unsafe.Offsetof(p.DataSize), p.DataSize) 307 if p.ExternalFormatVersion != 0 { 308 p.saveUint32(m, unsafe.Offsetof(p.ExternalFormatVersion), p.ExternalFormatVersion) 309 p.saveUint64(m, unsafe.Offsetof(p.GlobalSeqNum), p.GlobalSeqNum) 310 } 311 if p.FileCreationTime > 0 { 312 p.saveUvarint(m, unsafe.Offsetof(p.FileCreationTime), p.FileCreationTime) 313 } 314 if p.FilterPolicyName != "" { 315 p.saveString(m, unsafe.Offsetof(p.FilterPolicyName), p.FilterPolicyName) 316 } 317 p.saveUvarint(m, unsafe.Offsetof(p.FilterSize), p.FilterSize) 318 p.saveUvarint(m, unsafe.Offsetof(p.FixedKeyLen), p.FixedKeyLen) 319 p.saveUvarint(m, unsafe.Offsetof(p.FormatVersion), p.FormatVersion) 320 p.saveUvarint(m, unsafe.Offsetof(p.IndexKeyIsUserKey), p.IndexKeyIsUserKey) 321 if p.IndexPartitions != 0 { 322 p.saveUvarint(m, unsafe.Offsetof(p.IndexPartitions), p.IndexPartitions) 323 p.saveUvarint(m, unsafe.Offsetof(p.TopLevelIndexSize), p.TopLevelIndexSize) 324 } 325 p.saveUvarint(m, unsafe.Offsetof(p.IndexSize), p.IndexSize) 326 p.saveUint32(m, unsafe.Offsetof(p.IndexType), p.IndexType) 327 p.saveUvarint(m, unsafe.Offsetof(p.IndexValueIsDeltaEncoded), p.IndexValueIsDeltaEncoded) 328 if p.MergerName != "" { 329 p.saveString(m, unsafe.Offsetof(p.MergerName), p.MergerName) 330 } 331 p.saveUvarint(m, unsafe.Offsetof(p.NumDataBlocks), p.NumDataBlocks) 332 p.saveUvarint(m, unsafe.Offsetof(p.NumEntries), p.NumEntries) 333 p.saveUvarint(m, unsafe.Offsetof(p.NumDeletions), p.NumDeletions) 334 p.saveUvarint(m, unsafe.Offsetof(p.NumMergeOperands), p.NumMergeOperands) 335 p.saveUvarint(m, unsafe.Offsetof(p.NumRangeDeletions), p.NumRangeDeletions) 336 if p.NumRangeKeys() > 0 { 337 p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeyDels), p.NumRangeKeyDels) 338 p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeySets), p.NumRangeKeySets) 339 p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeyUnsets), p.NumRangeKeyUnsets) 340 p.saveUvarint(m, unsafe.Offsetof(p.RawRangeKeyKeySize), p.RawRangeKeyKeySize) 341 p.saveUvarint(m, unsafe.Offsetof(p.RawRangeKeyValueSize), p.RawRangeKeyValueSize) 342 } 343 p.saveUvarint(m, unsafe.Offsetof(p.OldestKeyTime), p.OldestKeyTime) 344 if p.PrefixExtractorName != "" { 345 p.saveString(m, unsafe.Offsetof(p.PrefixExtractorName), p.PrefixExtractorName) 346 } 347 p.saveBool(m, unsafe.Offsetof(p.PrefixFiltering), p.PrefixFiltering) 348 if p.PropertyCollectorNames != "" { 349 p.saveString(m, unsafe.Offsetof(p.PropertyCollectorNames), p.PropertyCollectorNames) 350 } 351 p.saveUvarint(m, unsafe.Offsetof(p.RawKeySize), p.RawKeySize) 352 p.saveUvarint(m, unsafe.Offsetof(p.RawValueSize), p.RawValueSize) 353 p.saveBool(m, unsafe.Offsetof(p.WholeKeyFiltering), p.WholeKeyFiltering) 354 355 keys := make([]string, 0, len(m)) 356 for key := range m { 357 keys = append(keys, key) 358 } 359 sort.Strings(keys) 360 for _, key := range keys { 361 w.add(InternalKey{UserKey: []byte(key)}, m[key]) 362 } 363 }