github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/sstable/properties.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package sstable 6 7 import ( 8 "bytes" 9 "encoding/binary" 10 "fmt" 11 "math" 12 "reflect" 13 "sort" 14 "unsafe" 15 ) 16 17 const propertiesBlockRestartInterval = math.MaxInt32 18 19 var propTagMap = make(map[string]reflect.StructField) 20 21 var columnFamilyIDField = func() reflect.StructField { 22 f, ok := reflect.TypeOf(Properties{}).FieldByName("ColumnFamilyID") 23 if !ok { 24 panic("Properties.ColumnFamilyID field not found") 25 } 26 return f 27 }() 28 29 var propOffsetTagMap = make(map[uintptr]string) 30 31 func init() { 32 t := reflect.TypeOf(Properties{}) 33 for i := 0; i < t.NumField(); i++ { 34 f := t.Field(i) 35 if tag := f.Tag.Get("prop"); tag != "" { 36 switch f.Type.Kind() { 37 case reflect.Bool: 38 case reflect.Uint32: 39 case reflect.Uint64: 40 case reflect.String: 41 default: 42 panic(fmt.Sprintf("unsupported property field type: %s %s", f.Name, f.Type)) 43 } 44 propTagMap[tag] = f 45 propOffsetTagMap[f.Offset] = tag 46 } 47 } 48 } 49 50 // Properties holds the sstable property values. The properties are 51 // automatically populated during sstable creation and load from the properties 52 // meta block when an sstable is opened. 53 type Properties struct { 54 // ID of column family for this SST file, corresponding to the CF identified 55 // by column_family_name. 56 ColumnFamilyID uint64 `prop:"rocksdb.column.family.id"` 57 // Name of the column family with which this SST file is associated. Empty if 58 // the column family is unknown. 59 ColumnFamilyName string `prop:"rocksdb.column.family.name"` 60 // The name of the comparer used in this table. 61 ComparerName string `prop:"rocksdb.comparator"` 62 // The compression algorithm used to compress blocks. 63 CompressionName string `prop:"rocksdb.compression"` 64 // The compression options used to compress blocks. 65 CompressionOptions string `prop:"rocksdb.compression_options"` 66 // The time when the SST file was created. Since SST files are immutable, 67 // this is equivalent to last modified time. 68 CreationTime uint64 `prop:"rocksdb.creation.time"` 69 // The total size of all data blocks. 70 DataSize uint64 `prop:"rocksdb.data.size"` 71 // Actual SST file creation time. 0 means unknown. 72 FileCreationTime uint64 `prop:"rocksdb.file.creation.time"` 73 // The name of the filter policy used in this table. Empty if no filter 74 // policy is used. 75 FilterPolicyName string `prop:"rocksdb.filter.policy"` 76 // The size of filter block. 77 FilterSize uint64 `prop:"rocksdb.filter.size"` 78 // If 0, key is variable length. Otherwise number of bytes for each key. 79 FixedKeyLen uint64 `prop:"rocksdb.fixed.key.length"` 80 // format version, reserved for backward compatibility. 81 FormatVersion uint64 `prop:"rocksdb.format.version"` 82 // The global sequence number to use for all entries in the table. Present if 83 // the table was created externally and ingested whole. 84 GlobalSeqNum uint64 `prop:"rocksdb.external_sst_file.global_seqno"` 85 // Whether the index key is user key or an internal key. 86 IndexKeyIsUserKey uint64 `prop:"rocksdb.index.key.is.user.key"` 87 // Total number of index partitions if kTwoLevelIndexSearch is used. 88 IndexPartitions uint64 `prop:"rocksdb.index.partitions"` 89 // The size of index block. 90 IndexSize uint64 `prop:"rocksdb.index.size"` 91 // The index type. TODO(peter): add a more detailed description. 92 IndexType uint32 `prop:"rocksdb.block.based.table.index.type"` 93 // Whether delta encoding is used to encode the index values. 94 IndexValueIsDeltaEncoded uint64 `prop:"rocksdb.index.value.is.delta.encoded"` 95 // The name of the merger used in this table. Empty if no merger is used. 96 MergerName string `prop:"rocksdb.merge.operator"` 97 // The number of blocks in this table. 98 NumDataBlocks uint64 `prop:"rocksdb.num.data.blocks"` 99 // The number of deletion entries in this table. 100 NumDeletions uint64 `prop:"rocksdb.deleted.keys"` 101 // The number of entries in this table. 102 NumEntries uint64 `prop:"rocksdb.num.entries"` 103 // The number of merge operands in the table. 104 NumMergeOperands uint64 `prop:"rocksdb.merge.operands"` 105 // The number of range deletions in this table. 106 NumRangeDeletions uint64 `prop:"rocksdb.num.range-deletions"` 107 // Timestamp of the earliest key. 0 if unknown. 108 OldestKeyTime uint64 `prop:"rocksdb.oldest.key.time"` 109 // The name of the prefix extractor used in this table. Empty if no prefix 110 // extractor is used. 111 PrefixExtractorName string `prop:"rocksdb.prefix.extractor.name"` 112 // If filtering is enabled, was the filter created on the key prefix. 113 PrefixFiltering bool `prop:"rocksdb.block.based.table.prefix.filtering"` 114 // A comma separated list of names of the property collectors used in this 115 // table. 116 PropertyCollectorNames string `prop:"rocksdb.property.collectors"` 117 // Total raw key size. 118 RawKeySize uint64 `prop:"rocksdb.raw.key.size"` 119 // Total raw value size. 120 RawValueSize uint64 `prop:"rocksdb.raw.value.size"` 121 // Size of the top-level index if kTwoLevelIndexSearch is used. 122 TopLevelIndexSize uint64 `prop:"rocksdb.top-level.index.size"` 123 // User collected properties. 124 UserProperties map[string]string 125 // ValueOffsets map from property name to byte offset of the property value 126 // within the file. Only set if the properties have been loaded from a file. 127 ValueOffsets map[string]uint64 128 // The version. TODO(peter): add a more detailed description. 129 Version uint32 `prop:"rocksdb.external_sst_file.version"` 130 // If filtering is enabled, was the filter created on the whole key. 131 WholeKeyFiltering bool `prop:"rocksdb.block.based.table.whole.key.filtering"` 132 } 133 134 func (p *Properties) String() string { 135 var buf bytes.Buffer 136 v := reflect.ValueOf(*p) 137 vt := v.Type() 138 for i := 0; i < v.NumField(); i++ { 139 ft := vt.Field(i) 140 tag := ft.Tag.Get("prop") 141 if tag == "" { 142 continue 143 } 144 fmt.Fprintf(&buf, "%s: ", tag) 145 f := v.Field(i) 146 switch ft.Type.Kind() { 147 case reflect.Bool: 148 fmt.Fprintf(&buf, "%t\n", f.Bool()) 149 case reflect.Uint32: 150 fmt.Fprintf(&buf, "%d\n", f.Uint()) 151 case reflect.Uint64: 152 u := f.Uint() 153 if ft.Offset == columnFamilyIDField.Offset && u == math.MaxInt32 { 154 fmt.Fprintf(&buf, "-\n") 155 } else { 156 fmt.Fprintf(&buf, "%d\n", f.Uint()) 157 } 158 case reflect.String: 159 fmt.Fprintf(&buf, "%s\n", f.String()) 160 default: 161 panic("not reached") 162 } 163 } 164 keys := make([]string, 0, len(p.UserProperties)) 165 for key := range p.UserProperties { 166 keys = append(keys, key) 167 } 168 sort.Strings(keys) 169 for _, key := range keys { 170 fmt.Fprintf(&buf, "%s: %s\n", key, p.UserProperties[key]) 171 } 172 return buf.String() 173 } 174 175 func (p *Properties) load(b block, blockOffset uint64) error { 176 i, err := newRawBlockIter(bytes.Compare, b) 177 if err != nil { 178 return err 179 } 180 p.ValueOffsets = make(map[string]uint64) 181 v := reflect.ValueOf(p).Elem() 182 for valid := i.First(); valid; valid = i.Next() { 183 tag := i.Key().UserKey 184 p.ValueOffsets[string(tag)] = blockOffset + i.valueOffset() 185 if f, ok := propTagMap[string(tag)]; ok { 186 field := v.FieldByIndex(f.Index) 187 switch f.Type.Kind() { 188 case reflect.Bool: 189 field.SetBool(string(i.Value()) == "1") 190 case reflect.Uint32: 191 field.SetUint(uint64(binary.LittleEndian.Uint32(i.Value()))) 192 case reflect.Uint64: 193 var n uint64 194 if string(tag) == "rocksdb.external_sst_file.global_seqno" { 195 n = binary.LittleEndian.Uint64(i.Value()) 196 } else { 197 n, _ = binary.Uvarint(i.Value()) 198 } 199 field.SetUint(n) 200 case reflect.String: 201 field.SetString(string(i.Value())) 202 default: 203 panic("not reached") 204 } 205 continue 206 } 207 if p.UserProperties == nil { 208 p.UserProperties = make(map[string]string) 209 } 210 p.UserProperties[string(tag)] = string(i.Value()) 211 } 212 return nil 213 } 214 215 func (p *Properties) saveBool(m map[string][]byte, offset uintptr, value bool) { 216 tag := propOffsetTagMap[offset] 217 if value { 218 m[tag] = []byte{'1'} 219 } else { 220 m[tag] = []byte{'0'} 221 } 222 } 223 224 func (p *Properties) saveUint32(m map[string][]byte, offset uintptr, value uint32) { 225 var buf [4]byte 226 binary.LittleEndian.PutUint32(buf[:], value) 227 m[propOffsetTagMap[offset]] = buf[:] 228 } 229 230 func (p *Properties) saveUint64(m map[string][]byte, offset uintptr, value uint64) { 231 var buf [8]byte 232 binary.LittleEndian.PutUint64(buf[:], value) 233 m[propOffsetTagMap[offset]] = buf[:] 234 } 235 236 func (p *Properties) saveUvarint(m map[string][]byte, offset uintptr, value uint64) { 237 var buf [10]byte 238 n := binary.PutUvarint(buf[:], value) 239 m[propOffsetTagMap[offset]] = buf[:n] 240 } 241 242 func (p *Properties) saveString(m map[string][]byte, offset uintptr, value string) { 243 m[propOffsetTagMap[offset]] = []byte(value) 244 } 245 246 func (p *Properties) save(w *rawBlockWriter) { 247 m := make(map[string][]byte) 248 for k, v := range p.UserProperties { 249 m[k] = []byte(v) 250 } 251 252 p.saveUvarint(m, unsafe.Offsetof(p.ColumnFamilyID), p.ColumnFamilyID) 253 if p.ColumnFamilyName != "" { 254 p.saveString(m, unsafe.Offsetof(p.ColumnFamilyName), p.ColumnFamilyName) 255 } 256 if p.ComparerName != "" { 257 p.saveString(m, unsafe.Offsetof(p.ComparerName), p.ComparerName) 258 } 259 if p.CompressionName != "" { 260 p.saveString(m, unsafe.Offsetof(p.CompressionName), p.CompressionName) 261 } 262 if p.CompressionOptions != "" { 263 p.saveString(m, unsafe.Offsetof(p.CompressionOptions), p.CompressionOptions) 264 } 265 p.saveUvarint(m, unsafe.Offsetof(p.CreationTime), p.CreationTime) 266 p.saveUvarint(m, unsafe.Offsetof(p.DataSize), p.DataSize) 267 if p.FileCreationTime > 0 { 268 p.saveUvarint(m, unsafe.Offsetof(p.FileCreationTime), p.FileCreationTime) 269 } 270 if p.FilterPolicyName != "" { 271 p.saveString(m, unsafe.Offsetof(p.FilterPolicyName), p.FilterPolicyName) 272 } 273 p.saveUvarint(m, unsafe.Offsetof(p.FilterSize), p.FilterSize) 274 p.saveUvarint(m, unsafe.Offsetof(p.FixedKeyLen), p.FixedKeyLen) 275 p.saveUvarint(m, unsafe.Offsetof(p.FormatVersion), p.FormatVersion) 276 p.saveUint64(m, unsafe.Offsetof(p.GlobalSeqNum), p.GlobalSeqNum) 277 p.saveUvarint(m, unsafe.Offsetof(p.IndexKeyIsUserKey), p.IndexKeyIsUserKey) 278 if p.IndexPartitions != 0 { 279 p.saveUvarint(m, unsafe.Offsetof(p.IndexPartitions), p.IndexPartitions) 280 p.saveUvarint(m, unsafe.Offsetof(p.TopLevelIndexSize), p.TopLevelIndexSize) 281 } 282 p.saveUvarint(m, unsafe.Offsetof(p.IndexSize), p.IndexSize) 283 p.saveUint32(m, unsafe.Offsetof(p.IndexType), p.IndexType) 284 p.saveUvarint(m, unsafe.Offsetof(p.IndexValueIsDeltaEncoded), p.IndexValueIsDeltaEncoded) 285 if p.MergerName != "" { 286 p.saveString(m, unsafe.Offsetof(p.MergerName), p.MergerName) 287 } 288 p.saveUvarint(m, unsafe.Offsetof(p.NumDataBlocks), p.NumDataBlocks) 289 p.saveUvarint(m, unsafe.Offsetof(p.NumEntries), p.NumEntries) 290 p.saveUvarint(m, unsafe.Offsetof(p.NumDeletions), p.NumDeletions) 291 p.saveUvarint(m, unsafe.Offsetof(p.NumMergeOperands), p.NumMergeOperands) 292 p.saveUvarint(m, unsafe.Offsetof(p.NumRangeDeletions), p.NumRangeDeletions) 293 p.saveUvarint(m, unsafe.Offsetof(p.OldestKeyTime), p.OldestKeyTime) 294 if p.PrefixExtractorName != "" { 295 p.saveString(m, unsafe.Offsetof(p.PrefixExtractorName), p.PrefixExtractorName) 296 } 297 p.saveBool(m, unsafe.Offsetof(p.PrefixFiltering), p.PrefixFiltering) 298 if p.PropertyCollectorNames != "" { 299 p.saveString(m, unsafe.Offsetof(p.PropertyCollectorNames), p.PropertyCollectorNames) 300 } 301 p.saveUvarint(m, unsafe.Offsetof(p.RawKeySize), p.RawKeySize) 302 p.saveUvarint(m, unsafe.Offsetof(p.RawValueSize), p.RawValueSize) 303 p.saveUint32(m, unsafe.Offsetof(p.Version), p.Version) 304 p.saveBool(m, unsafe.Offsetof(p.WholeKeyFiltering), p.WholeKeyFiltering) 305 306 keys := make([]string, 0, len(m)) 307 for key := range m { 308 keys = append(keys, key) 309 } 310 sort.Strings(keys) 311 for _, key := range keys { 312 w.add(InternalKey{UserKey: []byte(key)}, m[key]) 313 } 314 }