github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/sstable/properties.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"math"
    12  	"reflect"
    13  	"sort"
    14  	"unsafe"
    15  
    16  	"github.com/zuoyebang/bitalostable/internal/intern"
    17  )
    18  
    19  const propertiesBlockRestartInterval = math.MaxInt32
    20  const propGlobalSeqnumName = "rocksdb.external_sst_file.global_seqno"
    21  
    22  var propTagMap = make(map[string]reflect.StructField)
    23  var propBoolTrue = []byte{'1'}
    24  var propBoolFalse = []byte{'0'}
    25  
    26  var columnFamilyIDField = func() reflect.StructField {
    27  	f, ok := reflect.TypeOf(Properties{}).FieldByName("ColumnFamilyID")
    28  	if !ok {
    29  		panic("Properties.ColumnFamilyID field not found")
    30  	}
    31  	return f
    32  }()
    33  
    34  var propOffsetTagMap = make(map[uintptr]string)
    35  
    36  func init() {
    37  	t := reflect.TypeOf(Properties{})
    38  	for i := 0; i < t.NumField(); i++ {
    39  		f := t.Field(i)
    40  		if tag := f.Tag.Get("prop"); tag != "" {
    41  			switch f.Type.Kind() {
    42  			case reflect.Bool:
    43  			case reflect.Uint32:
    44  			case reflect.Uint64:
    45  			case reflect.String:
    46  			default:
    47  				panic(fmt.Sprintf("unsupported property field type: %s %s", f.Name, f.Type))
    48  			}
    49  			propTagMap[tag] = f
    50  			propOffsetTagMap[f.Offset] = tag
    51  		}
    52  	}
    53  }
    54  
// Properties holds the sstable property values. The properties are
// automatically populated during sstable creation and loaded from the
// properties meta block when an sstable is opened.
//
// Fields carrying a `prop` struct tag are indexed by this package's init
// function; the tag is the property's name in the properties block. Tagged
// fields must be of kind bool, uint32, uint64 or string.
type Properties struct {
	// ID of column family for this SST file, corresponding to the CF identified
	// by column_family_name. String prints "-" for this field when its value
	// is math.MaxInt32.
	ColumnFamilyID uint64 `prop:"rocksdb.column.family.id"`
	// Name of the column family with which this SST file is associated. Empty if
	// the column family is unknown.
	ColumnFamilyName string `prop:"rocksdb.column.family.name"`
	// The name of the comparer used in this table.
	ComparerName string `prop:"rocksdb.comparator"`
	// The compression algorithm used to compress blocks.
	CompressionName string `prop:"rocksdb.compression"`
	// The compression options used to compress blocks.
	CompressionOptions string `prop:"rocksdb.compression_options"`
	// The time when the SST file was created. Since SST files are immutable,
	// this is equivalent to last modified time.
	CreationTime uint64 `prop:"rocksdb.creation.time"`
	// The total size of all data blocks.
	DataSize uint64 `prop:"rocksdb.data.size"`
	// The external sstable version format. Version 2 is the one RocksDB has been
	// using since 5.13. RocksDB only uses the global sequence number for an
	// sstable if this property has been set.
	ExternalFormatVersion uint32 `prop:"rocksdb.external_sst_file.version"`
	// Actual SST file creation time. 0 means unknown.
	FileCreationTime uint64 `prop:"rocksdb.file.creation.time"`
	// The name of the filter policy used in this table. Empty if no filter
	// policy is used.
	FilterPolicyName string `prop:"rocksdb.filter.policy"`
	// The size of filter block.
	FilterSize uint64 `prop:"rocksdb.filter.size"`
	// If 0, key is variable length. Otherwise number of bytes for each key.
	FixedKeyLen uint64 `prop:"rocksdb.fixed.key.length"`
	// Format version, reserved for backward compatibility.
	FormatVersion uint64 `prop:"rocksdb.format.version"`
	// The global sequence number to use for all entries in the table. Present if
	// the table was created externally and ingested whole. Unlike the other
	// uint64 properties, load and save encode this one as a fixed-width
	// little-endian value rather than a uvarint.
	GlobalSeqNum uint64 `prop:"rocksdb.external_sst_file.global_seqno"`
	// Whether the index key is user key or an internal key. Stored as a
	// uint64 rather than a bool.
	IndexKeyIsUserKey uint64 `prop:"rocksdb.index.key.is.user.key"`
	// Total number of index partitions if kTwoLevelIndexSearch is used.
	IndexPartitions uint64 `prop:"rocksdb.index.partitions"`
	// The size of index block.
	IndexSize uint64 `prop:"rocksdb.index.size"`
	// The index type. TODO(peter): add a more detailed description.
	IndexType uint32 `prop:"rocksdb.block.based.table.index.type"`
	// Whether delta encoding is used to encode the index values. Stored as a
	// uint64 rather than a bool.
	IndexValueIsDeltaEncoded uint64 `prop:"rocksdb.index.value.is.delta.encoded"`
	// The name of the merger used in this table. Empty if no merger is used.
	MergerName string `prop:"rocksdb.merge.operator"`
	// The number of blocks in this table.
	NumDataBlocks uint64 `prop:"rocksdb.num.data.blocks"`
	// The number of deletion entries in this table, including both point and
	// range deletions.
	NumDeletions uint64 `prop:"rocksdb.deleted.keys"`
	// The number of entries in this table.
	NumEntries uint64 `prop:"rocksdb.num.entries"`
	// The number of merge operands in the table.
	NumMergeOperands uint64 `prop:"rocksdb.merge.operands"`
	// The number of range deletions in this table.
	NumRangeDeletions uint64 `prop:"rocksdb.num.range-deletions"`
	// The number of RANGEKEYDELs in this table.
	NumRangeKeyDels uint64 `prop:"bitalostable.num.range-key-dels"`
	// The number of RANGEKEYSETs in this table.
	NumRangeKeySets uint64 `prop:"bitalostable.num.range-key-sets"`
	// The number of RANGEKEYUNSETs in this table.
	NumRangeKeyUnsets uint64 `prop:"bitalostable.num.range-key-unsets"`
	// Timestamp of the earliest key. 0 if unknown.
	OldestKeyTime uint64 `prop:"rocksdb.oldest.key.time"`
	// The name of the prefix extractor used in this table. Empty if no prefix
	// extractor is used.
	PrefixExtractorName string `prop:"rocksdb.prefix.extractor.name"`
	// If filtering is enabled, was the filter created on the key prefix.
	PrefixFiltering bool `prop:"rocksdb.block.based.table.prefix.filtering"`
	// A comma separated list of names of the property collectors used in this
	// table.
	PropertyCollectorNames string `prop:"rocksdb.property.collectors"`
	// Total raw key size.
	RawKeySize uint64 `prop:"rocksdb.raw.key.size"`
	// Total raw rangekey key size.
	RawRangeKeyKeySize uint64 `prop:"bitalostable.raw.range-key.key.size"`
	// Total raw rangekey value size.
	RawRangeKeyValueSize uint64 `prop:"bitalostable.raw.range-key.value.size"`
	// Total raw value size.
	RawValueSize uint64 `prop:"rocksdb.raw.value.size"`
	// Size of the top-level index if kTwoLevelIndexSearch is used.
	TopLevelIndexSize uint64 `prop:"rocksdb.top-level.index.size"`
	// User collected properties. This field has no `prop` tag: load stores
	// here every block entry whose key does not match a tagged field, and save
	// writes these entries out alongside the tagged properties.
	UserProperties map[string]string
	// If filtering is enabled, was the filter created on the whole key.
	WholeKeyFiltering bool `prop:"rocksdb.block.based.table.whole.key.filtering"`

	// Loaded is the set of fields which have been loaded from disk. Indexed by
	// the field's byte offset within the struct
	// (reflect.StructField.Offset). Only set if the properties have been loaded
	// from a file. Only exported for testing purposes.
	Loaded map[uintptr]struct{}
}
   154  
   155  // NumPointDeletions returns the number of point deletions in this table.
   156  func (p *Properties) NumPointDeletions() uint64 {
   157  	return p.NumDeletions - p.NumRangeDeletions
   158  }
   159  
   160  // NumRangeKeys returns a count of the number of range keys in this table.
   161  func (p *Properties) NumRangeKeys() uint64 {
   162  	return p.NumRangeKeyDels + p.NumRangeKeySets + p.NumRangeKeyUnsets
   163  }
   164  
   165  func (p *Properties) String() string {
   166  	var buf bytes.Buffer
   167  	v := reflect.ValueOf(*p)
   168  	vt := v.Type()
   169  	for i := 0; i < v.NumField(); i++ {
   170  		ft := vt.Field(i)
   171  		tag := ft.Tag.Get("prop")
   172  		if tag == "" {
   173  			continue
   174  		}
   175  
   176  		f := v.Field(i)
   177  		// TODO(peter): Use f.IsZero() when we can rely on go1.13.
   178  		if zero := reflect.Zero(f.Type()); zero.Interface() == f.Interface() {
   179  			// Skip printing of zero values which were not loaded from disk.
   180  			if _, ok := p.Loaded[ft.Offset]; !ok {
   181  				continue
   182  			}
   183  		}
   184  
   185  		fmt.Fprintf(&buf, "%s: ", tag)
   186  		switch ft.Type.Kind() {
   187  		case reflect.Bool:
   188  			fmt.Fprintf(&buf, "%t\n", f.Bool())
   189  		case reflect.Uint32:
   190  			fmt.Fprintf(&buf, "%d\n", f.Uint())
   191  		case reflect.Uint64:
   192  			u := f.Uint()
   193  			if ft.Offset == columnFamilyIDField.Offset && u == math.MaxInt32 {
   194  				fmt.Fprintf(&buf, "-\n")
   195  			} else {
   196  				fmt.Fprintf(&buf, "%d\n", f.Uint())
   197  			}
   198  		case reflect.String:
   199  			fmt.Fprintf(&buf, "%s\n", f.String())
   200  		default:
   201  			panic("not reached")
   202  		}
   203  	}
   204  	keys := make([]string, 0, len(p.UserProperties))
   205  	for key := range p.UserProperties {
   206  		keys = append(keys, key)
   207  	}
   208  	sort.Strings(keys)
   209  	for _, key := range keys {
   210  		fmt.Fprintf(&buf, "%s: %s\n", key, p.UserProperties[key])
   211  	}
   212  	return buf.String()
   213  }
   214  
   215  func (p *Properties) load(b block, blockOffset uint64) error {
   216  	i, err := newRawBlockIter(bytes.Compare, b)
   217  	if err != nil {
   218  		return err
   219  	}
   220  	p.Loaded = make(map[uintptr]struct{})
   221  	v := reflect.ValueOf(p).Elem()
   222  	for valid := i.First(); valid; valid = i.Next() {
   223  		tag := intern.Bytes(i.Key().UserKey)
   224  		if f, ok := propTagMap[tag]; ok {
   225  			p.Loaded[f.Offset] = struct{}{}
   226  			field := v.FieldByIndex(f.Index)
   227  			switch f.Type.Kind() {
   228  			case reflect.Bool:
   229  				field.SetBool(bytes.Equal(i.Value(), propBoolTrue))
   230  			case reflect.Uint32:
   231  				field.SetUint(uint64(binary.LittleEndian.Uint32(i.Value())))
   232  			case reflect.Uint64:
   233  				var n uint64
   234  				if tag == propGlobalSeqnumName {
   235  					n = binary.LittleEndian.Uint64(i.Value())
   236  				} else {
   237  					n, _ = binary.Uvarint(i.Value())
   238  				}
   239  				field.SetUint(n)
   240  			case reflect.String:
   241  				field.SetString(intern.Bytes(i.Value()))
   242  			default:
   243  				panic("not reached")
   244  			}
   245  			continue
   246  		}
   247  		if p.UserProperties == nil {
   248  			p.UserProperties = make(map[string]string)
   249  		}
   250  		p.UserProperties[tag] = string(i.Value())
   251  	}
   252  	return nil
   253  }
   254  
   255  func (p *Properties) saveBool(m map[string][]byte, offset uintptr, value bool) {
   256  	tag := propOffsetTagMap[offset]
   257  	if value {
   258  		m[tag] = propBoolTrue
   259  	} else {
   260  		m[tag] = propBoolFalse
   261  	}
   262  }
   263  
   264  func (p *Properties) saveUint32(m map[string][]byte, offset uintptr, value uint32) {
   265  	var buf [4]byte
   266  	binary.LittleEndian.PutUint32(buf[:], value)
   267  	m[propOffsetTagMap[offset]] = buf[:]
   268  }
   269  
   270  func (p *Properties) saveUint64(m map[string][]byte, offset uintptr, value uint64) {
   271  	var buf [8]byte
   272  	binary.LittleEndian.PutUint64(buf[:], value)
   273  	m[propOffsetTagMap[offset]] = buf[:]
   274  }
   275  
   276  func (p *Properties) saveUvarint(m map[string][]byte, offset uintptr, value uint64) {
   277  	var buf [10]byte
   278  	n := binary.PutUvarint(buf[:], value)
   279  	m[propOffsetTagMap[offset]] = buf[:n]
   280  }
   281  
   282  func (p *Properties) saveString(m map[string][]byte, offset uintptr, value string) {
   283  	m[propOffsetTagMap[offset]] = []byte(value)
   284  }
   285  
   286  func (p *Properties) save(w *rawBlockWriter) {
   287  	m := make(map[string][]byte)
   288  	for k, v := range p.UserProperties {
   289  		m[k] = []byte(v)
   290  	}
   291  
   292  	p.saveUvarint(m, unsafe.Offsetof(p.ColumnFamilyID), p.ColumnFamilyID)
   293  	if p.ColumnFamilyName != "" {
   294  		p.saveString(m, unsafe.Offsetof(p.ColumnFamilyName), p.ColumnFamilyName)
   295  	}
   296  	if p.ComparerName != "" {
   297  		p.saveString(m, unsafe.Offsetof(p.ComparerName), p.ComparerName)
   298  	}
   299  	if p.CompressionName != "" {
   300  		p.saveString(m, unsafe.Offsetof(p.CompressionName), p.CompressionName)
   301  	}
   302  	if p.CompressionOptions != "" {
   303  		p.saveString(m, unsafe.Offsetof(p.CompressionOptions), p.CompressionOptions)
   304  	}
   305  	p.saveUvarint(m, unsafe.Offsetof(p.CreationTime), p.CreationTime)
   306  	p.saveUvarint(m, unsafe.Offsetof(p.DataSize), p.DataSize)
   307  	if p.ExternalFormatVersion != 0 {
   308  		p.saveUint32(m, unsafe.Offsetof(p.ExternalFormatVersion), p.ExternalFormatVersion)
   309  		p.saveUint64(m, unsafe.Offsetof(p.GlobalSeqNum), p.GlobalSeqNum)
   310  	}
   311  	if p.FileCreationTime > 0 {
   312  		p.saveUvarint(m, unsafe.Offsetof(p.FileCreationTime), p.FileCreationTime)
   313  	}
   314  	if p.FilterPolicyName != "" {
   315  		p.saveString(m, unsafe.Offsetof(p.FilterPolicyName), p.FilterPolicyName)
   316  	}
   317  	p.saveUvarint(m, unsafe.Offsetof(p.FilterSize), p.FilterSize)
   318  	p.saveUvarint(m, unsafe.Offsetof(p.FixedKeyLen), p.FixedKeyLen)
   319  	p.saveUvarint(m, unsafe.Offsetof(p.FormatVersion), p.FormatVersion)
   320  	p.saveUvarint(m, unsafe.Offsetof(p.IndexKeyIsUserKey), p.IndexKeyIsUserKey)
   321  	if p.IndexPartitions != 0 {
   322  		p.saveUvarint(m, unsafe.Offsetof(p.IndexPartitions), p.IndexPartitions)
   323  		p.saveUvarint(m, unsafe.Offsetof(p.TopLevelIndexSize), p.TopLevelIndexSize)
   324  	}
   325  	p.saveUvarint(m, unsafe.Offsetof(p.IndexSize), p.IndexSize)
   326  	p.saveUint32(m, unsafe.Offsetof(p.IndexType), p.IndexType)
   327  	p.saveUvarint(m, unsafe.Offsetof(p.IndexValueIsDeltaEncoded), p.IndexValueIsDeltaEncoded)
   328  	if p.MergerName != "" {
   329  		p.saveString(m, unsafe.Offsetof(p.MergerName), p.MergerName)
   330  	}
   331  	p.saveUvarint(m, unsafe.Offsetof(p.NumDataBlocks), p.NumDataBlocks)
   332  	p.saveUvarint(m, unsafe.Offsetof(p.NumEntries), p.NumEntries)
   333  	p.saveUvarint(m, unsafe.Offsetof(p.NumDeletions), p.NumDeletions)
   334  	p.saveUvarint(m, unsafe.Offsetof(p.NumMergeOperands), p.NumMergeOperands)
   335  	p.saveUvarint(m, unsafe.Offsetof(p.NumRangeDeletions), p.NumRangeDeletions)
   336  	if p.NumRangeKeys() > 0 {
   337  		p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeyDels), p.NumRangeKeyDels)
   338  		p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeySets), p.NumRangeKeySets)
   339  		p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeyUnsets), p.NumRangeKeyUnsets)
   340  		p.saveUvarint(m, unsafe.Offsetof(p.RawRangeKeyKeySize), p.RawRangeKeyKeySize)
   341  		p.saveUvarint(m, unsafe.Offsetof(p.RawRangeKeyValueSize), p.RawRangeKeyValueSize)
   342  	}
   343  	p.saveUvarint(m, unsafe.Offsetof(p.OldestKeyTime), p.OldestKeyTime)
   344  	if p.PrefixExtractorName != "" {
   345  		p.saveString(m, unsafe.Offsetof(p.PrefixExtractorName), p.PrefixExtractorName)
   346  	}
   347  	p.saveBool(m, unsafe.Offsetof(p.PrefixFiltering), p.PrefixFiltering)
   348  	if p.PropertyCollectorNames != "" {
   349  		p.saveString(m, unsafe.Offsetof(p.PropertyCollectorNames), p.PropertyCollectorNames)
   350  	}
   351  	p.saveUvarint(m, unsafe.Offsetof(p.RawKeySize), p.RawKeySize)
   352  	p.saveUvarint(m, unsafe.Offsetof(p.RawValueSize), p.RawValueSize)
   353  	p.saveBool(m, unsafe.Offsetof(p.WholeKeyFiltering), p.WholeKeyFiltering)
   354  
   355  	keys := make([]string, 0, len(m))
   356  	for key := range m {
   357  		keys = append(keys, key)
   358  	}
   359  	sort.Strings(keys)
   360  	for _, key := range keys {
   361  		w.add(InternalKey{UserKey: []byte(key)}, m[key])
   362  	}
   363  }