github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/sstable/properties.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"math"
    12  	"reflect"
    13  	"sort"
    14  	"unsafe"
    15  )
    16  
    17  const propertiesBlockRestartInterval = math.MaxInt32
    18  
    19  var propTagMap = make(map[string]reflect.StructField)
    20  
    21  var columnFamilyIDField = func() reflect.StructField {
    22  	f, ok := reflect.TypeOf(Properties{}).FieldByName("ColumnFamilyID")
    23  	if !ok {
    24  		panic("Properties.ColumnFamilyID field not found")
    25  	}
    26  	return f
    27  }()
    28  
    29  var propOffsetTagMap = make(map[uintptr]string)
    30  
    31  func init() {
    32  	t := reflect.TypeOf(Properties{})
    33  	for i := 0; i < t.NumField(); i++ {
    34  		f := t.Field(i)
    35  		if tag := f.Tag.Get("prop"); tag != "" {
    36  			switch f.Type.Kind() {
    37  			case reflect.Bool:
    38  			case reflect.Uint32:
    39  			case reflect.Uint64:
    40  			case reflect.String:
    41  			default:
    42  				panic(fmt.Sprintf("unsupported property field type: %s %s", f.Name, f.Type))
    43  			}
    44  			propTagMap[tag] = f
    45  			propOffsetTagMap[f.Offset] = tag
    46  		}
    47  	}
    48  }
    49  
// Properties holds the sstable property values. The properties are
// automatically populated during sstable creation and loaded from the
// properties meta block when an sstable is opened.
//
// Fields carrying a `prop` struct tag are discovered via reflection; the tag
// value is the property name. Tagged fields must be of kind bool, uint32,
// uint64 or string.
type Properties struct {
	// ID of column family for this SST file, corresponding to the CF identified
	// by column_family_name.
	ColumnFamilyID uint64 `prop:"rocksdb.column.family.id"`
	// Name of the column family with which this SST file is associated. Empty if
	// the column family is unknown.
	ColumnFamilyName string `prop:"rocksdb.column.family.name"`
	// The name of the comparer used in this table.
	ComparerName string `prop:"rocksdb.comparator"`
	// The compression algorithm used to compress blocks.
	CompressionName string `prop:"rocksdb.compression"`
	// The compression options used to compress blocks.
	CompressionOptions string `prop:"rocksdb.compression_options"`
	// The time when the SST file was created. Since SST files are immutable,
	// this is equivalent to last modified time.
	CreationTime uint64 `prop:"rocksdb.creation.time"`
	// The total size of all data blocks.
	DataSize uint64 `prop:"rocksdb.data.size"`
	// Actual SST file creation time. 0 means unknown.
	FileCreationTime uint64 `prop:"rocksdb.file.creation.time"`
	// The name of the filter policy used in this table. Empty if no filter
	// policy is used.
	FilterPolicyName string `prop:"rocksdb.filter.policy"`
	// The size of the filter block.
	FilterSize uint64 `prop:"rocksdb.filter.size"`
	// If 0, key is variable length. Otherwise number of bytes for each key.
	FixedKeyLen uint64 `prop:"rocksdb.fixed.key.length"`
	// Format version, reserved for backward compatibility.
	FormatVersion uint64 `prop:"rocksdb.format.version"`
	// The global sequence number to use for all entries in the table. Present if
	// the table was created externally and ingested whole.
	GlobalSeqNum uint64 `prop:"rocksdb.external_sst_file.global_seqno"`
	// Whether the index key is a user key or an internal key.
	IndexKeyIsUserKey uint64 `prop:"rocksdb.index.key.is.user.key"`
	// Total number of index partitions if kTwoLevelIndexSearch is used.
	IndexPartitions uint64 `prop:"rocksdb.index.partitions"`
	// The size of the index block.
	IndexSize uint64 `prop:"rocksdb.index.size"`
	// The index type. TODO(peter): add a more detailed description.
	IndexType uint32 `prop:"rocksdb.block.based.table.index.type"`
	// Whether delta encoding is used to encode the index values.
	IndexValueIsDeltaEncoded uint64 `prop:"rocksdb.index.value.is.delta.encoded"`
	// The name of the merger used in this table. Empty if no merger is used.
	MergerName string `prop:"rocksdb.merge.operator"`
	// The number of data blocks in this table.
	NumDataBlocks uint64 `prop:"rocksdb.num.data.blocks"`
	// The number of deletion entries in this table.
	NumDeletions uint64 `prop:"rocksdb.deleted.keys"`
	// The number of entries in this table.
	NumEntries uint64 `prop:"rocksdb.num.entries"`
	// The number of merge operands in the table.
	NumMergeOperands uint64 `prop:"rocksdb.merge.operands"`
	// The number of range deletions in this table.
	NumRangeDeletions uint64 `prop:"rocksdb.num.range-deletions"`
	// Timestamp of the earliest key. 0 if unknown.
	OldestKeyTime uint64 `prop:"rocksdb.oldest.key.time"`
	// The name of the prefix extractor used in this table. Empty if no prefix
	// extractor is used.
	PrefixExtractorName string `prop:"rocksdb.prefix.extractor.name"`
	// If filtering is enabled, was the filter created on the key prefix.
	PrefixFiltering bool `prop:"rocksdb.block.based.table.prefix.filtering"`
	// A comma separated list of names of the property collectors used in this
	// table.
	PropertyCollectorNames string `prop:"rocksdb.property.collectors"`
	// Total raw key size.
	RawKeySize uint64 `prop:"rocksdb.raw.key.size"`
	// Total raw value size.
	RawValueSize uint64 `prop:"rocksdb.raw.value.size"`
	// Size of the top-level index if kTwoLevelIndexSearch is used.
	TopLevelIndexSize uint64 `prop:"rocksdb.top-level.index.size"`
	// User collected properties. This field has no `prop` tag; its entries are
	// keyed by property name.
	UserProperties map[string]string
	// ValueOffsets maps from property name to the byte offset of the property
	// value within the file. Only set if the properties have been loaded from a
	// file.
	ValueOffsets map[string]uint64
	// The version. TODO(peter): add a more detailed description.
	Version uint32 `prop:"rocksdb.external_sst_file.version"`
	// If filtering is enabled, was the filter created on the whole key.
	WholeKeyFiltering bool `prop:"rocksdb.block.based.table.whole.key.filtering"`
}
   133  
   134  func (p *Properties) String() string {
   135  	var buf bytes.Buffer
   136  	v := reflect.ValueOf(*p)
   137  	vt := v.Type()
   138  	for i := 0; i < v.NumField(); i++ {
   139  		ft := vt.Field(i)
   140  		tag := ft.Tag.Get("prop")
   141  		if tag == "" {
   142  			continue
   143  		}
   144  		fmt.Fprintf(&buf, "%s: ", tag)
   145  		f := v.Field(i)
   146  		switch ft.Type.Kind() {
   147  		case reflect.Bool:
   148  			fmt.Fprintf(&buf, "%t\n", f.Bool())
   149  		case reflect.Uint32:
   150  			fmt.Fprintf(&buf, "%d\n", f.Uint())
   151  		case reflect.Uint64:
   152  			u := f.Uint()
   153  			if ft.Offset == columnFamilyIDField.Offset && u == math.MaxInt32 {
   154  				fmt.Fprintf(&buf, "-\n")
   155  			} else {
   156  				fmt.Fprintf(&buf, "%d\n", f.Uint())
   157  			}
   158  		case reflect.String:
   159  			fmt.Fprintf(&buf, "%s\n", f.String())
   160  		default:
   161  			panic("not reached")
   162  		}
   163  	}
   164  	keys := make([]string, 0, len(p.UserProperties))
   165  	for key := range p.UserProperties {
   166  		keys = append(keys, key)
   167  	}
   168  	sort.Strings(keys)
   169  	for _, key := range keys {
   170  		fmt.Fprintf(&buf, "%s: %s\n", key, p.UserProperties[key])
   171  	}
   172  	return buf.String()
   173  }
   174  
   175  func (p *Properties) load(b block, blockOffset uint64) error {
   176  	i, err := newRawBlockIter(bytes.Compare, b)
   177  	if err != nil {
   178  		return err
   179  	}
   180  	p.ValueOffsets = make(map[string]uint64)
   181  	v := reflect.ValueOf(p).Elem()
   182  	for valid := i.First(); valid; valid = i.Next() {
   183  		tag := i.Key().UserKey
   184  		p.ValueOffsets[string(tag)] = blockOffset + i.valueOffset()
   185  		if f, ok := propTagMap[string(tag)]; ok {
   186  			field := v.FieldByIndex(f.Index)
   187  			switch f.Type.Kind() {
   188  			case reflect.Bool:
   189  				field.SetBool(string(i.Value()) == "1")
   190  			case reflect.Uint32:
   191  				field.SetUint(uint64(binary.LittleEndian.Uint32(i.Value())))
   192  			case reflect.Uint64:
   193  				var n uint64
   194  				if string(tag) == "rocksdb.external_sst_file.global_seqno" {
   195  					n = binary.LittleEndian.Uint64(i.Value())
   196  				} else {
   197  					n, _ = binary.Uvarint(i.Value())
   198  				}
   199  				field.SetUint(n)
   200  			case reflect.String:
   201  				field.SetString(string(i.Value()))
   202  			default:
   203  				panic("not reached")
   204  			}
   205  			continue
   206  		}
   207  		if p.UserProperties == nil {
   208  			p.UserProperties = make(map[string]string)
   209  		}
   210  		p.UserProperties[string(tag)] = string(i.Value())
   211  	}
   212  	return nil
   213  }
   214  
   215  func (p *Properties) saveBool(m map[string][]byte, offset uintptr, value bool) {
   216  	tag := propOffsetTagMap[offset]
   217  	if value {
   218  		m[tag] = []byte{'1'}
   219  	} else {
   220  		m[tag] = []byte{'0'}
   221  	}
   222  }
   223  
   224  func (p *Properties) saveUint32(m map[string][]byte, offset uintptr, value uint32) {
   225  	var buf [4]byte
   226  	binary.LittleEndian.PutUint32(buf[:], value)
   227  	m[propOffsetTagMap[offset]] = buf[:]
   228  }
   229  
   230  func (p *Properties) saveUint64(m map[string][]byte, offset uintptr, value uint64) {
   231  	var buf [8]byte
   232  	binary.LittleEndian.PutUint64(buf[:], value)
   233  	m[propOffsetTagMap[offset]] = buf[:]
   234  }
   235  
   236  func (p *Properties) saveUvarint(m map[string][]byte, offset uintptr, value uint64) {
   237  	var buf [10]byte
   238  	n := binary.PutUvarint(buf[:], value)
   239  	m[propOffsetTagMap[offset]] = buf[:n]
   240  }
   241  
   242  func (p *Properties) saveString(m map[string][]byte, offset uintptr, value string) {
   243  	m[propOffsetTagMap[offset]] = []byte(value)
   244  }
   245  
   246  func (p *Properties) save(w *rawBlockWriter) {
   247  	m := make(map[string][]byte)
   248  	for k, v := range p.UserProperties {
   249  		m[k] = []byte(v)
   250  	}
   251  
   252  	p.saveUvarint(m, unsafe.Offsetof(p.ColumnFamilyID), p.ColumnFamilyID)
   253  	if p.ColumnFamilyName != "" {
   254  		p.saveString(m, unsafe.Offsetof(p.ColumnFamilyName), p.ColumnFamilyName)
   255  	}
   256  	if p.ComparerName != "" {
   257  		p.saveString(m, unsafe.Offsetof(p.ComparerName), p.ComparerName)
   258  	}
   259  	if p.CompressionName != "" {
   260  		p.saveString(m, unsafe.Offsetof(p.CompressionName), p.CompressionName)
   261  	}
   262  	if p.CompressionOptions != "" {
   263  		p.saveString(m, unsafe.Offsetof(p.CompressionOptions), p.CompressionOptions)
   264  	}
   265  	p.saveUvarint(m, unsafe.Offsetof(p.CreationTime), p.CreationTime)
   266  	p.saveUvarint(m, unsafe.Offsetof(p.DataSize), p.DataSize)
   267  	if p.FileCreationTime > 0 {
   268  		p.saveUvarint(m, unsafe.Offsetof(p.FileCreationTime), p.FileCreationTime)
   269  	}
   270  	if p.FilterPolicyName != "" {
   271  		p.saveString(m, unsafe.Offsetof(p.FilterPolicyName), p.FilterPolicyName)
   272  	}
   273  	p.saveUvarint(m, unsafe.Offsetof(p.FilterSize), p.FilterSize)
   274  	p.saveUvarint(m, unsafe.Offsetof(p.FixedKeyLen), p.FixedKeyLen)
   275  	p.saveUvarint(m, unsafe.Offsetof(p.FormatVersion), p.FormatVersion)
   276  	p.saveUint64(m, unsafe.Offsetof(p.GlobalSeqNum), p.GlobalSeqNum)
   277  	p.saveUvarint(m, unsafe.Offsetof(p.IndexKeyIsUserKey), p.IndexKeyIsUserKey)
   278  	if p.IndexPartitions != 0 {
   279  		p.saveUvarint(m, unsafe.Offsetof(p.IndexPartitions), p.IndexPartitions)
   280  		p.saveUvarint(m, unsafe.Offsetof(p.TopLevelIndexSize), p.TopLevelIndexSize)
   281  	}
   282  	p.saveUvarint(m, unsafe.Offsetof(p.IndexSize), p.IndexSize)
   283  	p.saveUint32(m, unsafe.Offsetof(p.IndexType), p.IndexType)
   284  	p.saveUvarint(m, unsafe.Offsetof(p.IndexValueIsDeltaEncoded), p.IndexValueIsDeltaEncoded)
   285  	if p.MergerName != "" {
   286  		p.saveString(m, unsafe.Offsetof(p.MergerName), p.MergerName)
   287  	}
   288  	p.saveUvarint(m, unsafe.Offsetof(p.NumDataBlocks), p.NumDataBlocks)
   289  	p.saveUvarint(m, unsafe.Offsetof(p.NumEntries), p.NumEntries)
   290  	p.saveUvarint(m, unsafe.Offsetof(p.NumDeletions), p.NumDeletions)
   291  	p.saveUvarint(m, unsafe.Offsetof(p.NumMergeOperands), p.NumMergeOperands)
   292  	p.saveUvarint(m, unsafe.Offsetof(p.NumRangeDeletions), p.NumRangeDeletions)
   293  	p.saveUvarint(m, unsafe.Offsetof(p.OldestKeyTime), p.OldestKeyTime)
   294  	if p.PrefixExtractorName != "" {
   295  		p.saveString(m, unsafe.Offsetof(p.PrefixExtractorName), p.PrefixExtractorName)
   296  	}
   297  	p.saveBool(m, unsafe.Offsetof(p.PrefixFiltering), p.PrefixFiltering)
   298  	if p.PropertyCollectorNames != "" {
   299  		p.saveString(m, unsafe.Offsetof(p.PropertyCollectorNames), p.PropertyCollectorNames)
   300  	}
   301  	p.saveUvarint(m, unsafe.Offsetof(p.RawKeySize), p.RawKeySize)
   302  	p.saveUvarint(m, unsafe.Offsetof(p.RawValueSize), p.RawValueSize)
   303  	p.saveUint32(m, unsafe.Offsetof(p.Version), p.Version)
   304  	p.saveBool(m, unsafe.Offsetof(p.WholeKeyFiltering), p.WholeKeyFiltering)
   305  
   306  	keys := make([]string, 0, len(m))
   307  	for key := range m {
   308  		keys = append(keys, key)
   309  	}
   310  	sort.Strings(keys)
   311  	for _, key := range keys {
   312  		w.add(InternalKey{UserKey: []byte(key)}, m[key])
   313  	}
   314  }