github.com/grafana/pyroscope@v1.18.0/pkg/block/metadata/metadata.go (about)

     1  package metadata
     2  
     3  import (
     4  	"encoding/binary"
     5  	"errors"
     6  	"fmt"
     7  	"hash/crc32"
     8  	"io"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/oklog/ulid/v2"
    13  
    14  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    15  	"github.com/grafana/pyroscope/pkg/iter"
    16  )
    17  
    18  var ErrMetadataInvalid = errors.New("metadata: invalid metadata")
    19  
    20  func Tenant(md *metastorev1.BlockMeta) string {
    21  	if md.Tenant <= 0 || int(md.Tenant) >= len(md.StringTable) {
    22  		return ""
    23  	}
    24  	return md.StringTable[md.Tenant]
    25  }
    26  
    27  func Timestamp(md *metastorev1.BlockMeta) time.Time {
    28  	return time.UnixMilli(int64(ulid.MustParse(md.Id).Time()))
    29  }
    30  
    31  func Sanitize(md *metastorev1.BlockMeta) error {
    32  	// TODO(kolesnikovae): Implement.
    33  	_, err := ulid.Parse(md.Id)
    34  	return err
    35  }
    36  
    37  var stringTablePool = sync.Pool{
    38  	New: func() any { return NewStringTable() },
    39  }
    40  
    41  type StringTable struct {
    42  	Dict    map[string]int32
    43  	Strings []string
    44  }
    45  
    46  func NewStringTable() *StringTable {
    47  	var empty string
    48  	return &StringTable{
    49  		Dict:    map[string]int32{empty: 0},
    50  		Strings: []string{empty},
    51  	}
    52  }
    53  
    54  func (t *StringTable) IsEmpty() bool {
    55  	if len(t.Strings) == 0 {
    56  		return true
    57  	}
    58  	return len(t.Strings) == 1 && t.Strings[0] == ""
    59  }
    60  
    61  func (t *StringTable) Reset() {
    62  	clear(t.Dict)
    63  	t.Dict[""] = 0
    64  	t.Strings[0] = ""
    65  	t.Strings = t.Strings[:1]
    66  }
    67  
    68  func (t *StringTable) Clone() *StringTable {
    69  	n := &StringTable{
    70  		Dict:    make(map[string]int32, len(t.Dict)),
    71  		Strings: make([]string, len(t.Strings)),
    72  	}
    73  	for k, v := range t.Dict {
    74  		n.Dict[k] = v
    75  	}
    76  	copy(n.Strings, t.Strings)
    77  	return n
    78  }
    79  
    80  func (t *StringTable) Put(s string) int32 {
    81  	if i, ok := t.Dict[s]; ok {
    82  		return i
    83  	}
    84  	i := int32(len(t.Strings))
    85  	t.Strings = append(t.Strings, s)
    86  	t.Dict[s] = i
    87  	return i
    88  }
    89  
    90  func (t *StringTable) Lookup(i int32) string {
    91  	if i < 0 || int(i) >= len(t.Strings) {
    92  		return ""
    93  	}
    94  	return t.Strings[i]
    95  }
    96  
    97  func (t *StringTable) LookupString(s string) int32 {
    98  	if i, ok := t.Dict[s]; ok {
    99  		return i
   100  	}
   101  	return -1
   102  }
   103  
   104  // Import strings from the metadata entry and update the references.
   105  func (t *StringTable) Import(src *metastorev1.BlockMeta) {
   106  	if len(src.StringTable) < 2 {
   107  		return
   108  	}
   109  	// TODO: Pool?
   110  	lut := make([]int32, len(src.StringTable))
   111  	for i, s := range src.StringTable {
   112  		x := t.Put(s)
   113  		lut[i] = x
   114  	}
   115  	src.Tenant = lut[src.Tenant]
   116  	src.CreatedBy = lut[src.CreatedBy]
   117  	for _, ds := range src.Datasets {
   118  		ds.Tenant = lut[ds.Tenant]
   119  		ds.Name = lut[ds.Name]
   120  		var skip int
   121  		for i, v := range ds.Labels {
   122  			if i == skip {
   123  				skip += int(v)*2 + 1
   124  				continue
   125  			}
   126  			ds.Labels[i] = lut[v]
   127  		}
   128  	}
   129  }
   130  
   131  func (t *StringTable) Export(dst *metastorev1.BlockMeta) {
   132  	n := stringTablePool.Get().(*StringTable)
   133  	defer stringTablePool.Put(n)
   134  	dst.Tenant = n.Put(t.Lookup(dst.Tenant))
   135  	dst.CreatedBy = n.Put(t.Lookup(dst.CreatedBy))
   136  	for _, ds := range dst.Datasets {
   137  		ds.Tenant = n.Put(t.Lookup(ds.Tenant))
   138  		ds.Name = n.Put(t.Lookup(ds.Name))
   139  		var skip int
   140  		for i, v := range ds.Labels {
   141  			if i == skip {
   142  				skip += int(v)*2 + 1
   143  				continue
   144  			}
   145  			ds.Labels[i] = n.Put(t.Lookup(ds.Labels[i]))
   146  		}
   147  	}
   148  	dst.StringTable = make([]string, len(n.Strings))
   149  	copy(dst.StringTable, n.Strings)
   150  	n.Reset()
   151  }
   152  
   153  func (t *StringTable) Load(x iter.Iterator[string]) error {
   154  	for x.Next() {
   155  		t.Put(x.At())
   156  	}
   157  	return x.Err()
   158  }
   159  
   160  func OpenStringTable(src *metastorev1.BlockMeta) *StringTable {
   161  	t := &StringTable{
   162  		Dict:    make(map[string]int32, len(src.StringTable)),
   163  		Strings: src.StringTable,
   164  	}
   165  	for i, s := range src.StringTable {
   166  		t.Dict[s] = int32(i)
   167  	}
   168  	return t
   169  }
   170  
   171  var castagnoli = crc32.MakeTable(crc32.Castagnoli)
   172  
   173  // Encode writes the metadata to the writer in the following format:
   174  //
   175  //	raw       | protobuf-encoded metadata
   176  //	be_uint32 | size of the raw metadata
   177  //	be_uint32 | CRC32 of the raw metadata and size
   178  func Encode(w io.Writer, md *metastorev1.BlockMeta) error {
   179  	crc := crc32.New(castagnoli)
   180  	w = io.MultiWriter(w, crc)
   181  	b, _ := md.MarshalVT()
   182  	n, err := w.Write(b)
   183  	if err != nil {
   184  		return err
   185  	}
   186  	if err = binary.Write(w, binary.BigEndian, uint32(n)); err != nil {
   187  		return err
   188  	}
   189  	return binary.Write(w, binary.BigEndian, crc.Sum32())
   190  }
   191  
   192  // Decode metadata encoded with Encode.
   193  //
   194  // Note that the metadata decoded from the object has zero Size field,
   195  // as the block size is not known at the point the metadata is written.
   196  // It is expected that the caller has access to the block object and
   197  // can set the Size field after reading the metadata.
   198  func Decode(b []byte, md *metastorev1.BlockMeta) error {
   199  	if len(b) <= 8 {
   200  		return fmt.Errorf("%w: invalid size", ErrMetadataInvalid)
   201  	}
   202  	crc := binary.BigEndian.Uint32(b[len(b)-4:])
   203  	size := binary.BigEndian.Uint32(b[len(b)-8 : len(b)-4])
   204  	off := len(b) - 8 - int(size)
   205  	if off < 0 {
   206  		return fmt.Errorf("%w: invalid size", ErrMetadataInvalid)
   207  	}
   208  	if crc32.Checksum(b[off:len(b)-4], castagnoli) != crc {
   209  		return fmt.Errorf("%w: invalid CRC", ErrMetadataInvalid)
   210  	}
   211  	return md.UnmarshalVT(b[off : len(b)-8])
   212  }