github.com/grafana/pyroscope@v1.18.0/pkg/block/metadata/metadata.go (about) 1 package metadata 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "fmt" 7 "hash/crc32" 8 "io" 9 "sync" 10 "time" 11 12 "github.com/oklog/ulid/v2" 13 14 metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" 15 "github.com/grafana/pyroscope/pkg/iter" 16 ) 17 18 var ErrMetadataInvalid = errors.New("metadata: invalid metadata") 19 20 func Tenant(md *metastorev1.BlockMeta) string { 21 if md.Tenant <= 0 || int(md.Tenant) >= len(md.StringTable) { 22 return "" 23 } 24 return md.StringTable[md.Tenant] 25 } 26 27 func Timestamp(md *metastorev1.BlockMeta) time.Time { 28 return time.UnixMilli(int64(ulid.MustParse(md.Id).Time())) 29 } 30 31 func Sanitize(md *metastorev1.BlockMeta) error { 32 // TODO(kolesnikovae): Implement. 33 _, err := ulid.Parse(md.Id) 34 return err 35 } 36 37 var stringTablePool = sync.Pool{ 38 New: func() any { return NewStringTable() }, 39 } 40 41 type StringTable struct { 42 Dict map[string]int32 43 Strings []string 44 } 45 46 func NewStringTable() *StringTable { 47 var empty string 48 return &StringTable{ 49 Dict: map[string]int32{empty: 0}, 50 Strings: []string{empty}, 51 } 52 } 53 54 func (t *StringTable) IsEmpty() bool { 55 if len(t.Strings) == 0 { 56 return true 57 } 58 return len(t.Strings) == 1 && t.Strings[0] == "" 59 } 60 61 func (t *StringTable) Reset() { 62 clear(t.Dict) 63 t.Dict[""] = 0 64 t.Strings[0] = "" 65 t.Strings = t.Strings[:1] 66 } 67 68 func (t *StringTable) Clone() *StringTable { 69 n := &StringTable{ 70 Dict: make(map[string]int32, len(t.Dict)), 71 Strings: make([]string, len(t.Strings)), 72 } 73 for k, v := range t.Dict { 74 n.Dict[k] = v 75 } 76 copy(n.Strings, t.Strings) 77 return n 78 } 79 80 func (t *StringTable) Put(s string) int32 { 81 if i, ok := t.Dict[s]; ok { 82 return i 83 } 84 i := int32(len(t.Strings)) 85 t.Strings = append(t.Strings, s) 86 t.Dict[s] = i 87 return i 88 } 89 90 func (t *StringTable) Lookup(i int32) string { 91 if i < 0 || int(i) >= len(t.Strings) { 92 return "" 93 } 94 return t.Strings[i] 95 } 96 97 func (t *StringTable) LookupString(s string) int32 { 98 if i, ok := t.Dict[s]; ok { 99 return i 100 } 101 return -1 102 } 103 104 // Import strings from the metadata entry and update the references. 105 func (t *StringTable) Import(src *metastorev1.BlockMeta) { 106 if len(src.StringTable) < 2 { 107 return 108 } 109 // TODO: Pool? 110 lut := make([]int32, len(src.StringTable)) 111 for i, s := range src.StringTable { 112 x := t.Put(s) 113 lut[i] = x 114 } 115 src.Tenant = lut[src.Tenant] 116 src.CreatedBy = lut[src.CreatedBy] 117 for _, ds := range src.Datasets { 118 ds.Tenant = lut[ds.Tenant] 119 ds.Name = lut[ds.Name] 120 var skip int 121 for i, v := range ds.Labels { 122 if i == skip { 123 skip += int(v)*2 + 1 124 continue 125 } 126 ds.Labels[i] = lut[v] 127 } 128 } 129 } 130 131 func (t *StringTable) Export(dst *metastorev1.BlockMeta) { 132 n := stringTablePool.Get().(*StringTable) 133 defer stringTablePool.Put(n) 134 dst.Tenant = n.Put(t.Lookup(dst.Tenant)) 135 dst.CreatedBy = n.Put(t.Lookup(dst.CreatedBy)) 136 for _, ds := range dst.Datasets { 137 ds.Tenant = n.Put(t.Lookup(ds.Tenant)) 138 ds.Name = n.Put(t.Lookup(ds.Name)) 139 var skip int 140 for i, v := range ds.Labels { 141 if i == skip { 142 skip += int(v)*2 + 1 143 continue 144 } 145 ds.Labels[i] = n.Put(t.Lookup(ds.Labels[i])) 146 } 147 } 148 dst.StringTable = make([]string, len(n.Strings)) 149 copy(dst.StringTable, n.Strings) 150 n.Reset() 151 } 152 153 func (t *StringTable) Load(x iter.Iterator[string]) error { 154 for x.Next() { 155 t.Put(x.At()) 156 } 157 return x.Err() 158 } 159 160 func OpenStringTable(src *metastorev1.BlockMeta) *StringTable { 161 t := &StringTable{ 162 Dict: make(map[string]int32, len(src.StringTable)), 163 Strings: src.StringTable, 164 } 165 for i, s := range src.StringTable { 166 t.Dict[s] = int32(i) 167 } 168 return t 169 } 170 171 var castagnoli = crc32.MakeTable(crc32.Castagnoli) 172 173 // Encode writes the metadata to the writer in the following format: 174 // 175 // raw | protobuf-encoded metadata 176 // be_uint32 | size of the raw metadata 177 // be_uint32 | CRC32 of the raw metadata and size 178 func Encode(w io.Writer, md *metastorev1.BlockMeta) error { 179 crc := crc32.New(castagnoli) 180 w = io.MultiWriter(w, crc) 181 b, _ := md.MarshalVT() 182 n, err := w.Write(b) 183 if err != nil { 184 return err 185 } 186 if err = binary.Write(w, binary.BigEndian, uint32(n)); err != nil { 187 return err 188 } 189 return binary.Write(w, binary.BigEndian, crc.Sum32()) 190 } 191 192 // Decode metadata encoded with Encode. 193 // 194 // Note that the metadata decoded from the object has zero Size field, 195 // as the block size is not known at the point the metadata is written. 196 // It is expected that the caller has access to the block object and 197 // can set the Size field after reading the metadata. 198 func Decode(b []byte, md *metastorev1.BlockMeta) error { 199 if len(b) <= 8 { 200 return fmt.Errorf("%w: invalid size", ErrMetadataInvalid) 201 } 202 crc := binary.BigEndian.Uint32(b[len(b)-4:]) 203 size := binary.BigEndian.Uint32(b[len(b)-8 : len(b)-4]) 204 off := len(b) - 8 - int(size) 205 if off < 0 { 206 return fmt.Errorf("%w: invalid size", ErrMetadataInvalid) 207 } 208 if crc32.Checksum(b[off:len(b)-4], castagnoli) != crc { 209 return fmt.Errorf("%w: invalid CRC", ErrMetadataInvalid) 210 } 211 return md.UnmarshalVT(b[off : len(b)-8]) 212 }