github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/series/index/schema_util.go (about) 1 package index 2 3 import ( 4 "crypto/sha256" 5 "encoding/base64" 6 "encoding/binary" 7 "encoding/hex" 8 "encoding/json" 9 "fmt" 10 "strconv" 11 "strings" 12 "sync" 13 14 "github.com/pkg/errors" 15 "github.com/prometheus/common/model" 16 "github.com/prometheus/prometheus/model/labels" 17 ) 18 19 // Backwards-compatible with model.Metric.String() 20 func labelsString(ls labels.Labels) string { 21 metricName := ls.Get(labels.MetricName) 22 if metricName != "" && len(ls) == 1 { 23 return metricName 24 } 25 var b strings.Builder 26 b.Grow(1000) 27 28 b.WriteString(metricName) 29 b.WriteByte('{') 30 i := 0 31 for _, l := range ls { 32 if l.Name == labels.MetricName { 33 continue 34 } 35 if i > 0 { 36 b.WriteByte(',') 37 b.WriteByte(' ') 38 } 39 b.WriteString(l.Name) 40 b.WriteByte('=') 41 var buf [1000]byte 42 b.Write(strconv.AppendQuote(buf[:0], l.Value)) 43 i++ 44 } 45 b.WriteByte('}') 46 47 return b.String() 48 } 49 50 func labelsSeriesID(ls labels.Labels) []byte { 51 h := sha256.Sum256([]byte(labelsString(ls))) 52 return encodeBase64Bytes(h[:]) 53 } 54 55 func sha256bytes(s string) []byte { 56 h := sha256.Sum256([]byte(s)) 57 return encodeBase64Bytes(h[:]) 58 } 59 60 // Build an index key, encoded as multiple parts separated by a 0 byte, with extra space at the end. 61 func buildRangeValue(extra int, ss ...[]byte) []byte { 62 length := extra 63 for _, s := range ss { 64 length += len(s) + 1 65 } 66 output, i := make([]byte, length), 0 67 for _, s := range ss { 68 i += copy(output[i:], s) + 1 69 } 70 return output 71 } 72 73 // Encode a complete key including type marker (which goes at the end) 74 func encodeRangeKey(keyType byte, ss ...[]byte) []byte { 75 output := buildRangeValue(2, ss...) 76 output[len(output)-2] = keyType 77 return output 78 } 79 80 // Prefix values are used in querying the database, e.g. find all the records with a specific label value 81 func rangeValuePrefix(ss ...[]byte) []byte { 82 return buildRangeValue(0, ss...) 83 } 84 85 func decodeRangeKey(value []byte, components [][]byte) [][]byte { 86 components = components[:0] 87 i, j := 0, 0 88 for j < len(value) { 89 if value[j] != 0 { 90 j++ 91 continue 92 } 93 components = append(components, value[i:j]) 94 j++ 95 i = j 96 } 97 return components 98 } 99 100 func encodeBase64Bytes(bytes []byte) []byte { 101 encodedLen := base64.RawStdEncoding.EncodedLen(len(bytes)) 102 encoded := make([]byte, encodedLen) 103 base64.RawStdEncoding.Encode(encoded, bytes) 104 return encoded 105 } 106 107 func decodeBase64Value(bs []byte) (model.LabelValue, error) { 108 decodedLen := base64.RawStdEncoding.DecodedLen(len(bs)) 109 decoded := make([]byte, decodedLen) 110 if _, err := base64.RawStdEncoding.Decode(decoded, bs); err != nil { 111 return "", err 112 } 113 return model.LabelValue(decoded), nil 114 } 115 116 func encodeTime(t uint32) []byte { 117 // timestamps are hex encoded such that it doesn't contain null byte, 118 // but is still lexicographically sortable. 119 throughBytes := make([]byte, 4) 120 binary.BigEndian.PutUint32(throughBytes, t) 121 encodedThroughBytes := make([]byte, 8) 122 hex.Encode(encodedThroughBytes, throughBytes) 123 return encodedThroughBytes 124 } 125 126 // parseMetricNameRangeValue returns the metric name stored in metric name 127 // range values. Currently checks range value key and returns the value as the 128 // metric name. 129 func parseMetricNameRangeValue(rangeValue []byte, value []byte) (model.LabelValue, error) { 130 componentRef := componentsPool.Get().(*componentRef) 131 defer componentsPool.Put(componentRef) 132 components := decodeRangeKey(rangeValue, componentRef.components) 133 134 switch { 135 case len(components) < 4: 136 return "", fmt.Errorf("invalid metric name range value: %x", rangeValue) 137 138 // v1 has the metric name as the value (with the hash as the first component) 139 case len(components[3]) == 1 && components[3][0] == metricNameRangeKeyV1: 140 return model.LabelValue(value), nil 141 142 default: 143 return "", fmt.Errorf("unrecognised metricNameRangeKey version: %q", string(components[3])) 144 } 145 } 146 147 // parseSeriesRangeValue returns the model.Metric stored in metric fingerprint 148 // range values. 149 func parseSeriesRangeValue(rangeValue []byte, value []byte) (model.Metric, error) { 150 componentRef := componentsPool.Get().(*componentRef) 151 defer componentsPool.Put(componentRef) 152 components := decodeRangeKey(rangeValue, componentRef.components) 153 154 switch { 155 case len(components) < 4: 156 return nil, fmt.Errorf("invalid metric range value: %x", rangeValue) 157 158 // v1 has the encoded json metric as the value (with the fingerprint as the first component) 159 case len(components[3]) == 1 && components[3][0] == seriesRangeKeyV1: 160 var series model.Metric 161 if err := json.Unmarshal(value, &series); err != nil { 162 return nil, err 163 } 164 return series, nil 165 166 default: 167 return nil, fmt.Errorf("unrecognised seriesRangeKey version: %q", string(components[3])) 168 } 169 } 170 171 type componentRef struct { 172 components [][]byte 173 } 174 175 var componentsPool = sync.Pool{ 176 New: func() interface{} { 177 return &componentRef{components: make([][]byte, 0, 5)} 178 }, 179 } 180 181 // ParseChunkTimeRangeValue returns the chunkID (seriesID since v9) and labelValue for chunk time 182 // range values. 183 func ParseChunkTimeRangeValue(rangeValue []byte, value []byte) ( 184 chunkID string, labelValue model.LabelValue, err error, 185 ) { 186 componentRef := componentsPool.Get().(*componentRef) 187 defer componentsPool.Put(componentRef) 188 components := decodeRangeKey(rangeValue, componentRef.components) 189 190 switch { 191 case len(components) < 3: 192 err = errors.Errorf("invalid chunk time range value: %x", rangeValue) 193 return 194 195 // v1 & v2 schema had three components - label name, label value and chunk ID. 196 // No version number. 197 case len(components) == 3: 198 chunkID = yoloString(components[2]) 199 labelValue = model.LabelValue(yoloString(components[1])) 200 return 201 202 case len(components[3]) == 1: 203 switch components[3][0] { 204 // v3 schema had four components - label name, label value, chunk ID and version. 205 // "version" is 1 and label value is base64 encoded. 206 // (older code wrote "version" as 1, not '1') 207 case chunkTimeRangeKeyV1a, chunkTimeRangeKeyV1: 208 chunkID = yoloString(components[2]) 209 labelValue, err = decodeBase64Value(components[1]) 210 return 211 212 // v4 schema wrote v3 range keys and a new range key - version 2, 213 // with four components - <empty>, <empty>, chunk ID and version. 214 case chunkTimeRangeKeyV2: 215 chunkID = yoloString(components[2]) 216 return 217 218 // v5 schema version 3 range key is chunk end time, <empty>, chunk ID, version 219 case chunkTimeRangeKeyV3: 220 chunkID = yoloString(components[2]) 221 return 222 223 // v5 schema version 4 range key is chunk end time, label value, chunk ID, version 224 case chunkTimeRangeKeyV4: 225 chunkID = yoloString(components[2]) 226 labelValue, err = decodeBase64Value(components[1]) 227 return 228 229 // v6 schema added version 5 range keys, which have the label value written in 230 // to the value, not the range key. So they are [chunk end time, <empty>, chunk ID, version]. 231 case chunkTimeRangeKeyV5: 232 chunkID = yoloString(components[2]) 233 labelValue = model.LabelValue(yoloString(value)) 234 return 235 236 // v9 schema actually return series IDs 237 case seriesRangeKeyV1: 238 chunkID = yoloString(components[0]) 239 return 240 241 case labelSeriesRangeKeyV1: 242 chunkID = yoloString(components[1]) 243 labelValue = model.LabelValue(yoloString(value)) 244 return 245 } 246 } 247 err = fmt.Errorf("unrecognised chunkTimeRangeKey version: %q", string(components[3])) 248 return 249 }