github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/chunk/schema_util.go (about) 1 package chunk 2 3 import ( 4 "crypto/sha256" 5 "encoding/base64" 6 "encoding/binary" 7 "encoding/hex" 8 "encoding/json" 9 "strconv" 10 "strings" 11 "sync" 12 13 "fmt" 14 15 "github.com/pkg/errors" 16 "github.com/prometheus/common/model" 17 "github.com/prometheus/prometheus/pkg/labels" 18 ) 19 20 // Backwards-compatible with model.Metric.String() 21 func labelsString(ls labels.Labels) string { 22 metricName := ls.Get(labels.MetricName) 23 if metricName != "" && len(ls) == 1 { 24 return metricName 25 } 26 var b strings.Builder 27 b.Grow(1000) 28 29 b.WriteString(metricName) 30 b.WriteByte('{') 31 i := 0 32 for _, l := range ls { 33 if l.Name == labels.MetricName { 34 continue 35 } 36 if i > 0 { 37 b.WriteByte(',') 38 b.WriteByte(' ') 39 } 40 b.WriteString(l.Name) 41 b.WriteByte('=') 42 var buf [1000]byte 43 b.Write(strconv.AppendQuote(buf[:0], l.Value)) 44 i++ 45 } 46 b.WriteByte('}') 47 48 return b.String() 49 } 50 51 func labelsSeriesID(ls labels.Labels) []byte { 52 h := sha256.Sum256([]byte(labelsString(ls))) 53 return encodeBase64Bytes(h[:]) 54 } 55 56 func sha256bytes(s string) []byte { 57 h := sha256.Sum256([]byte(s)) 58 return encodeBase64Bytes(h[:]) 59 } 60 61 // Build an index key, encoded as multiple parts separated by a 0 byte, with extra space at the end. 62 func buildRangeValue(extra int, ss ...[]byte) []byte { 63 length := extra 64 for _, s := range ss { 65 length += len(s) + 1 66 } 67 output, i := make([]byte, length), 0 68 for _, s := range ss { 69 i += copy(output[i:], s) + 1 70 } 71 return output 72 } 73 74 // Encode a complete key including type marker (which goes at the end) 75 func encodeRangeKey(keyType byte, ss ...[]byte) []byte { 76 output := buildRangeValue(2, ss...) 77 output[len(output)-2] = keyType 78 return output 79 } 80 81 // Prefix values are used in querying the database, e.g. find all the records with a specific label value 82 func rangeValuePrefix(ss ...[]byte) []byte { 83 return buildRangeValue(0, ss...) 84 } 85 86 func decodeRangeKey(value []byte, components [][]byte) [][]byte { 87 components = components[:0] 88 i, j := 0, 0 89 for j < len(value) { 90 if value[j] != 0 { 91 j++ 92 continue 93 } 94 components = append(components, value[i:j]) 95 j++ 96 i = j 97 } 98 return components 99 } 100 101 func encodeBase64Bytes(bytes []byte) []byte { 102 encodedLen := base64.RawStdEncoding.EncodedLen(len(bytes)) 103 encoded := make([]byte, encodedLen) 104 base64.RawStdEncoding.Encode(encoded, bytes) 105 return encoded 106 } 107 108 func encodeBase64Value(value string) []byte { 109 encodedLen := base64.RawStdEncoding.EncodedLen(len(value)) 110 encoded := make([]byte, encodedLen) 111 base64.RawStdEncoding.Encode(encoded, []byte(value)) 112 return encoded 113 } 114 115 func decodeBase64Value(bs []byte) (model.LabelValue, error) { 116 decodedLen := base64.RawStdEncoding.DecodedLen(len(bs)) 117 decoded := make([]byte, decodedLen) 118 if _, err := base64.RawStdEncoding.Decode(decoded, bs); err != nil { 119 return "", err 120 } 121 return model.LabelValue(decoded), nil 122 } 123 124 func encodeTime(t uint32) []byte { 125 // timestamps are hex encoded such that it doesn't contain null byte, 126 // but is still lexicographically sortable. 127 throughBytes := make([]byte, 4) 128 binary.BigEndian.PutUint32(throughBytes, t) 129 encodedThroughBytes := make([]byte, 8) 130 hex.Encode(encodedThroughBytes, throughBytes) 131 return encodedThroughBytes 132 } 133 134 // parseMetricNameRangeValue returns the metric name stored in metric name 135 // range values. Currently checks range value key and returns the value as the 136 // metric name. 137 func parseMetricNameRangeValue(rangeValue []byte, value []byte) (model.LabelValue, error) { 138 componentRef := componentsPool.Get().(*componentRef) 139 defer componentsPool.Put(componentRef) 140 components := decodeRangeKey(rangeValue, componentRef.components) 141 142 switch { 143 case len(components) < 4: 144 return "", fmt.Errorf("invalid metric name range value: %x", rangeValue) 145 146 // v1 has the metric name as the value (with the hash as the first component) 147 case len(components[3]) == 1 && components[3][0] == metricNameRangeKeyV1: 148 return model.LabelValue(value), nil 149 150 default: 151 return "", fmt.Errorf("unrecognised metricNameRangeKey version: %q", string(components[3])) 152 } 153 } 154 155 // parseSeriesRangeValue returns the model.Metric stored in metric fingerprint 156 // range values. 157 func parseSeriesRangeValue(rangeValue []byte, value []byte) (model.Metric, error) { 158 componentRef := componentsPool.Get().(*componentRef) 159 defer componentsPool.Put(componentRef) 160 components := decodeRangeKey(rangeValue, componentRef.components) 161 162 switch { 163 case len(components) < 4: 164 return nil, fmt.Errorf("invalid metric range value: %x", rangeValue) 165 166 // v1 has the encoded json metric as the value (with the fingerprint as the first component) 167 case len(components[3]) == 1 && components[3][0] == seriesRangeKeyV1: 168 var series model.Metric 169 if err := json.Unmarshal(value, &series); err != nil { 170 return nil, err 171 } 172 return series, nil 173 174 default: 175 return nil, fmt.Errorf("unrecognised seriesRangeKey version: %q", string(components[3])) 176 } 177 } 178 179 type componentRef struct { 180 components [][]byte 181 } 182 183 var componentsPool = sync.Pool{ 184 New: func() interface{} { 185 return &componentRef{components: make([][]byte, 0, 5)} 186 }, 187 } 188 189 // parseChunkTimeRangeValue returns the chunkID and labelValue for chunk time 190 // range values. 191 func parseChunkTimeRangeValue(rangeValue []byte, value []byte) ( 192 chunkID string, labelValue model.LabelValue, err error, 193 ) { 194 componentRef := componentsPool.Get().(*componentRef) 195 defer componentsPool.Put(componentRef) 196 components := decodeRangeKey(rangeValue, componentRef.components) 197 198 switch { 199 case len(components) < 3: 200 err = errors.Errorf("invalid chunk time range value: %x", rangeValue) 201 return 202 203 // v1 & v2 schema had three components - label name, label value and chunk ID. 204 // No version number. 205 case len(components) == 3: 206 chunkID = string(components[2]) 207 labelValue = model.LabelValue(components[1]) 208 return 209 210 case len(components[3]) == 1: 211 switch components[3][0] { 212 // v3 schema had four components - label name, label value, chunk ID and version. 213 // "version" is 1 and label value is base64 encoded. 214 // (older code wrote "version" as 1, not '1') 215 case chunkTimeRangeKeyV1a, chunkTimeRangeKeyV1: 216 chunkID = string(components[2]) 217 labelValue, err = decodeBase64Value(components[1]) 218 return 219 220 // v4 schema wrote v3 range keys and a new range key - version 2, 221 // with four components - <empty>, <empty>, chunk ID and version. 222 case chunkTimeRangeKeyV2: 223 chunkID = string(components[2]) 224 return 225 226 // v5 schema version 3 range key is chunk end time, <empty>, chunk ID, version 227 case chunkTimeRangeKeyV3: 228 chunkID = string(components[2]) 229 return 230 231 // v5 schema version 4 range key is chunk end time, label value, chunk ID, version 232 case chunkTimeRangeKeyV4: 233 chunkID = string(components[2]) 234 labelValue, err = decodeBase64Value(components[1]) 235 return 236 237 // v6 schema added version 5 range keys, which have the label value written in 238 // to the value, not the range key. So they are [chunk end time, <empty>, chunk ID, version]. 239 case chunkTimeRangeKeyV5: 240 chunkID = string(components[2]) 241 labelValue = model.LabelValue(value) 242 return 243 244 // v9 schema actually return series IDs 245 case seriesRangeKeyV1: 246 chunkID = string(components[0]) 247 return 248 249 case labelSeriesRangeKeyV1: 250 chunkID = string(components[1]) 251 labelValue = model.LabelValue(value) 252 return 253 } 254 } 255 err = fmt.Errorf("unrecognised chunkTimeRangeKey version: %q", string(components[3])) 256 return 257 }