github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/chunk/schema_util.go (about)

     1  package chunk
     2  
     3  import (
     4  	"crypto/sha256"
     5  	"encoding/base64"
     6  	"encoding/binary"
     7  	"encoding/hex"
     8  	"encoding/json"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  
    13  	"fmt"
    14  
    15  	"github.com/pkg/errors"
    16  	"github.com/prometheus/common/model"
    17  	"github.com/prometheus/prometheus/pkg/labels"
    18  )
    19  
    20  // Backwards-compatible with model.Metric.String()
    21  func labelsString(ls labels.Labels) string {
    22  	metricName := ls.Get(labels.MetricName)
    23  	if metricName != "" && len(ls) == 1 {
    24  		return metricName
    25  	}
    26  	var b strings.Builder
    27  	b.Grow(1000)
    28  
    29  	b.WriteString(metricName)
    30  	b.WriteByte('{')
    31  	i := 0
    32  	for _, l := range ls {
    33  		if l.Name == labels.MetricName {
    34  			continue
    35  		}
    36  		if i > 0 {
    37  			b.WriteByte(',')
    38  			b.WriteByte(' ')
    39  		}
    40  		b.WriteString(l.Name)
    41  		b.WriteByte('=')
    42  		var buf [1000]byte
    43  		b.Write(strconv.AppendQuote(buf[:0], l.Value))
    44  		i++
    45  	}
    46  	b.WriteByte('}')
    47  
    48  	return b.String()
    49  }
    50  
    51  func labelsSeriesID(ls labels.Labels) []byte {
    52  	h := sha256.Sum256([]byte(labelsString(ls)))
    53  	return encodeBase64Bytes(h[:])
    54  }
    55  
    56  func sha256bytes(s string) []byte {
    57  	h := sha256.Sum256([]byte(s))
    58  	return encodeBase64Bytes(h[:])
    59  }
    60  
    61  // Build an index key, encoded as multiple parts separated by a 0 byte, with extra space at the end.
    62  func buildRangeValue(extra int, ss ...[]byte) []byte {
    63  	length := extra
    64  	for _, s := range ss {
    65  		length += len(s) + 1
    66  	}
    67  	output, i := make([]byte, length), 0
    68  	for _, s := range ss {
    69  		i += copy(output[i:], s) + 1
    70  	}
    71  	return output
    72  }
    73  
    74  // Encode a complete key including type marker (which goes at the end)
    75  func encodeRangeKey(keyType byte, ss ...[]byte) []byte {
    76  	output := buildRangeValue(2, ss...)
    77  	output[len(output)-2] = keyType
    78  	return output
    79  }
    80  
    81  // Prefix values are used in querying the database, e.g. find all the records with a specific label value
    82  func rangeValuePrefix(ss ...[]byte) []byte {
    83  	return buildRangeValue(0, ss...)
    84  }
    85  
    86  func decodeRangeKey(value []byte, components [][]byte) [][]byte {
    87  	components = components[:0]
    88  	i, j := 0, 0
    89  	for j < len(value) {
    90  		if value[j] != 0 {
    91  			j++
    92  			continue
    93  		}
    94  		components = append(components, value[i:j])
    95  		j++
    96  		i = j
    97  	}
    98  	return components
    99  }
   100  
   101  func encodeBase64Bytes(bytes []byte) []byte {
   102  	encodedLen := base64.RawStdEncoding.EncodedLen(len(bytes))
   103  	encoded := make([]byte, encodedLen)
   104  	base64.RawStdEncoding.Encode(encoded, bytes)
   105  	return encoded
   106  }
   107  
   108  func encodeBase64Value(value string) []byte {
   109  	encodedLen := base64.RawStdEncoding.EncodedLen(len(value))
   110  	encoded := make([]byte, encodedLen)
   111  	base64.RawStdEncoding.Encode(encoded, []byte(value))
   112  	return encoded
   113  }
   114  
   115  func decodeBase64Value(bs []byte) (model.LabelValue, error) {
   116  	decodedLen := base64.RawStdEncoding.DecodedLen(len(bs))
   117  	decoded := make([]byte, decodedLen)
   118  	if _, err := base64.RawStdEncoding.Decode(decoded, bs); err != nil {
   119  		return "", err
   120  	}
   121  	return model.LabelValue(decoded), nil
   122  }
   123  
   124  func encodeTime(t uint32) []byte {
   125  	// timestamps are hex encoded such that it doesn't contain null byte,
   126  	// but is still lexicographically sortable.
   127  	throughBytes := make([]byte, 4)
   128  	binary.BigEndian.PutUint32(throughBytes, t)
   129  	encodedThroughBytes := make([]byte, 8)
   130  	hex.Encode(encodedThroughBytes, throughBytes)
   131  	return encodedThroughBytes
   132  }
   133  
   134  // parseMetricNameRangeValue returns the metric name stored in metric name
   135  // range values. Currently checks range value key and returns the value as the
   136  // metric name.
   137  func parseMetricNameRangeValue(rangeValue []byte, value []byte) (model.LabelValue, error) {
   138  	componentRef := componentsPool.Get().(*componentRef)
   139  	defer componentsPool.Put(componentRef)
   140  	components := decodeRangeKey(rangeValue, componentRef.components)
   141  
   142  	switch {
   143  	case len(components) < 4:
   144  		return "", fmt.Errorf("invalid metric name range value: %x", rangeValue)
   145  
   146  	// v1 has the metric name as the value (with the hash as the first component)
   147  	case len(components[3]) == 1 && components[3][0] == metricNameRangeKeyV1:
   148  		return model.LabelValue(value), nil
   149  
   150  	default:
   151  		return "", fmt.Errorf("unrecognised metricNameRangeKey version: %q", string(components[3]))
   152  	}
   153  }
   154  
   155  // parseSeriesRangeValue returns the model.Metric stored in metric fingerprint
   156  // range values.
   157  func parseSeriesRangeValue(rangeValue []byte, value []byte) (model.Metric, error) {
   158  	componentRef := componentsPool.Get().(*componentRef)
   159  	defer componentsPool.Put(componentRef)
   160  	components := decodeRangeKey(rangeValue, componentRef.components)
   161  
   162  	switch {
   163  	case len(components) < 4:
   164  		return nil, fmt.Errorf("invalid metric range value: %x", rangeValue)
   165  
   166  	// v1 has the encoded json metric as the value (with the fingerprint as the first component)
   167  	case len(components[3]) == 1 && components[3][0] == seriesRangeKeyV1:
   168  		var series model.Metric
   169  		if err := json.Unmarshal(value, &series); err != nil {
   170  			return nil, err
   171  		}
   172  		return series, nil
   173  
   174  	default:
   175  		return nil, fmt.Errorf("unrecognised seriesRangeKey version: %q", string(components[3]))
   176  	}
   177  }
   178  
   179  type componentRef struct {
   180  	components [][]byte
   181  }
   182  
   183  var componentsPool = sync.Pool{
   184  	New: func() interface{} {
   185  		return &componentRef{components: make([][]byte, 0, 5)}
   186  	},
   187  }
   188  
   189  // parseChunkTimeRangeValue returns the chunkID and labelValue for chunk time
   190  // range values.
   191  func parseChunkTimeRangeValue(rangeValue []byte, value []byte) (
   192  	chunkID string, labelValue model.LabelValue, err error,
   193  ) {
   194  	componentRef := componentsPool.Get().(*componentRef)
   195  	defer componentsPool.Put(componentRef)
   196  	components := decodeRangeKey(rangeValue, componentRef.components)
   197  
   198  	switch {
   199  	case len(components) < 3:
   200  		err = errors.Errorf("invalid chunk time range value: %x", rangeValue)
   201  		return
   202  
   203  	// v1 & v2 schema had three components - label name, label value and chunk ID.
   204  	// No version number.
   205  	case len(components) == 3:
   206  		chunkID = string(components[2])
   207  		labelValue = model.LabelValue(components[1])
   208  		return
   209  
   210  	case len(components[3]) == 1:
   211  		switch components[3][0] {
   212  		// v3 schema had four components - label name, label value, chunk ID and version.
   213  		// "version" is 1 and label value is base64 encoded.
   214  		// (older code wrote "version" as 1, not '1')
   215  		case chunkTimeRangeKeyV1a, chunkTimeRangeKeyV1:
   216  			chunkID = string(components[2])
   217  			labelValue, err = decodeBase64Value(components[1])
   218  			return
   219  
   220  		// v4 schema wrote v3 range keys and a new range key - version 2,
   221  		// with four components - <empty>, <empty>, chunk ID and version.
   222  		case chunkTimeRangeKeyV2:
   223  			chunkID = string(components[2])
   224  			return
   225  
   226  		// v5 schema version 3 range key is chunk end time, <empty>, chunk ID, version
   227  		case chunkTimeRangeKeyV3:
   228  			chunkID = string(components[2])
   229  			return
   230  
   231  		// v5 schema version 4 range key is chunk end time, label value, chunk ID, version
   232  		case chunkTimeRangeKeyV4:
   233  			chunkID = string(components[2])
   234  			labelValue, err = decodeBase64Value(components[1])
   235  			return
   236  
   237  		// v6 schema added version 5 range keys, which have the label value written in
   238  		// to the value, not the range key. So they are [chunk end time, <empty>, chunk ID, version].
   239  		case chunkTimeRangeKeyV5:
   240  			chunkID = string(components[2])
   241  			labelValue = model.LabelValue(value)
   242  			return
   243  
   244  		// v9 schema actually return series IDs
   245  		case seriesRangeKeyV1:
   246  			chunkID = string(components[0])
   247  			return
   248  
   249  		case labelSeriesRangeKeyV1:
   250  			chunkID = string(components[1])
   251  			labelValue = model.LabelValue(value)
   252  			return
   253  		}
   254  	}
   255  	err = fmt.Errorf("unrecognised chunkTimeRangeKey version: %q", string(components[3]))
   256  	return
   257  }