github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/series/index/schema_util.go (about)

     1  package index
     2  
     3  import (
     4  	"crypto/sha256"
     5  	"encoding/base64"
     6  	"encoding/binary"
     7  	"encoding/hex"
     8  	"encoding/json"
     9  	"fmt"
    10  	"strconv"
    11  	"strings"
    12  	"sync"
    13  
    14  	"github.com/pkg/errors"
    15  	"github.com/prometheus/common/model"
    16  	"github.com/prometheus/prometheus/model/labels"
    17  )
    18  
    19  // Backwards-compatible with model.Metric.String()
    20  func labelsString(ls labels.Labels) string {
    21  	metricName := ls.Get(labels.MetricName)
    22  	if metricName != "" && len(ls) == 1 {
    23  		return metricName
    24  	}
    25  	var b strings.Builder
    26  	b.Grow(1000)
    27  
    28  	b.WriteString(metricName)
    29  	b.WriteByte('{')
    30  	i := 0
    31  	for _, l := range ls {
    32  		if l.Name == labels.MetricName {
    33  			continue
    34  		}
    35  		if i > 0 {
    36  			b.WriteByte(',')
    37  			b.WriteByte(' ')
    38  		}
    39  		b.WriteString(l.Name)
    40  		b.WriteByte('=')
    41  		var buf [1000]byte
    42  		b.Write(strconv.AppendQuote(buf[:0], l.Value))
    43  		i++
    44  	}
    45  	b.WriteByte('}')
    46  
    47  	return b.String()
    48  }
    49  
    50  func labelsSeriesID(ls labels.Labels) []byte {
    51  	h := sha256.Sum256([]byte(labelsString(ls)))
    52  	return encodeBase64Bytes(h[:])
    53  }
    54  
    55  func sha256bytes(s string) []byte {
    56  	h := sha256.Sum256([]byte(s))
    57  	return encodeBase64Bytes(h[:])
    58  }
    59  
    60  // Build an index key, encoded as multiple parts separated by a 0 byte, with extra space at the end.
    61  func buildRangeValue(extra int, ss ...[]byte) []byte {
    62  	length := extra
    63  	for _, s := range ss {
    64  		length += len(s) + 1
    65  	}
    66  	output, i := make([]byte, length), 0
    67  	for _, s := range ss {
    68  		i += copy(output[i:], s) + 1
    69  	}
    70  	return output
    71  }
    72  
    73  // Encode a complete key including type marker (which goes at the end)
    74  func encodeRangeKey(keyType byte, ss ...[]byte) []byte {
    75  	output := buildRangeValue(2, ss...)
    76  	output[len(output)-2] = keyType
    77  	return output
    78  }
    79  
    80  // Prefix values are used in querying the database, e.g. find all the records with a specific label value
    81  func rangeValuePrefix(ss ...[]byte) []byte {
    82  	return buildRangeValue(0, ss...)
    83  }
    84  
    85  func decodeRangeKey(value []byte, components [][]byte) [][]byte {
    86  	components = components[:0]
    87  	i, j := 0, 0
    88  	for j < len(value) {
    89  		if value[j] != 0 {
    90  			j++
    91  			continue
    92  		}
    93  		components = append(components, value[i:j])
    94  		j++
    95  		i = j
    96  	}
    97  	return components
    98  }
    99  
   100  func encodeBase64Bytes(bytes []byte) []byte {
   101  	encodedLen := base64.RawStdEncoding.EncodedLen(len(bytes))
   102  	encoded := make([]byte, encodedLen)
   103  	base64.RawStdEncoding.Encode(encoded, bytes)
   104  	return encoded
   105  }
   106  
   107  func decodeBase64Value(bs []byte) (model.LabelValue, error) {
   108  	decodedLen := base64.RawStdEncoding.DecodedLen(len(bs))
   109  	decoded := make([]byte, decodedLen)
   110  	if _, err := base64.RawStdEncoding.Decode(decoded, bs); err != nil {
   111  		return "", err
   112  	}
   113  	return model.LabelValue(decoded), nil
   114  }
   115  
   116  func encodeTime(t uint32) []byte {
   117  	// timestamps are hex encoded such that it doesn't contain null byte,
   118  	// but is still lexicographically sortable.
   119  	throughBytes := make([]byte, 4)
   120  	binary.BigEndian.PutUint32(throughBytes, t)
   121  	encodedThroughBytes := make([]byte, 8)
   122  	hex.Encode(encodedThroughBytes, throughBytes)
   123  	return encodedThroughBytes
   124  }
   125  
   126  // parseMetricNameRangeValue returns the metric name stored in metric name
   127  // range values. Currently checks range value key and returns the value as the
   128  // metric name.
   129  func parseMetricNameRangeValue(rangeValue []byte, value []byte) (model.LabelValue, error) {
   130  	componentRef := componentsPool.Get().(*componentRef)
   131  	defer componentsPool.Put(componentRef)
   132  	components := decodeRangeKey(rangeValue, componentRef.components)
   133  
   134  	switch {
   135  	case len(components) < 4:
   136  		return "", fmt.Errorf("invalid metric name range value: %x", rangeValue)
   137  
   138  	// v1 has the metric name as the value (with the hash as the first component)
   139  	case len(components[3]) == 1 && components[3][0] == metricNameRangeKeyV1:
   140  		return model.LabelValue(value), nil
   141  
   142  	default:
   143  		return "", fmt.Errorf("unrecognised metricNameRangeKey version: %q", string(components[3]))
   144  	}
   145  }
   146  
   147  // parseSeriesRangeValue returns the model.Metric stored in metric fingerprint
   148  // range values.
   149  func parseSeriesRangeValue(rangeValue []byte, value []byte) (model.Metric, error) {
   150  	componentRef := componentsPool.Get().(*componentRef)
   151  	defer componentsPool.Put(componentRef)
   152  	components := decodeRangeKey(rangeValue, componentRef.components)
   153  
   154  	switch {
   155  	case len(components) < 4:
   156  		return nil, fmt.Errorf("invalid metric range value: %x", rangeValue)
   157  
   158  	// v1 has the encoded json metric as the value (with the fingerprint as the first component)
   159  	case len(components[3]) == 1 && components[3][0] == seriesRangeKeyV1:
   160  		var series model.Metric
   161  		if err := json.Unmarshal(value, &series); err != nil {
   162  			return nil, err
   163  		}
   164  		return series, nil
   165  
   166  	default:
   167  		return nil, fmt.Errorf("unrecognised seriesRangeKey version: %q", string(components[3]))
   168  	}
   169  }
   170  
   171  type componentRef struct {
   172  	components [][]byte
   173  }
   174  
   175  var componentsPool = sync.Pool{
   176  	New: func() interface{} {
   177  		return &componentRef{components: make([][]byte, 0, 5)}
   178  	},
   179  }
   180  
   181  // ParseChunkTimeRangeValue returns the chunkID (seriesID since v9) and labelValue for chunk time
   182  // range values.
   183  func ParseChunkTimeRangeValue(rangeValue []byte, value []byte) (
   184  	chunkID string, labelValue model.LabelValue, err error,
   185  ) {
   186  	componentRef := componentsPool.Get().(*componentRef)
   187  	defer componentsPool.Put(componentRef)
   188  	components := decodeRangeKey(rangeValue, componentRef.components)
   189  
   190  	switch {
   191  	case len(components) < 3:
   192  		err = errors.Errorf("invalid chunk time range value: %x", rangeValue)
   193  		return
   194  
   195  	// v1 & v2 schema had three components - label name, label value and chunk ID.
   196  	// No version number.
   197  	case len(components) == 3:
   198  		chunkID = yoloString(components[2])
   199  		labelValue = model.LabelValue(yoloString(components[1]))
   200  		return
   201  
   202  	case len(components[3]) == 1:
   203  		switch components[3][0] {
   204  		// v3 schema had four components - label name, label value, chunk ID and version.
   205  		// "version" is 1 and label value is base64 encoded.
   206  		// (older code wrote "version" as 1, not '1')
   207  		case chunkTimeRangeKeyV1a, chunkTimeRangeKeyV1:
   208  			chunkID = yoloString(components[2])
   209  			labelValue, err = decodeBase64Value(components[1])
   210  			return
   211  
   212  		// v4 schema wrote v3 range keys and a new range key - version 2,
   213  		// with four components - <empty>, <empty>, chunk ID and version.
   214  		case chunkTimeRangeKeyV2:
   215  			chunkID = yoloString(components[2])
   216  			return
   217  
   218  		// v5 schema version 3 range key is chunk end time, <empty>, chunk ID, version
   219  		case chunkTimeRangeKeyV3:
   220  			chunkID = yoloString(components[2])
   221  			return
   222  
   223  		// v5 schema version 4 range key is chunk end time, label value, chunk ID, version
   224  		case chunkTimeRangeKeyV4:
   225  			chunkID = yoloString(components[2])
   226  			labelValue, err = decodeBase64Value(components[1])
   227  			return
   228  
   229  		// v6 schema added version 5 range keys, which have the label value written in
   230  		// to the value, not the range key. So they are [chunk end time, <empty>, chunk ID, version].
   231  		case chunkTimeRangeKeyV5:
   232  			chunkID = yoloString(components[2])
   233  			labelValue = model.LabelValue(yoloString(value))
   234  			return
   235  
   236  		// v9 schema actually return series IDs
   237  		case seriesRangeKeyV1:
   238  			chunkID = yoloString(components[0])
   239  			return
   240  
   241  		case labelSeriesRangeKeyV1:
   242  			chunkID = yoloString(components[1])
   243  			labelValue = model.LabelValue(yoloString(value))
   244  			return
   245  		}
   246  	}
   247  	err = fmt.Errorf("unrecognised chunkTimeRangeKey version: %q", string(components[3]))
   248  	return
   249  }