github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ingester/mapper.go (about)

     1  package ingester
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  	"strings"
     7  	"sync"
     8  
     9  	"github.com/go-kit/log/level"
    10  	"github.com/prometheus/common/model"
    11  	"github.com/prometheus/prometheus/model/labels"
    12  	"go.uber.org/atomic"
    13  
    14  	util_log "github.com/grafana/loki/pkg/util/log"
    15  )
    16  
// maxMappedFP is the upper bound (inclusive) of the fingerprint range reserved
// for collision mappings. mapFP treats every raw fingerprint at or below this
// value as a collision that must be remapped, and nextMappedFP panics instead
// of handing out a value above it.
const maxMappedFP = 1 << 20 // About 1M fingerprints reserved for mapping.

// separatorString is model.SeparatorByte as a one-byte string. It is used by
// metricToUniqueString to delimit label names, label values and label pairs.
var separatorString = string([]byte{model.SeparatorByte})
    20  
// fpMapper is used to map fingerprints in order to work around fingerprint
// collisions. A metric whose raw fingerprint collides with another one (or
// falls into the reserved range up to maxMappedFP) is assigned a replacement
// fingerprint taken from that reserved range.
type fpMapper struct {
	// highestMappedFP has to be aligned for atomic operations.
	// It is the counter nextMappedFP increments to allocate the next mapped
	// fingerprint.
	highestMappedFP atomic.Uint64

	// mtx guards the outer mappings map only. The inner per-fingerprint maps
	// are read and written without it in mapFP and maybeAddMapping, which rely
	// on the caller having locked the raw fingerprint.
	mtx sync.RWMutex // Protects mappings.
	// maps original fingerprints to a map of string representations of
	// metrics to the truly unique fingerprint.
	mappings map[model.Fingerprint]map[string]model.Fingerprint

	// Returns existing labels for given fingerprint, if any.
	// Equality check relies on labels.Labels being sorted.
	// A nil result is treated by mapFP as "no series known for this
	// fingerprint".
	fpToLabels func(fingerprint model.Fingerprint) labels.Labels
}
    36  
    37  // newFPMapper returns an fpMapper ready to use.
    38  func newFPMapper(fpToLabels func(fingerprint model.Fingerprint) labels.Labels) *fpMapper {
    39  	if fpToLabels == nil {
    40  		panic("nil fpToLabels")
    41  	}
    42  
    43  	return &fpMapper{
    44  		fpToLabels: fpToLabels,
    45  		mappings:   map[model.Fingerprint]map[string]model.Fingerprint{},
    46  	}
    47  }
    48  
    49  // mapFP takes a raw fingerprint (as returned by Metrics.FastFingerprint) and
    50  // returns a truly unique fingerprint. The caller must have locked the raw
    51  // fingerprint.
    52  func (m *fpMapper) mapFP(fp model.Fingerprint, metric labels.Labels) model.Fingerprint {
    53  	// First check if we are in the reserved FP space, in which case this is
    54  	// automatically a collision that has to be mapped.
    55  	if fp <= maxMappedFP {
    56  		return m.maybeAddMapping(fp, metric)
    57  	}
    58  
    59  	// Then check the most likely case: This fp belongs to a series that is
    60  	// already in memory.
    61  	s := m.fpToLabels(fp)
    62  	if s != nil {
    63  		// FP exists in memory, but is it for the same metric?
    64  		if labels.Equal(metric, s) {
    65  			// Yupp. We are done.
    66  			return fp
    67  		}
    68  		// Collision detected!
    69  		return m.maybeAddMapping(fp, metric)
    70  	}
    71  	// Metric is not in memory. Before doing the expensive archive lookup,
    72  	// check if we have a mapping for this metric in place already.
    73  	m.mtx.RLock()
    74  	mappedFPs, fpAlreadyMapped := m.mappings[fp]
    75  	m.mtx.RUnlock()
    76  	if fpAlreadyMapped {
    77  		// We indeed have mapped fp historically.
    78  		ms := metricToUniqueString(metric)
    79  		// fp is locked by the caller, so no further locking of
    80  		// 'collisions' required (it is specific to fp).
    81  		mappedFP, ok := mappedFPs[ms]
    82  		if ok {
    83  			// Historical mapping found, return the mapped FP.
    84  			return mappedFP
    85  		}
    86  	}
    87  	return fp
    88  }
    89  
    90  // maybeAddMapping is only used internally. It takes a detected collision and
    91  // adds it to the collisions map if not yet there. In any case, it returns the
    92  // truly unique fingerprint for the colliding metric.
    93  func (m *fpMapper) maybeAddMapping(fp model.Fingerprint, collidingMetric labels.Labels) model.Fingerprint {
    94  	ms := metricToUniqueString(collidingMetric)
    95  	m.mtx.RLock()
    96  	mappedFPs, ok := m.mappings[fp]
    97  	m.mtx.RUnlock()
    98  	if ok {
    99  		// fp is locked by the caller, so no further locking required.
   100  		mappedFP, ok := mappedFPs[ms]
   101  		if ok {
   102  			return mappedFP // Existing mapping.
   103  		}
   104  		// A new mapping has to be created.
   105  		mappedFP = m.nextMappedFP()
   106  		mappedFPs[ms] = mappedFP
   107  		level.Info(util_log.Logger).Log(
   108  			"msg", "fingerprint collision detected, mapping to new fingerprint",
   109  			"old_fp", fp,
   110  			"new_fp", mappedFP,
   111  			"metric", ms,
   112  		)
   113  		return mappedFP
   114  	}
   115  	// This is the first collision for fp.
   116  	mappedFP := m.nextMappedFP()
   117  	mappedFPs = map[string]model.Fingerprint{ms: mappedFP}
   118  	m.mtx.Lock()
   119  	m.mappings[fp] = mappedFPs
   120  	m.mtx.Unlock()
   121  	level.Info(util_log.Logger).Log(
   122  		"msg", "fingerprint collision detected, mapping to new fingerprint",
   123  		"old_fp", fp,
   124  		"new_fp", mappedFP,
   125  		"metric", collidingMetric,
   126  	)
   127  	return mappedFP
   128  }
   129  
   130  func (m *fpMapper) nextMappedFP() model.Fingerprint {
   131  	mappedFP := model.Fingerprint(m.highestMappedFP.Inc())
   132  	if mappedFP > maxMappedFP {
   133  		panic(fmt.Errorf("more than %v fingerprints mapped in collision detection", maxMappedFP))
   134  	}
   135  	return mappedFP
   136  }
   137  
   138  // metricToUniqueString turns a metric into a string in a reproducible and
   139  // unique way, i.e. the same metric will always create the same string, and
   140  // different metrics will always create different strings. In a way, it is the
   141  // "ideal" fingerprint function, only that it is more expensive than the
   142  // FastFingerprint function, and its result is not suitable as a key for maps
   143  // and indexes as it might become really large, causing a lot of hashing effort
   144  // in maps and a lot of storage overhead in indexes.
   145  func metricToUniqueString(m labels.Labels) string {
   146  	parts := make([]string, 0, len(m))
   147  	for _, pair := range m {
   148  		parts = append(parts, pair.Name+separatorString+pair.Value)
   149  	}
   150  	sort.Strings(parts)
   151  	return strings.Join(parts, separatorString)
   152  }