github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/ingester/index/index.go (about)

     1  package index
     2  
     3  import (
     4  	"sort"
     5  	"sync"
     6  	"unsafe"
     7  
     8  	"github.com/prometheus/common/model"
     9  	"github.com/prometheus/prometheus/pkg/labels"
    10  
    11  	"github.com/cortexproject/cortex/pkg/chunk"
    12  	"github.com/cortexproject/cortex/pkg/cortexpb"
    13  	"github.com/cortexproject/cortex/pkg/util"
    14  )
    15  
// indexShards is the number of independently locked shards the index is
// split into. A fingerprint's shard is util.HashFP(fp) % indexShards.
const indexShards = 32
    17  
// InvertedIndex implements an in-memory inverted index from label pairs to fingerprints.
// It is sharded to reduce lock contention on writes.
type InvertedIndex struct {
	// shards holds the per-shard indexes; New allocates indexShards of them.
	shards []indexShard
}
    23  
    24  // New returns a new InvertedIndex.
    25  func New() *InvertedIndex {
    26  	shards := make([]indexShard, indexShards)
    27  	for i := 0; i < indexShards; i++ {
    28  		shards[i].idx = map[string]indexEntry{}
    29  	}
    30  	return &InvertedIndex{
    31  		shards: shards,
    32  	}
    33  }
    34  
    35  // Add a fingerprint under the specified labels.
    36  // NOTE: memory for `labels` is unsafe; anything retained beyond the
    37  // life of this function must be copied
    38  func (ii *InvertedIndex) Add(labels []cortexpb.LabelAdapter, fp model.Fingerprint) labels.Labels {
    39  	shard := &ii.shards[util.HashFP(fp)%indexShards]
    40  	return shard.add(labels, fp) // add() returns 'interned' values so the original labels are not retained
    41  }
    42  
    43  // Lookup all fingerprints for the provided matchers.
    44  func (ii *InvertedIndex) Lookup(matchers []*labels.Matcher) []model.Fingerprint {
    45  	if len(matchers) == 0 {
    46  		return nil
    47  	}
    48  
    49  	result := []model.Fingerprint{}
    50  	for i := range ii.shards {
    51  		fps := ii.shards[i].lookup(matchers)
    52  		result = append(result, fps...)
    53  	}
    54  
    55  	return result
    56  }
    57  
    58  // LabelNames returns all label names.
    59  func (ii *InvertedIndex) LabelNames() []string {
    60  	results := make([][]string, 0, indexShards)
    61  
    62  	for i := range ii.shards {
    63  		shardResult := ii.shards[i].labelNames()
    64  		results = append(results, shardResult)
    65  	}
    66  
    67  	return mergeStringSlices(results)
    68  }
    69  
    70  // LabelValues returns the values for the given label.
    71  func (ii *InvertedIndex) LabelValues(name string) []string {
    72  	results := make([][]string, 0, indexShards)
    73  
    74  	for i := range ii.shards {
    75  		shardResult := ii.shards[i].labelValues(name)
    76  		results = append(results, shardResult)
    77  	}
    78  
    79  	return mergeStringSlices(results)
    80  }
    81  
    82  // Delete a fingerprint with the given label pairs.
    83  func (ii *InvertedIndex) Delete(labels labels.Labels, fp model.Fingerprint) {
    84  	shard := &ii.shards[util.HashFP(fp)%indexShards]
    85  	shard.delete(labels, fp)
    86  }
    87  
// indexEntry holds everything indexed under a single label name.
// NB the fingerprint slices stored in each indexValueEntry are sorted in fp order.
type indexEntry struct {
	// name is the index's own copy of the label name.
	name string
	// fps maps each label value seen for this name to its entry.
	fps map[string]indexValueEntry
}
    93  
// indexValueEntry holds the fingerprints recorded for one label value.
type indexValueEntry struct {
	// value is the index's own copy of the label value.
	value string
	// fps is kept in ascending order: add() inserts at the position found
	// by binary search and delete() relies on the same ordering.
	fps []model.Fingerprint
}
    98  
// unlockIndex maps a label name to its indexEntry. The type carries no lock
// of its own; within this file it is accessed under indexShard.mtx.
type unlockIndex map[string]indexEntry
   100  
// cacheLineSize is the assumed CPU cache line size in bytes; it is used to
// size the padding of indexShard.
// This is the prevalent value for Intel and AMD CPUs as-at 2018.
const cacheLineSize = 64
   103  
   104  type indexShard struct {
   105  	mtx sync.RWMutex
   106  	idx unlockIndex
   107  	//nolint:structcheck,unused
   108  	pad [cacheLineSize - unsafe.Sizeof(sync.Mutex{}) - unsafe.Sizeof(unlockIndex{})]byte
   109  }
   110  
   111  func copyString(s string) string {
   112  	return string([]byte(s))
   113  }
   114  
   115  // add metric to the index; return all the name/value pairs as a fresh
   116  // sorted slice, referencing 'interned' strings from the index so that
   117  // no references are retained to the memory of `metric`.
   118  func (shard *indexShard) add(metric []cortexpb.LabelAdapter, fp model.Fingerprint) labels.Labels {
   119  	shard.mtx.Lock()
   120  	defer shard.mtx.Unlock()
   121  
   122  	internedLabels := make(labels.Labels, len(metric))
   123  
   124  	for i, pair := range metric {
   125  		values, ok := shard.idx[pair.Name]
   126  		if !ok {
   127  			values = indexEntry{
   128  				name: copyString(pair.Name),
   129  				fps:  map[string]indexValueEntry{},
   130  			}
   131  			shard.idx[values.name] = values
   132  		}
   133  		fingerprints, ok := values.fps[pair.Value]
   134  		if !ok {
   135  			fingerprints = indexValueEntry{
   136  				value: copyString(pair.Value),
   137  			}
   138  		}
   139  		// Insert into the right position to keep fingerprints sorted
   140  		j := sort.Search(len(fingerprints.fps), func(i int) bool {
   141  			return fingerprints.fps[i] >= fp
   142  		})
   143  		fingerprints.fps = append(fingerprints.fps, 0)
   144  		copy(fingerprints.fps[j+1:], fingerprints.fps[j:])
   145  		fingerprints.fps[j] = fp
   146  		values.fps[fingerprints.value] = fingerprints
   147  		internedLabels[i] = labels.Label{Name: values.name, Value: fingerprints.value}
   148  	}
   149  	sort.Sort(internedLabels)
   150  	return internedLabels
   151  }
   152  
   153  func (shard *indexShard) lookup(matchers []*labels.Matcher) []model.Fingerprint {
   154  	// index slice values must only be accessed under lock, so all
   155  	// code paths must take a copy before returning
   156  	shard.mtx.RLock()
   157  	defer shard.mtx.RUnlock()
   158  
   159  	// per-shard intersection is initially nil, which is a special case
   160  	// meaning "everything" when passed to intersect()
   161  	// loop invariant: result is sorted
   162  	var result []model.Fingerprint
   163  	for _, matcher := range matchers {
   164  		values, ok := shard.idx[matcher.Name]
   165  		if !ok {
   166  			return nil
   167  		}
   168  		var toIntersect model.Fingerprints
   169  		if matcher.Type == labels.MatchEqual {
   170  			fps := values.fps[matcher.Value]
   171  			toIntersect = append(toIntersect, fps.fps...) // deliberate copy
   172  		} else if matcher.Type == labels.MatchRegexp && len(chunk.FindSetMatches(matcher.Value)) > 0 {
   173  			// The lookup is of the form `=~"a|b|c|d"`
   174  			set := chunk.FindSetMatches(matcher.Value)
   175  			for _, value := range set {
   176  				toIntersect = append(toIntersect, values.fps[value].fps...)
   177  			}
   178  			sort.Sort(toIntersect)
   179  		} else {
   180  			// accumulate the matching fingerprints (which are all distinct)
   181  			// then sort to maintain the invariant
   182  			for value, fps := range values.fps {
   183  				if matcher.Matches(value) {
   184  					toIntersect = append(toIntersect, fps.fps...)
   185  				}
   186  			}
   187  			sort.Sort(toIntersect)
   188  		}
   189  		result = intersect(result, toIntersect)
   190  		if len(result) == 0 {
   191  			return nil
   192  		}
   193  	}
   194  
   195  	return result
   196  }
   197  
   198  func (shard *indexShard) labelNames() []string {
   199  	shard.mtx.RLock()
   200  	defer shard.mtx.RUnlock()
   201  
   202  	results := make([]string, 0, len(shard.idx))
   203  	for name := range shard.idx {
   204  		results = append(results, name)
   205  	}
   206  
   207  	sort.Strings(results)
   208  	return results
   209  }
   210  
   211  func (shard *indexShard) labelValues(name string) []string {
   212  	shard.mtx.RLock()
   213  	defer shard.mtx.RUnlock()
   214  
   215  	values, ok := shard.idx[name]
   216  	if !ok {
   217  		return nil
   218  	}
   219  
   220  	results := make([]string, 0, len(values.fps))
   221  	for val := range values.fps {
   222  		results = append(results, val)
   223  	}
   224  
   225  	sort.Strings(results)
   226  	return results
   227  }
   228  
   229  func (shard *indexShard) delete(labels labels.Labels, fp model.Fingerprint) {
   230  	shard.mtx.Lock()
   231  	defer shard.mtx.Unlock()
   232  
   233  	for _, pair := range labels {
   234  		name, value := pair.Name, pair.Value
   235  		values, ok := shard.idx[name]
   236  		if !ok {
   237  			continue
   238  		}
   239  		fingerprints, ok := values.fps[value]
   240  		if !ok {
   241  			continue
   242  		}
   243  
   244  		j := sort.Search(len(fingerprints.fps), func(i int) bool {
   245  			return fingerprints.fps[i] >= fp
   246  		})
   247  
   248  		// see if search didn't find fp which matches the condition which means we don't have to do anything.
   249  		if j >= len(fingerprints.fps) || fingerprints.fps[j] != fp {
   250  			continue
   251  		}
   252  		fingerprints.fps = fingerprints.fps[:j+copy(fingerprints.fps[j:], fingerprints.fps[j+1:])]
   253  
   254  		if len(fingerprints.fps) == 0 {
   255  			delete(values.fps, value)
   256  		} else {
   257  			values.fps[value] = fingerprints
   258  		}
   259  
   260  		if len(values.fps) == 0 {
   261  			delete(shard.idx, name)
   262  		} else {
   263  			shard.idx[name] = values
   264  		}
   265  	}
   266  }
   267  
   268  // intersect two sorted lists of fingerprints.  Assumes there are no duplicate
   269  // fingerprints within the input lists.
   270  func intersect(a, b []model.Fingerprint) []model.Fingerprint {
   271  	if a == nil {
   272  		return b
   273  	}
   274  	result := []model.Fingerprint{}
   275  	for i, j := 0, 0; i < len(a) && j < len(b); {
   276  		if a[i] == b[j] {
   277  			result = append(result, a[i])
   278  		}
   279  		if a[i] < b[j] {
   280  			i++
   281  		} else {
   282  			j++
   283  		}
   284  	}
   285  	return result
   286  }
   287  
   288  func mergeStringSlices(ss [][]string) []string {
   289  	switch len(ss) {
   290  	case 0:
   291  		return nil
   292  	case 1:
   293  		return ss[0]
   294  	case 2:
   295  		return mergeTwoStringSlices(ss[0], ss[1])
   296  	default:
   297  		halfway := len(ss) / 2
   298  		return mergeTwoStringSlices(
   299  			mergeStringSlices(ss[:halfway]),
   300  			mergeStringSlices(ss[halfway:]),
   301  		)
   302  	}
   303  }
   304  
   305  func mergeTwoStringSlices(a, b []string) []string {
   306  	result := make([]string, 0, len(a)+len(b))
   307  	i, j := 0, 0
   308  	for i < len(a) && j < len(b) {
   309  		if a[i] < b[j] {
   310  			result = append(result, a[i])
   311  			i++
   312  		} else if a[i] > b[j] {
   313  			result = append(result, b[j])
   314  			j++
   315  		} else {
   316  			result = append(result, a[i])
   317  			i++
   318  			j++
   319  		}
   320  	}
   321  	result = append(result, a[i:]...)
   322  	result = append(result, b[j:]...)
   323  	return result
   324  }