github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/aggregate_results.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package index
    22  
    23  import (
    24  	"math"
    25  	"sync"
    26  
    27  	"github.com/uber-go/tally"
    28  
    29  	"github.com/m3db/m3/src/x/ident"
    30  	"github.com/m3db/m3/src/x/instrument"
    31  )
    32  
// aggregatedResults accumulates aggregated field and term results for a
// namespace. The embedded RWMutex is taken by the methods on this type when
// they read or mutate the fields below.
type aggregatedResults struct {
	sync.RWMutex

	// nsID is the namespace these results belong to; Reset replaces it with a
	// pooled clone of the ID it is given.
	nsID ident.ID
	// aggregateOpts holds the limits and usage metrics applied by AddFields.
	aggregateOpts AggregateResultsOptions

	// resultsMap holds the aggregated values, keyed by field.
	resultsMap *AggregateResultsMap
	// size is the running number of inserts (new fields plus new terms)
	// performed by AddFields since the last Reset.
	size int
	// totalDocsCount is the running number of docs counted by AddFields since
	// the last Reset.
	totalDocsCount int

	// Utilization stats, do not reset.
	resultsUtilizationStats resultsUtilizationStats

	// idPool is used to clone the namespace ID in Reset.
	idPool ident.Pool
	// pool, when non-nil, is where Finalize may return this object.
	pool AggregateResultsPool
	// valuesPool supplies the per-field values container for new fields.
	valuesPool AggregateValuesPool

	// iOpts is used to build default usage metrics in Reset.
	iOpts instrument.Options
}
    52  
// Compile-time check that *usageMetrics satisfies AggregateUsageMetrics.
var _ AggregateUsageMetrics = (*usageMetrics)(nil)

// usageMetrics is a set of counters tracking aggregate query usage. Any
// counter may be nil (e.g. in the zero value), in which case the matching
// Inc* method is a no-op.
type usageMetrics struct {
	// total is incremented by AddFields with the number of fields plus terms
	// in each batch.
	total tally.Counter

	// totalTerms counts every term seen; dedupedTerms counts only terms that
	// were newly inserted.
	totalTerms   tally.Counter
	dedupedTerms tally.Counter

	// totalFields counts every field seen; dedupedFields counts only fields
	// that were newly inserted.
	totalFields   tally.Counter
	dedupedFields tally.Counter
}
    64  
    65  func (m *usageMetrics) IncTotal(val int64) {
    66  	// NB: if metrics not set, to valid values, no-op.
    67  	if m.total != nil {
    68  		m.total.Inc(val)
    69  	}
    70  }
    71  
    72  func (m *usageMetrics) IncTotalTerms(val int64) {
    73  	// NB: if metrics not set, to valid values, no-op.
    74  	if m.totalTerms != nil {
    75  		m.totalTerms.Inc(val)
    76  	}
    77  }
    78  
    79  func (m *usageMetrics) IncDedupedTerms(val int64) {
    80  	// NB: if metrics not set, to valid values, no-op.
    81  	if m.dedupedTerms != nil {
    82  		m.dedupedTerms.Inc(val)
    83  	}
    84  }
    85  
    86  func (m *usageMetrics) IncTotalFields(val int64) {
    87  	// NB: if metrics not set, to valid values, no-op.
    88  	if m.totalFields != nil {
    89  		m.totalFields.Inc(val)
    90  	}
    91  }
    92  
    93  func (m *usageMetrics) IncDedupedFields(val int64) {
    94  	// NB: if metrics not set, to valid values, no-op.
    95  	if m.dedupedFields != nil {
    96  		m.dedupedFields.Inc(val)
    97  	}
    98  }
    99  
   100  // NewAggregateUsageMetrics builds a new aggregated usage metrics.
   101  func NewAggregateUsageMetrics(ns ident.ID, iOpts instrument.Options) AggregateUsageMetrics {
   102  	if ns == nil {
   103  		return &usageMetrics{}
   104  	}
   105  
   106  	scope := iOpts.MetricsScope()
   107  	buildCounter := func(val string) tally.Counter {
   108  		return scope.
   109  			Tagged(map[string]string{"type": val, "namespace": ns.String()}).
   110  			Counter("aggregated-results")
   111  	}
   112  
   113  	return &usageMetrics{
   114  		total:         buildCounter("total"),
   115  		totalTerms:    buildCounter("total-terms"),
   116  		dedupedTerms:  buildCounter("deduped-terms"),
   117  		totalFields:   buildCounter("total-fields"),
   118  		dedupedFields: buildCounter("deduped-fields"),
   119  	}
   120  }
   121  
   122  // NewAggregateResults returns a new AggregateResults object.
   123  func NewAggregateResults(
   124  	namespaceID ident.ID,
   125  	aggregateOpts AggregateResultsOptions,
   126  	opts Options,
   127  ) AggregateResults {
   128  	if aggregateOpts.AggregateUsageMetrics == nil {
   129  		aggregateOpts.AggregateUsageMetrics = &usageMetrics{}
   130  	}
   131  
   132  	return &aggregatedResults{
   133  		nsID:          namespaceID,
   134  		aggregateOpts: aggregateOpts,
   135  		iOpts:         opts.InstrumentOptions(),
   136  		resultsMap:    newAggregateResultsMap(opts.IdentifierPool()),
   137  		idPool:        opts.IdentifierPool(),
   138  		pool:          opts.AggregateResultsPool(),
   139  		valuesPool:    opts.AggregateValuesPool(),
   140  	}
   141  }
   142  
   143  func (r *aggregatedResults) EnforceLimits() bool { return true }
   144  
   145  func (r *aggregatedResults) Reset(
   146  	nsID ident.ID,
   147  	aggregateOpts AggregateResultsOptions,
   148  ) {
   149  	r.Lock()
   150  
   151  	if aggregateOpts.AggregateUsageMetrics == nil {
   152  		aggregateOpts.AggregateUsageMetrics = NewAggregateUsageMetrics(nsID, r.iOpts)
   153  	}
   154  
   155  	r.aggregateOpts = aggregateOpts
   156  	// finalize existing held nsID
   157  	if r.nsID != nil {
   158  		r.nsID.Finalize()
   159  	}
   160  
   161  	// make an independent copy of the new nsID
   162  	if nsID != nil {
   163  		nsID = r.idPool.Clone(nsID)
   164  	}
   165  	r.nsID = nsID
   166  
   167  	// reset all values from map first
   168  	for _, entry := range r.resultsMap.Iter() {
   169  		valueMap := entry.Value()
   170  		valueMap.finalize()
   171  	}
   172  	// reset all keys in the map next
   173  	r.resultsMap.Reset()
   174  	r.totalDocsCount = 0
   175  	r.size = 0
   176  
   177  	// NB: could do keys+value in one step but I'm trying to avoid
   178  	// using an internal method of a code-gen'd type.
   179  	r.Unlock()
   180  }
   181  
   182  func (r *aggregatedResults) AggregateResultsOptions() AggregateResultsOptions {
   183  	return r.aggregateOpts
   184  }
   185  
// AddFields merges a batch of field/term entries into the results while
// honoring the configured DocsLimit and SizeLimit (a value of 0 disables the
// respective limit). It returns the updated size and total docs count.
//
// Accounting, as implemented below:
//   - every processed field counts as one doc, as does every term handled
//     while the docs budget has room;
//   - every newly inserted field or term counts as one insert (size);
//   - the number of inserts is additionally capped by the remaining docs.
//
// Ownership of each visited ident is assumed: it either becomes a map key
// (without copying) or is finalized.
func (r *aggregatedResults) AddFields(batch []AggregateResultsEntry) (int, int) {
	r.Lock()
	defer r.Unlock()

	// NB: init total count with batch length, since each aggregated entry
	// will have one field.
	totalCount := len(batch)
	for idx := 0; idx < len(batch); idx++ {
		totalCount += len(batch[idx].Terms)
	}

	// The total is recorded up front, regardless of how much of the batch
	// ends up being accepted.
	r.aggregateOpts.AggregateUsageMetrics.IncTotal(int64(totalCount))
	remainingDocs := math.MaxInt64
	if r.aggregateOpts.DocsLimit != 0 {
		remainingDocs = r.aggregateOpts.DocsLimit - r.totalDocsCount
	}

	// NB: already hit doc limit.
	// Nothing can be accepted: count and release every field and term in the
	// batch, leaving size and totalDocsCount unchanged.
	if remainingDocs <= 0 {
		for idx := 0; idx < len(batch); idx++ {
			batch[idx].Field.Finalize()
			r.aggregateOpts.AggregateUsageMetrics.IncTotalFields(1)
			for _, term := range batch[idx].Terms {
				r.aggregateOpts.AggregateUsageMetrics.IncTotalTerms(1)
				term.Finalize()
			}
		}

		return r.size, r.totalDocsCount
	}

	// NB: cannot insert more than max docs, so that acts as the upper bound here.
	remainingInserts := remainingDocs
	if r.aggregateOpts.SizeLimit != 0 {
		if remaining := r.aggregateOpts.SizeLimit - r.size; remaining < remainingInserts {
			remainingInserts = remaining
		}
	}

	var (
		// docs and numInserts track what this call has consumed so far; they
		// are folded into totalDocsCount and size on return.
		docs       int
		numInserts int
		entry      AggregateResultsEntry
	)

	for idx := 0; idx < len(batch); idx++ {
		entry = batch[idx]
		r.aggregateOpts.AggregateUsageMetrics.IncTotalFields(1)

		// A limit was reached while processing this batch: release the current
		// entry, commit the counters and stop.
		// NOTE(review): entries after idx are neither finalized nor counted on
		// this path, unlike the "already hit doc limit" branch above, which
		// walks the whole batch — confirm this is intended.
		if docs >= remainingDocs || numInserts >= remainingInserts {
			entry.Field.Finalize()
			for _, term := range entry.Terms {
				r.aggregateOpts.AggregateUsageMetrics.IncTotalTerms(1)
				term.Finalize()
			}

			r.size += numInserts
			r.totalDocsCount += docs
			return r.size, r.totalDocsCount
		}

		// The field itself counts as a doc.
		docs++
		f := entry.Field
		aggValues, ok := r.resultsMap.Get(f)
		if !ok {
			if remainingInserts > numInserts {
				r.aggregateOpts.AggregateUsageMetrics.IncDedupedFields(1)

				numInserts++
				aggValues = r.valuesPool.Get()
				// we can avoid the copy because we assume ownership of the passed ident.ID,
				// but still need to finalize it.
				r.resultsMap.SetUnsafe(f, aggValues, AggregateResultsMapSetUnsafeOptions{
					NoCopyKey:     true,
					NoFinalizeKey: false,
				})
			} else {
				// this value exceeds the limit, so should be released to the underlying
				// pool without adding to the map.
				// NOTE(review): the guard at the top of the loop already returned
				// when numInserts >= remainingInserts and numInserts has not
				// changed since, so this branch looks unreachable.
				f.Finalize()
			}
		} else {
			// because we already have an entry for this field, we release the ident back to
			// the underlying pool.
			f.Finalize()
		}

		valuesMap := aggValues.Map()
		for _, t := range entry.Terms {
			r.aggregateOpts.AggregateUsageMetrics.IncTotalTerms(1)
			if remainingDocs > docs {
				// The term counts as a doc even if it turns out to be a duplicate
				// or cannot be inserted because of the insert budget.
				docs++
				if !valuesMap.Contains(t) {
					// we can avoid the copy because we assume ownership of the passed ident.ID,
					// but still need to finalize it.
					if remainingInserts > numInserts {
						r.aggregateOpts.AggregateUsageMetrics.IncDedupedTerms(1)
						valuesMap.SetUnsafe(t, struct{}{}, AggregateValuesMapSetUnsafeOptions{
							NoCopyKey:     true,
							NoFinalizeKey: false,
						})
						numInserts++
						continue
					}
				}
			}

			// Term was not stored (over a limit or already present): release it.
			t.Finalize()
		}
	}

	r.size += numInserts
	r.totalDocsCount += docs
	return r.size, r.totalDocsCount
}
   301  
   302  func (r *aggregatedResults) Namespace() ident.ID {
   303  	r.RLock()
   304  	ns := r.nsID
   305  	r.RUnlock()
   306  	return ns
   307  }
   308  
   309  func (r *aggregatedResults) Map() *AggregateResultsMap {
   310  	r.RLock()
   311  	m := r.resultsMap
   312  	r.RUnlock()
   313  	return m
   314  }
   315  
   316  func (r *aggregatedResults) Size() int {
   317  	r.RLock()
   318  	size := r.size
   319  	r.RUnlock()
   320  	return size
   321  }
   322  
   323  func (r *aggregatedResults) TotalDocsCount() int {
   324  	r.RLock()
   325  	count := r.totalDocsCount
   326  	r.RUnlock()
   327  	return count
   328  }
   329  
   330  func (r *aggregatedResults) Finalize() {
   331  	r.Lock()
   332  	returnToPool := r.resultsUtilizationStats.updateAndCheck(r.totalDocsCount)
   333  	r.Unlock()
   334  
   335  	r.Reset(nil, AggregateResultsOptions{})
   336  
   337  	if r.pool != nil && returnToPool {
   338  		r.pool.Put(r)
   339  	}
   340  }