github.com/m3db/m3@v1.5.0/src/dbnode/storage/index/aggregate_results.go (about) 1 // Copyright (c) 2019 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package index 22 23 import ( 24 "math" 25 "sync" 26 27 "github.com/uber-go/tally" 28 29 "github.com/m3db/m3/src/x/ident" 30 "github.com/m3db/m3/src/x/instrument" 31 ) 32 33 type aggregatedResults struct { 34 sync.RWMutex 35 36 nsID ident.ID 37 aggregateOpts AggregateResultsOptions 38 39 resultsMap *AggregateResultsMap 40 size int 41 totalDocsCount int 42 43 // Utilization stats, do not reset. 44 resultsUtilizationStats resultsUtilizationStats 45 46 idPool ident.Pool 47 pool AggregateResultsPool 48 valuesPool AggregateValuesPool 49 50 iOpts instrument.Options 51 } 52 53 var _ AggregateUsageMetrics = (*usageMetrics)(nil) 54 55 type usageMetrics struct { 56 total tally.Counter 57 58 totalTerms tally.Counter 59 dedupedTerms tally.Counter 60 61 totalFields tally.Counter 62 dedupedFields tally.Counter 63 } 64 65 func (m *usageMetrics) IncTotal(val int64) { 66 // NB: if metrics not set, to valid values, no-op. 67 if m.total != nil { 68 m.total.Inc(val) 69 } 70 } 71 72 func (m *usageMetrics) IncTotalTerms(val int64) { 73 // NB: if metrics not set, to valid values, no-op. 74 if m.totalTerms != nil { 75 m.totalTerms.Inc(val) 76 } 77 } 78 79 func (m *usageMetrics) IncDedupedTerms(val int64) { 80 // NB: if metrics not set, to valid values, no-op. 81 if m.dedupedTerms != nil { 82 m.dedupedTerms.Inc(val) 83 } 84 } 85 86 func (m *usageMetrics) IncTotalFields(val int64) { 87 // NB: if metrics not set, to valid values, no-op. 88 if m.totalFields != nil { 89 m.totalFields.Inc(val) 90 } 91 } 92 93 func (m *usageMetrics) IncDedupedFields(val int64) { 94 // NB: if metrics not set, to valid values, no-op. 95 if m.dedupedFields != nil { 96 m.dedupedFields.Inc(val) 97 } 98 } 99 100 // NewAggregateUsageMetrics builds a new aggregated usage metrics. 101 func NewAggregateUsageMetrics(ns ident.ID, iOpts instrument.Options) AggregateUsageMetrics { 102 if ns == nil { 103 return &usageMetrics{} 104 } 105 106 scope := iOpts.MetricsScope() 107 buildCounter := func(val string) tally.Counter { 108 return scope. 109 Tagged(map[string]string{"type": val, "namespace": ns.String()}). 110 Counter("aggregated-results") 111 } 112 113 return &usageMetrics{ 114 total: buildCounter("total"), 115 totalTerms: buildCounter("total-terms"), 116 dedupedTerms: buildCounter("deduped-terms"), 117 totalFields: buildCounter("total-fields"), 118 dedupedFields: buildCounter("deduped-fields"), 119 } 120 } 121 122 // NewAggregateResults returns a new AggregateResults object. 123 func NewAggregateResults( 124 namespaceID ident.ID, 125 aggregateOpts AggregateResultsOptions, 126 opts Options, 127 ) AggregateResults { 128 if aggregateOpts.AggregateUsageMetrics == nil { 129 aggregateOpts.AggregateUsageMetrics = &usageMetrics{} 130 } 131 132 return &aggregatedResults{ 133 nsID: namespaceID, 134 aggregateOpts: aggregateOpts, 135 iOpts: opts.InstrumentOptions(), 136 resultsMap: newAggregateResultsMap(opts.IdentifierPool()), 137 idPool: opts.IdentifierPool(), 138 pool: opts.AggregateResultsPool(), 139 valuesPool: opts.AggregateValuesPool(), 140 } 141 } 142 143 func (r *aggregatedResults) EnforceLimits() bool { return true } 144 145 func (r *aggregatedResults) Reset( 146 nsID ident.ID, 147 aggregateOpts AggregateResultsOptions, 148 ) { 149 r.Lock() 150 151 if aggregateOpts.AggregateUsageMetrics == nil { 152 aggregateOpts.AggregateUsageMetrics = NewAggregateUsageMetrics(nsID, r.iOpts) 153 } 154 155 r.aggregateOpts = aggregateOpts 156 // finalize existing held nsID 157 if r.nsID != nil { 158 r.nsID.Finalize() 159 } 160 161 // make an independent copy of the new nsID 162 if nsID != nil { 163 nsID = r.idPool.Clone(nsID) 164 } 165 r.nsID = nsID 166 167 // reset all values from map first 168 for _, entry := range r.resultsMap.Iter() { 169 valueMap := entry.Value() 170 valueMap.finalize() 171 } 172 // reset all keys in the map next 173 r.resultsMap.Reset() 174 r.totalDocsCount = 0 175 r.size = 0 176 177 // NB: could do keys+value in one step but I'm trying to avoid 178 // using an internal method of a code-gen'd type. 179 r.Unlock() 180 } 181 182 func (r *aggregatedResults) AggregateResultsOptions() AggregateResultsOptions { 183 return r.aggregateOpts 184 } 185 186 func (r *aggregatedResults) AddFields(batch []AggregateResultsEntry) (int, int) { 187 r.Lock() 188 defer r.Unlock() 189 190 // NB: init total count with batch length, since each aggregated entry 191 // will have one field. 192 totalCount := len(batch) 193 for idx := 0; idx < len(batch); idx++ { 194 totalCount += len(batch[idx].Terms) 195 } 196 197 r.aggregateOpts.AggregateUsageMetrics.IncTotal(int64(totalCount)) 198 remainingDocs := math.MaxInt64 199 if r.aggregateOpts.DocsLimit != 0 { 200 remainingDocs = r.aggregateOpts.DocsLimit - r.totalDocsCount 201 } 202 203 // NB: already hit doc limit. 204 if remainingDocs <= 0 { 205 for idx := 0; idx < len(batch); idx++ { 206 batch[idx].Field.Finalize() 207 r.aggregateOpts.AggregateUsageMetrics.IncTotalFields(1) 208 for _, term := range batch[idx].Terms { 209 r.aggregateOpts.AggregateUsageMetrics.IncTotalTerms(1) 210 term.Finalize() 211 } 212 } 213 214 return r.size, r.totalDocsCount 215 } 216 217 // NB: cannot insert more than max docs, so that acts as the upper bound here. 218 remainingInserts := remainingDocs 219 if r.aggregateOpts.SizeLimit != 0 { 220 if remaining := r.aggregateOpts.SizeLimit - r.size; remaining < remainingInserts { 221 remainingInserts = remaining 222 } 223 } 224 225 var ( 226 docs int 227 numInserts int 228 entry AggregateResultsEntry 229 ) 230 231 for idx := 0; idx < len(batch); idx++ { 232 entry = batch[idx] 233 r.aggregateOpts.AggregateUsageMetrics.IncTotalFields(1) 234 235 if docs >= remainingDocs || numInserts >= remainingInserts { 236 entry.Field.Finalize() 237 for _, term := range entry.Terms { 238 r.aggregateOpts.AggregateUsageMetrics.IncTotalTerms(1) 239 term.Finalize() 240 } 241 242 r.size += numInserts 243 r.totalDocsCount += docs 244 return r.size, r.totalDocsCount 245 } 246 247 docs++ 248 f := entry.Field 249 aggValues, ok := r.resultsMap.Get(f) 250 if !ok { 251 if remainingInserts > numInserts { 252 r.aggregateOpts.AggregateUsageMetrics.IncDedupedFields(1) 253 254 numInserts++ 255 aggValues = r.valuesPool.Get() 256 // we can avoid the copy because we assume ownership of the passed ident.ID, 257 // but still need to finalize it. 258 r.resultsMap.SetUnsafe(f, aggValues, AggregateResultsMapSetUnsafeOptions{ 259 NoCopyKey: true, 260 NoFinalizeKey: false, 261 }) 262 } else { 263 // this value exceeds the limit, so should be released to the underling 264 // pool without adding to the map. 265 f.Finalize() 266 } 267 } else { 268 // because we already have a entry for this field, we release the ident back to 269 // the underlying pool. 270 f.Finalize() 271 } 272 273 valuesMap := aggValues.Map() 274 for _, t := range entry.Terms { 275 r.aggregateOpts.AggregateUsageMetrics.IncTotalTerms(1) 276 if remainingDocs > docs { 277 docs++ 278 if !valuesMap.Contains(t) { 279 // we can avoid the copy because we assume ownership of the passed ident.ID, 280 // but still need to finalize it. 281 if remainingInserts > numInserts { 282 r.aggregateOpts.AggregateUsageMetrics.IncDedupedTerms(1) 283 valuesMap.SetUnsafe(t, struct{}{}, AggregateValuesMapSetUnsafeOptions{ 284 NoCopyKey: true, 285 NoFinalizeKey: false, 286 }) 287 numInserts++ 288 continue 289 } 290 } 291 } 292 293 t.Finalize() 294 } 295 } 296 297 r.size += numInserts 298 r.totalDocsCount += docs 299 return r.size, r.totalDocsCount 300 } 301 302 func (r *aggregatedResults) Namespace() ident.ID { 303 r.RLock() 304 ns := r.nsID 305 r.RUnlock() 306 return ns 307 } 308 309 func (r *aggregatedResults) Map() *AggregateResultsMap { 310 r.RLock() 311 m := r.resultsMap 312 r.RUnlock() 313 return m 314 } 315 316 func (r *aggregatedResults) Size() int { 317 r.RLock() 318 size := r.size 319 r.RUnlock() 320 return size 321 } 322 323 func (r *aggregatedResults) TotalDocsCount() int { 324 r.RLock() 325 count := r.totalDocsCount 326 r.RUnlock() 327 return count 328 } 329 330 func (r *aggregatedResults) Finalize() { 331 r.Lock() 332 returnToPool := r.resultsUtilizationStats.updateAndCheck(r.totalDocsCount) 333 r.Unlock() 334 335 r.Reset(nil, AggregateResultsOptions{}) 336 337 if r.pool != nil && returnToPool { 338 r.pool.Put(r) 339 } 340 }