github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/aggregator/date.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package aggregator 13 14 import ( 15 "fmt" 16 "math" 17 "sort" 18 "time" 19 20 "github.com/pkg/errors" 21 "github.com/weaviate/weaviate/adapters/repos/db/inverted" 22 "github.com/weaviate/weaviate/entities/aggregation" 23 ) 24 25 func addDateAggregations(prop *aggregation.Property, 26 aggs []aggregation.Aggregator, agg *dateAggregator, 27 ) { 28 if prop.DateAggregations == nil { 29 prop.DateAggregations = map[string]interface{}{} 30 } 31 agg.buildPairsFromCounts() 32 33 // if there are no elements to aggregate over because a filter does not match anything, calculating median etc. makes 34 // no sense. Non-existent entries evaluate to nil with an interface{} map 35 if agg.count == 0 { 36 for _, entry := range aggs { 37 if entry == aggregation.CountAggregator { 38 prop.DateAggregations["count"] = int64(agg.count) 39 break 40 } 41 } 42 return 43 } 44 45 // when combining the results from different shards, we need the raw dates to recompute the mode and median. 46 // Therefore we add a reference later which needs to be cleared out before returning the results to a user 47 for _, aProp := range aggs { 48 switch aProp { 49 case aggregation.ModeAggregator, aggregation.MedianAggregator: 50 prop.DateAggregations["_dateAggregator"] = agg 51 } 52 } 53 54 for _, aProp := range aggs { 55 switch aProp { 56 case aggregation.MinimumAggregator: 57 prop.DateAggregations[aProp.String()] = agg.Min() 58 case aggregation.MaximumAggregator: 59 prop.DateAggregations[aProp.String()] = agg.Max() 60 case aggregation.ModeAggregator: 61 prop.DateAggregations[aProp.String()] = agg.Mode() 62 case aggregation.CountAggregator: 63 prop.DateAggregations[aProp.String()] = agg.Count() 64 case aggregation.MedianAggregator: 65 prop.DateAggregations[aProp.String()] = agg.Median() 66 67 default: 68 continue 69 } 70 } 71 } 72 73 type dateAggregator struct { 74 count uint64 75 maxCount uint64 76 min timestamp 77 max timestamp 78 mode timestamp 79 pairs []timestampCountPair // for row-based median calculation 80 valueCounter map[timestamp]uint64 // for individual median calculation 81 } 82 83 func newDateAggregator() *dateAggregator { 84 return &dateAggregator{ 85 min: timestamp{epochNano: math.MaxInt64}, 86 valueCounter: map[timestamp]uint64{}, 87 pairs: make([]timestampCountPair, 0), 88 } 89 } 90 91 // timestamp allows us to contain multiple representations of a datetime 92 // the nanosecs value is needed for the numerical comparisons, and the 93 // string value is what the user expects to see 94 type timestamp struct { 95 epochNano int64 96 rfc3339 string 97 } 98 99 func newTimestamp(epochNano int64) timestamp { 100 return timestamp{ 101 epochNano: epochNano, 102 rfc3339: time.Unix(0, epochNano).UTC().Format(time.RFC3339Nano), 103 } 104 } 105 106 type timestampCountPair struct { 107 value timestamp 108 count uint64 109 } 110 111 func (a *dateAggregator) AddTimestamp(rfc3339 string) error { 112 t, err := time.Parse(time.RFC3339Nano, rfc3339) 113 if err != nil { 114 return fmt.Errorf("failed to parse timestamp: %s", err) 115 } 116 117 ts := timestamp{ 118 epochNano: t.UnixNano(), 119 rfc3339: rfc3339, 120 } 121 return a.addRow(ts, 1) 122 } 123 124 func (a *dateAggregator) AddTimestampRow(b []byte, count uint64) error { 125 nsec, err := inverted.ParseLexicographicallySortableInt64(b) 126 if err != nil { 127 return errors.Wrap(err, "read int64") 128 } 129 130 ts := newTimestamp(nsec) 131 132 return a.addRow(ts, count) 133 } 134 135 func (a *dateAggregator) addRow(ts timestamp, count uint64) error { 136 if count == 0 { 137 // skip 138 return nil 139 } 140 141 a.count += count 142 if ts.epochNano < a.min.epochNano { 143 a.min = ts 144 } 145 if ts.epochNano > a.max.epochNano { 146 a.max = ts 147 } 148 149 currentCount := a.valueCounter[ts] 150 currentCount += count 151 a.valueCounter[ts] = currentCount 152 153 return nil 154 } 155 156 func (a *dateAggregator) Max() string { 157 return a.max.rfc3339 158 } 159 160 func (a *dateAggregator) Min() string { 161 return a.min.rfc3339 162 } 163 164 // Mode does not require preparation if build from rows, but requires a call of 165 // buildPairsFromCounts() if it was built using individual objects 166 func (a *dateAggregator) Mode() string { 167 return a.mode.rfc3339 168 } 169 170 func (a *dateAggregator) Count() int64 { 171 return int64(a.count) 172 } 173 174 // Median does not require preparation if build from rows, but requires a call of 175 // buildPairsFromCounts() if it was built using individual objects 176 // 177 // Check the numericalAggregator.Median() for details about the calculation 178 func (a *dateAggregator) Median() string { 179 middleIndex := a.count / 2 180 count := uint64(0) 181 for index, pair := range a.pairs { 182 count += pair.count 183 if a.count%2 == 1 && count > middleIndex { 184 return pair.value.rfc3339 // case a) 185 } else if a.count%2 == 0 { 186 if count == middleIndex { 187 MedianEpochNano := pair.value.epochNano + (a.pairs[index+1].value.epochNano-pair.value.epochNano)/2 188 return time.Unix(0, MedianEpochNano).UTC().Format(time.RFC3339Nano) // case b2) 189 } else if count > middleIndex { 190 return pair.value.rfc3339 // case b1) 191 } 192 } 193 } 194 panic("Couldn't determine median. This should never happen. Did you add values and call buildRows before?") 195 } 196 197 // turns the value counter into a sorted list, as well as identifying the mode 198 func (a *dateAggregator) buildPairsFromCounts() { 199 a.pairs = a.pairs[:0] // clear out old values in case this function called more than once 200 for value, count := range a.valueCounter { 201 if count > a.maxCount { 202 a.maxCount = count 203 a.mode = value 204 } 205 a.pairs = append(a.pairs, timestampCountPair{value: value, count: count}) 206 } 207 208 sort.Slice(a.pairs, func(x, y int) bool { 209 return a.pairs[x].value.epochNano < a.pairs[y].value.epochNano 210 }) 211 }