eintopf.info@v0.13.16/service/search/aggregation.go (about) 1 // Copyright (C) 2022 The Eintopf authors 2 // 3 // This program is free software: you can redistribute it and/or modify 4 // it under the terms of the GNU Affero General Public License as 5 // published by the Free Software Foundation, either version 3 of the 6 // License, or (at your option) any later version. 7 // 8 // This program is distributed in the hope that it will be useful, 9 // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 // GNU Affero General Public License for more details. 12 // 13 // You should have received a copy of the GNU Affero General Public License 14 // along with this program. If not, see <https://www.gnu.org/licenses/>. 15 16 package search 17 18 import ( 19 "fmt" 20 "sort" 21 "time" 22 ) 23 24 type AggregationType string 25 26 const ( 27 TermsAggregation = "terms" 28 ObjectsAggregation = "objects" 29 DateRangeAggregation = "daterange" 30 ) 31 32 type Aggregation struct { 33 Type AggregationType `json:"type"` 34 Field string `json:"field"` 35 Filters []Filter `json:"filters"` 36 } 37 38 // CacheKey returns the same string for every identical aggregations. All 39 // aggregations resulting in the same bucket (given the same index) should 40 // return an equal cache key. 41 func (a Aggregation) CacheKey() string { 42 cacheKey := fmt.Sprint(a.Type) + a.Field 43 for _, filter := range a.Filters { 44 cacheKey += filter.CacheKey() 45 } 46 return cacheKey 47 } 48 49 // aggregator aggregates values into a bucket. 50 type aggregator interface { 51 // aggregate aggregates a single value. 52 // It may return an error if the type or value is invalid. 53 aggregate(value interface{}) error 54 // bucket returns the resulting bucket. 55 bucket() Bucket 56 } 57 58 type termsAggregator struct { 59 // terms maps unique terms to its count. 60 terms map[string]int 61 } 62 63 // aggregate takes a value with one of the following types: 64 // - string 65 // - []string 66 // - []interface{} 67 // 68 // Returns an error if the value has a different type. 69 func (t *termsAggregator) aggregate(value interface{}) error { 70 // Determine the term value and increase the count in the terms map. 71 switch v := value.(type) { 72 case string: 73 t.terms[v]++ 74 case []string: 75 for _, vv := range v { 76 t.terms[vv]++ 77 } 78 case []interface{}: 79 for _, vv := range v { 80 t.aggregate(vv) 81 } 82 default: 83 return fmt.Errorf("invalid type: %T", v) 84 } 85 return nil 86 } 87 88 // bucket returns a new TermsBucket. 89 func (t *termsAggregator) bucket() Bucket { 90 terms := make(TermsBucket, 0, len(t.terms)) 91 for term, count := range t.terms { 92 terms = append(terms, Term{Term: term, Count: count}) 93 } 94 // Sort the terms slice, to provide a stable output. 95 sort.Slice(terms, func(i, j int) bool { return terms[i].Term < terms[j].Term }) 96 return terms 97 } 98 99 // DateLayout is the date layout used internally for date aggregations. 100 const DateLayout = "2006-01-02T15:04:05Z07:00" 101 102 type dateRangeAggregator struct { 103 min time.Time 104 max time.Time 105 } 106 107 func (d *dateRangeAggregator) aggregate(value interface{}) error { 108 switch v := value.(type) { 109 case string: 110 return d.aggregateString(v) 111 case []interface{}: 112 for _, vv := range v { 113 switch vvv := vv.(type) { 114 case string: 115 err := d.aggregateString(vvv) 116 if err != nil { 117 return err 118 } 119 default: 120 return fmt.Errorf("invalid type: %T", v) 121 } 122 } 123 return nil 124 default: 125 return fmt.Errorf("invalid type: %T", v) 126 } 127 } 128 129 func (d *dateRangeAggregator) aggregateString(value string) error { 130 date, err := time.Parse(DateLayout, value) 131 if err != nil { 132 return fmt.Errorf("invalid date format: %s", err) 133 } 134 135 if date.Unix() < d.min.Unix() { 136 d.min = date 137 } 138 if date.Unix() > d.max.Unix() { 139 d.max = date 140 } 141 return nil 142 } 143 144 func (d *dateRangeAggregator) bucket() Bucket { 145 return DateRangeBucket{Min: d.min, Max: d.max} 146 } 147 148 type Bucket interface { 149 BucketType() AggregationType 150 } 151 152 type TermsBucket []Term 153 154 func (t TermsBucket) BucketType() AggregationType { 155 return TermsAggregation 156 } 157 158 type Term struct { 159 Term string `json:"term"` 160 Count int `json:"count"` 161 } 162 163 type DateRangeBucket struct { 164 Min time.Time `json:"min"` 165 Max time.Time `json:"max"` 166 } 167 168 func (d DateRangeBucket) BucketType() AggregationType { 169 return DateRangeAggregation 170 }