eintopf.info@v0.13.16/service/search/aggregation.go (about)

     1  // Copyright (C) 2022 The Eintopf authors
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <https://www.gnu.org/licenses/>.
    15  
    16  package search
    17  
    18  import (
    19  	"fmt"
    20  	"sort"
    21  	"time"
    22  )
    23  
// AggregationType names a kind of aggregation. It is the type of
// Aggregation.Type and the value reported by Bucket.BucketType.
type AggregationType string

// Aggregation type values. These are untyped string constants, so they can be
// used both as plain strings and wherever an AggregationType is expected.
const (
	// TermsAggregation is the type value reported by TermsBucket.
	TermsAggregation     = "terms"
	// ObjectsAggregation is the "objects" type value.
	// NOTE(review): no aggregator or bucket for it is defined in this file.
	ObjectsAggregation   = "objects"
	// DateRangeAggregation is the type value reported by DateRangeBucket.
	DateRangeAggregation = "daterange"
)
    31  
// Aggregation describes a single aggregation: its type, a field and a list
// of filters. All three are part of the value returned by CacheKey.
type Aggregation struct {
	// Type selects the kind of aggregation.
	Type    AggregationType `json:"type"`
	// Field is presumably the name of the document field whose values are
	// aggregated — confirm against the code that executes aggregations.
	Field   string          `json:"field"`
	// Filters presumably restrict the documents taken into account — confirm
	// against the code that executes aggregations.
	Filters []Filter        `json:"filters"`
}
    37  
    38  // CacheKey returns the same string for every identical aggregations. All
    39  // aggregations resulting in the same bucket (given the same index) should
    40  // return an equal cache key.
    41  func (a Aggregation) CacheKey() string {
    42  	cacheKey := fmt.Sprint(a.Type) + a.Field
    43  	for _, filter := range a.Filters {
    44  		cacheKey += filter.CacheKey()
    45  	}
    46  	return cacheKey
    47  }
    48  
// aggregator aggregates values into a bucket.
//
// Values are fed in one at a time through aggregate; bucket then returns the
// accumulated result. In this file it is implemented by *termsAggregator and
// *dateRangeAggregator.
type aggregator interface {
	// aggregate aggregates a single value.
	// It may return an error if the type or value is invalid.
	aggregate(value interface{}) error
	// bucket returns the resulting bucket.
	bucket() Bucket
}
    57  
    58  type termsAggregator struct {
    59  	// terms maps unique terms to its count.
    60  	terms map[string]int
    61  }
    62  
    63  // aggregate takes a value with one of the following types:
    64  //   - string
    65  //   - []string
    66  //   - []interface{}
    67  //
    68  // Returns an error if the value has a different type.
    69  func (t *termsAggregator) aggregate(value interface{}) error {
    70  	// Determine the term value and increase the count in the terms map.
    71  	switch v := value.(type) {
    72  	case string:
    73  		t.terms[v]++
    74  	case []string:
    75  		for _, vv := range v {
    76  			t.terms[vv]++
    77  		}
    78  	case []interface{}:
    79  		for _, vv := range v {
    80  			t.aggregate(vv)
    81  		}
    82  	default:
    83  		return fmt.Errorf("invalid type: %T", v)
    84  	}
    85  	return nil
    86  }
    87  
    88  // bucket returns a new TermsBucket.
    89  func (t *termsAggregator) bucket() Bucket {
    90  	terms := make(TermsBucket, 0, len(t.terms))
    91  	for term, count := range t.terms {
    92  		terms = append(terms, Term{Term: term, Count: count})
    93  	}
    94  	// Sort the terms slice, to provide a stable output.
    95  	sort.Slice(terms, func(i, j int) bool { return terms[i].Term < terms[j].Term })
    96  	return terms
    97  }
    98  
    99  // DateLayout is the date layout used internally for date aggregations.
   100  const DateLayout = "2006-01-02T15:04:05Z07:00"
   101  
   102  type dateRangeAggregator struct {
   103  	min time.Time
   104  	max time.Time
   105  }
   106  
   107  func (d *dateRangeAggregator) aggregate(value interface{}) error {
   108  	switch v := value.(type) {
   109  	case string:
   110  		return d.aggregateString(v)
   111  	case []interface{}:
   112  		for _, vv := range v {
   113  			switch vvv := vv.(type) {
   114  			case string:
   115  				err := d.aggregateString(vvv)
   116  				if err != nil {
   117  					return err
   118  				}
   119  			default:
   120  				return fmt.Errorf("invalid type: %T", v)
   121  			}
   122  		}
   123  		return nil
   124  	default:
   125  		return fmt.Errorf("invalid type: %T", v)
   126  	}
   127  }
   128  
   129  func (d *dateRangeAggregator) aggregateString(value string) error {
   130  	date, err := time.Parse(DateLayout, value)
   131  	if err != nil {
   132  		return fmt.Errorf("invalid date format: %s", err)
   133  	}
   134  
   135  	if date.Unix() < d.min.Unix() {
   136  		d.min = date
   137  	}
   138  	if date.Unix() > d.max.Unix() {
   139  		d.max = date
   140  	}
   141  	return nil
   142  }
   143  
   144  func (d *dateRangeAggregator) bucket() Bucket {
   145  	return DateRangeBucket{Min: d.min, Max: d.max}
   146  }
   147  
// Bucket is the result produced by an aggregator. In this file it is
// implemented by TermsBucket and DateRangeBucket.
type Bucket interface {
	// BucketType reports the aggregation type the bucket belongs to.
	BucketType() AggregationType
}
   151  
   152  type TermsBucket []Term
   153  
   154  func (t TermsBucket) BucketType() AggregationType {
   155  	return TermsAggregation
   156  }
   157  
// Term is a single entry of a TermsBucket.
type Term struct {
	// Term is the term value.
	Term  string `json:"term"`
	// Count is the number of times the term was aggregated.
	Count int    `json:"count"`
}
   162  
   163  type DateRangeBucket struct {
   164  	Min time.Time `json:"min"`
   165  	Max time.Time `json:"max"`
   166  }
   167  
   168  func (d DateRangeBucket) BucketType() AggregationType {
   169  	return DateRangeAggregation
   170  }