github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/aggregator/text.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package aggregator 13 14 import ( 15 "sort" 16 17 "github.com/pkg/errors" 18 "github.com/weaviate/weaviate/entities/aggregation" 19 "github.com/weaviate/weaviate/entities/schema" 20 "github.com/weaviate/weaviate/entities/storobj" 21 ) 22 23 func extractLimitFromTopOccs(aggs []aggregation.Aggregator) int { 24 for _, agg := range aggs { 25 if agg.Type == aggregation.TopOccurrencesType && agg.Limit != nil { 26 return *agg.Limit 27 } 28 } 29 30 // we couldn't extract a limit, default to something reasonable 31 return 5 32 } 33 34 func newTextAggregator(limit int) *textAggregator { 35 return &textAggregator{itemCounter: map[string]int{}, max: limit} 36 } 37 38 type textAggregator struct { 39 max int 40 count uint64 41 42 itemCounter map[string]int 43 44 // always keep sorted, so we can cut off the last elem, when it grows larger 45 // than max 46 topPairs []aggregation.TextOccurrence 47 } 48 49 func (a *Aggregator) parseAndAddTextRow(agg *textAggregator, 50 v []byte, propName schema.PropertyName, 51 ) error { 52 items, ok, err := storobj.ParseAndExtractTextProp(v, propName.String()) 53 if err != nil { 54 return errors.Wrap(err, "parse and extract prop") 55 } 56 57 if !ok { 58 return nil 59 } 60 61 for i := range items { 62 if err := agg.AddText(items[i]); err != nil { 63 return err 64 } 65 } 66 return nil 67 } 68 69 func (a *textAggregator) AddText(value string) error { 70 a.count++ 71 72 itemCount := a.itemCounter[value] 73 itemCount++ 74 a.itemCounter[value] = itemCount 75 return nil 76 } 77 78 func (a *textAggregator) insertOrdered(elem aggregation.TextOccurrence) { 79 if len(a.topPairs) == 0 { 80 a.topPairs = []aggregation.TextOccurrence{elem} 81 return 82 } 83 84 added := false 85 for i, pair := range a.topPairs { 86 if pair.Occurs > elem.Occurs { 87 continue 88 } 89 // if number of occurrences is the same, 90 // skip if string is after one in topPairs 91 if pair.Occurs == elem.Occurs && pair.Value < elem.Value { 92 continue 93 } 94 95 // we have found the first one that's smaller so me must insert before i 96 a.topPairs = append( 97 a.topPairs[:i], append( 98 []aggregation.TextOccurrence{elem}, 99 a.topPairs[i:]..., 100 )..., 101 ) 102 103 added = true 104 break 105 } 106 107 if len(a.topPairs) > a.max { 108 a.topPairs = a.topPairs[:len(a.topPairs)-1] 109 } 110 111 if !added && len(a.topPairs) < a.max { 112 a.topPairs = append(a.topPairs, elem) 113 } 114 } 115 116 func (a *textAggregator) Res() aggregation.Text { 117 out := aggregation.Text{} 118 if a.count == 0 { 119 return out 120 } 121 122 for value, count := range a.itemCounter { 123 a.insertOrdered(aggregation.TextOccurrence{ 124 Value: value, 125 Occurs: count, 126 }) 127 } 128 129 out.Items = a.topPairs 130 sort.SliceStable(out.Items, func(a, b int) bool { 131 countA := out.Items[a].Occurs 132 countB := out.Items[b].Occurs 133 134 if countA != countB { 135 return countA > countB 136 } 137 138 valueA := out.Items[a].Value 139 valueB := out.Items[b].Value 140 if len(valueA) == 0 || len(valueB) == 0 { 141 return false // order doesn't matter in this case, just prevent a panic 142 } 143 144 return valueA[0] < valueB[0] 145 }) 146 147 out.Count = int(a.count) 148 return out 149 }