Spaces:
Running
Running
File size: 3,388 Bytes
b110593 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package aggregator
import (
"sort"
"github.com/pkg/errors"
"github.com/weaviate/weaviate/entities/aggregation"
"github.com/weaviate/weaviate/entities/schema"
"github.com/weaviate/weaviate/entities/storobj"
)
// extractLimitFromTopOccs returns the limit of the first top-occurrences
// aggregator in aggs that carries an explicit limit. If no such aggregator
// exists, a default of 5 is returned.
func extractLimitFromTopOccs(aggs []aggregation.Aggregator) int {
	// fallback used when no aggregator specifies a limit
	const defaultLimit = 5

	for i := range aggs {
		if aggs[i].Type != aggregation.TopOccurrencesType {
			continue
		}
		if limit := aggs[i].Limit; limit != nil {
			return *limit
		}
	}

	return defaultLimit
}
// newTextAggregator creates a textAggregator with an empty occurrence counter
// that keeps at most limit top entries.
func newTextAggregator(limit int) *textAggregator {
	agg := new(textAggregator)
	agg.max = limit
	agg.itemCounter = make(map[string]int)
	return agg
}
// textAggregator counts how often each distinct text value occurs and ranks
// the most frequent values.
type textAggregator struct {
	// max is the upper bound on the number of entries kept in topPairs
	max int
	// count is the total number of values added, duplicates included
	count uint64
	// itemCounter maps each distinct value to its number of occurrences
	itemCounter map[string]int
	// always keep sorted, so we can cut off the last elem, when it grows larger
	// than max
	topPairs []aggregation.TextOccurrence
}
// parseAndAddTextRow extracts the text values of property propName from the
// raw object bytes v and feeds each of them into agg. A row for which the
// extraction reports nothing found is skipped without error. Parse failures
// are returned wrapped; the first error from agg.AddText is returned as is.
func (a *Aggregator) parseAndAddTextRow(agg *textAggregator,
	v []byte, propName schema.PropertyName,
) error {
	values, found, err := storobj.ParseAndExtractTextProp(v, propName.String())
	switch {
	case err != nil:
		return errors.Wrap(err, "parse and extract prop")
	case !found:
		// nothing to aggregate for this row
		return nil
	}

	for _, value := range values {
		if addErr := agg.AddText(value); addErr != nil {
			return addErr
		}
	}

	return nil
}
// AddText registers a single occurrence of value: the total count and the
// per-value counter are each incremented by one. It always returns nil.
func (a *textAggregator) AddText(value string) error {
	a.count++
	a.itemCounter[value]++
	return nil
}
// insertOrdered places elem into a.topPairs while keeping the slice ordered by
// descending Occurs and, for equal Occurs, by ascending Value. The slice never
// holds more than a.max entries: if an insertion would exceed that bound, the
// lowest-ranked entry is dropped, and an elem that ranks below a full list is
// ignored.
//
// A non-positive a.max keeps the list empty, so a limit of 0 yields no
// entries at all.
func (a *textAggregator) insertOrdered(elem aggregation.TextOccurrence) {
	if a.max <= 0 {
		return
	}

	// Find the first entry that does not rank ahead of elem; elem must be
	// inserted right before it. If there is none, elem belongs at the end.
	pos := len(a.topPairs)
	for i := range a.topPairs {
		pair := a.topPairs[i]
		if pair.Occurs < elem.Occurs ||
			(pair.Occurs == elem.Occurs && pair.Value >= elem.Value) {
			pos = i
			break
		}
	}

	if pos == len(a.topPairs) && len(a.topPairs) >= a.max {
		// elem ranks below everything we keep and the list is already full
		return
	}

	// Grow by one, shift the tail one slot to the right and drop elem into the
	// gap. This avoids building a temporary slice for every insertion.
	a.topPairs = append(a.topPairs, aggregation.TextOccurrence{})
	copy(a.topPairs[pos+1:], a.topPairs[pos:])
	a.topPairs[pos] = elem

	if len(a.topPairs) > a.max {
		a.topPairs = a.topPairs[:a.max]
	}
}
// Res builds the aggregation result from everything passed to AddText so far.
//
// Count is the total number of added values. Items holds the highest-ranked
// distinct values as kept by insertOrdered, ordered by descending number of
// occurrences with ties broken by ascending value. If nothing has been added,
// the zero aggregation.Text is returned.
func (a *textAggregator) Res() aggregation.Text {
	out := aggregation.Text{}
	if a.count == 0 {
		return out
	}

	// Rebuild the ranking from scratch. insertOrdered adds to a.topPairs, so
	// without this reset a second call to Res would list every value twice.
	a.topPairs = nil
	for value, count := range a.itemCounter {
		a.insertOrdered(aggregation.TextOccurrence{
			Value:  value,
			Occurs: count,
		})
	}

	out.Items = a.topPairs
	// Enforce the final order explicitly. The tie-break compares the complete
	// values (not only their first byte), which also makes empty strings safe
	// to compare.
	sort.SliceStable(out.Items, func(i, j int) bool {
		if out.Items[i].Occurs != out.Items[j].Occurs {
			return out.Items[i].Occurs > out.Items[j].Occurs
		}
		return out.Items[i].Value < out.Items[j].Value
	})

	out.Count = int(a.count)
	return out
}
|