KevinStephenson
Adding in weaviate code
b110593
raw
history blame
3.39 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package aggregator
import (
"sort"
"github.com/pkg/errors"
"github.com/weaviate/weaviate/entities/aggregation"
"github.com/weaviate/weaviate/entities/schema"
"github.com/weaviate/weaviate/entities/storobj"
)
// extractLimitFromTopOccs returns the limit of the first aggregator in aggs
// that is of the top-occurrences type and has a non-nil Limit. If no such
// aggregator is present, a default of 5 is returned.
func extractLimitFromTopOccs(aggs []aggregation.Aggregator) int {
	// used whenever the request carries no explicit limit
	const defaultLimit = 5

	for i := range aggs {
		if aggs[i].Type != aggregation.TopOccurrencesType {
			continue
		}
		if limit := aggs[i].Limit; limit != nil {
			return *limit
		}
	}

	return defaultLimit
}
// newTextAggregator returns a textAggregator whose max is set to limit and
// whose occurrence counter is initialised to an empty map.
func newTextAggregator(limit int) *textAggregator {
	agg := &textAggregator{max: limit}
	agg.itemCounter = make(map[string]int)
	return agg
}
// textAggregator collects text values, counting the total number of values
// added as well as how often each distinct value occurred.
type textAggregator struct {
// max is the upper bound on the number of entries kept in topPairs.
max int
// count is the total number of values added, duplicates included.
count uint64
// itemCounter maps each distinct value to the number of times it was added.
itemCounter map[string]int
// always keep sorted, so we can cut off the last elem, when it grows larger
// than max
topPairs []aggregation.TextOccurrence
}
// parseAndAddTextRow parses the text values of property propName out of v
// using storobj.ParseAndExtractTextProp and adds each of them to agg.
//
// A parse error is returned wrapped. If the parser reports ok == false,
// nothing is added and nil is returned. The first error from agg.AddText
// aborts the loop and is returned as is.
func (a *Aggregator) parseAndAddTextRow(agg *textAggregator,
	v []byte, propName schema.PropertyName,
) error {
	values, found, err := storobj.ParseAndExtractTextProp(v, propName.String())
	switch {
	case err != nil:
		return errors.Wrap(err, "parse and extract prop")
	case !found:
		return nil
	}

	for _, value := range values {
		if addErr := agg.AddText(value); addErr != nil {
			return addErr
		}
	}

	return nil
}
// AddText registers a single occurrence of value by incrementing both the
// total count and the counter for that value. It always returns nil.
func (a *textAggregator) AddText(value string) error {
	a.count++
	a.itemCounter[value]++
	return nil
}
// insertOrdered inserts elem into topPairs so that the slice stays ordered by
// descending Occurs and, for equal Occurs, by ascending Value. topPairs never
// holds more than max entries: an element that would land behind the last
// slot of a full slice is dropped, and inserting into a full slice evicts the
// last entry.
//
// A non-positive max keeps topPairs empty. Without this guard the first
// element would be stored regardless of max, so a limit of 0 could yield
// either zero or one entry depending on insertion order.
func (a *textAggregator) insertOrdered(elem aggregation.TextOccurrence) {
	if a.max <= 0 {
		return
	}

	// topPairs is sorted, so a binary search finds the first entry that does
	// not rank strictly ahead of elem; elem must be inserted right before it.
	pos := sort.Search(len(a.topPairs), func(i int) bool {
		pair := a.topPairs[i]
		if pair.Occurs != elem.Occurs {
			return pair.Occurs < elem.Occurs
		}
		return pair.Value >= elem.Value
	})

	if pos >= a.max {
		// elem ranks below everything we are allowed to keep
		return
	}

	// grow by one, shift the tail to the right and drop elem into the gap
	a.topPairs = append(a.topPairs, aggregation.TextOccurrence{})
	copy(a.topPairs[pos+1:], a.topPairs[pos:])
	a.topPairs[pos] = elem

	if len(a.topPairs) > a.max {
		a.topPairs = a.topPairs[:a.max]
	}
}
// Res builds the aggregation result: Count is the total number of values
// added and Items holds the top occurrences, ordered by descending count and,
// for equal counts, by ascending value. If nothing has been added, the zero
// aggregation.Text is returned.
//
// The top list is rebuilt from itemCounter on every call, so calling Res more
// than once does not produce duplicate entries.
func (a *textAggregator) Res() aggregation.Text {
	out := aggregation.Text{}
	if a.count == 0 {
		return out
	}

	// Start from scratch instead of truncating the previous slice: a result
	// handed out by an earlier call may still share its backing array.
	a.topPairs = nil
	for value, count := range a.itemCounter {
		a.insertOrdered(aggregation.TextOccurrence{
			Value:  value,
			Occurs: count,
		})
	}

	out.Items = a.topPairs
	sort.SliceStable(out.Items, func(i, j int) bool {
		if out.Items[i].Occurs != out.Items[j].Occurs {
			return out.Items[i].Occurs > out.Items[j].Occurs
		}
		// Compare the whole strings: looking at the first byte only leaves
		// values with a common first character tied and requires a special
		// case for empty strings.
		return out.Items[i].Value < out.Items[j].Value
	})
	out.Count = int(a.count)

	return out
}