SemanticSearchPOC / adapters /repos /db /aggregations_integration_test.go
KevinStephenson
Adding in weaviate code
b110593
raw
history blame
72.6 kB
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
//go:build integrationTest
// +build integrationTest
package db
import (
"context"
"fmt"
"testing"
"time"
"github.com/go-openapi/strfmt"
"github.com/google/uuid"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/weaviate/weaviate/entities/aggregation"
"github.com/weaviate/weaviate/entities/filters"
"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/schema"
)
func Test_Aggregations(t *testing.T) {
dirName := t.TempDir()
shardState := singleShardState()
logger := logrus.New()
schemaGetter := &fakeSchemaGetter{
schema: schema.Schema{Objects: &models.Schema{Classes: nil}},
shardState: shardState,
}
repo, err := New(logger, Config{
MemtablesFlushIdleAfter: 60,
RootPath: dirName,
QueryMaximumResults: 10000,
MaxImportGoroutinesFactor: 1,
}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
require.Nil(t, err)
repo.SetSchemaGetter(schemaGetter)
require.Nil(t, repo.WaitForStartup(testCtx()))
migrator := NewMigrator(repo, logger)
t.Run("prepare test schema and data ",
prepareCompanyTestSchemaAndData(repo, migrator, schemaGetter))
t.Run("numerical aggregations with grouping",
testNumericalAggregationsWithGrouping(repo, true))
t.Run("numerical aggregations without grouping (formerly Meta)",
testNumericalAggregationsWithoutGrouping(repo, true))
t.Run("numerical aggregations with filters",
testNumericalAggregationsWithFilters(repo))
t.Run("date aggregations with grouping",
testDateAggregationsWithGrouping(repo, true))
t.Run("date aggregations without grouping",
testDateAggregationsWithoutGrouping(repo, true))
t.Run("date aggregations with filters",
testDateAggregationsWithFilters(repo))
t.Run("clean up",
cleanupCompanyTestSchemaAndData(repo, migrator))
}
func Test_Aggregations_MultiShard(t *testing.T) {
dirName := t.TempDir()
shardState := fixedMultiShardState()
logger := logrus.New()
schemaGetter := &fakeSchemaGetter{
schema: schema.Schema{Objects: &models.Schema{Classes: nil}},
shardState: shardState,
}
repo, err := New(logger, Config{
MemtablesFlushIdleAfter: 60,
RootPath: dirName,
QueryMaximumResults: 10000,
MaxImportGoroutinesFactor: 1,
}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
require.Nil(t, err)
repo.SetSchemaGetter(schemaGetter)
require.Nil(t, repo.WaitForStartup(testCtx()))
migrator := NewMigrator(repo, logger)
t.Run("prepare test schema and data ",
prepareCompanyTestSchemaAndData(repo, migrator, schemaGetter))
t.Run("numerical aggregations with grouping",
testNumericalAggregationsWithGrouping(repo, false))
t.Run("numerical aggregations without grouping (formerly Meta)",
testNumericalAggregationsWithoutGrouping(repo, false))
t.Run("numerical aggregations with filters",
testNumericalAggregationsWithFilters(repo))
t.Run("date aggregations with grouping",
testDateAggregationsWithGrouping(repo, true))
t.Run("date aggregations without grouping",
testDateAggregationsWithoutGrouping(repo, true))
t.Run("date aggregations with filters",
testDateAggregationsWithFilters(repo))
t.Run("clean up",
cleanupCompanyTestSchemaAndData(repo, migrator))
}
// prepareCompanyTestSchemaAndData returns a subtest that installs the
// product, company, array-types and customer classes in the fake schema
// getter, creates them through the migrator, and imports the fixture objects
// for each class into repo.
//
// The companies fixtures are imported importFactor times. Each round takes
// its IDs from its own contiguous window of companyIDs, so companyIDs must
// hold at least importFactor*len(companies) entries.
func prepareCompanyTestSchemaAndData(repo *DB,
	migrator *Migrator, schemaGetter *fakeSchemaGetter,
) func(t *testing.T) {
	return func(t *testing.T) {
		ctx := context.Background()
		classes := []*models.Class{
			productClass,
			companyClass,
			arrayTypesClass,
			customerClass,
		}
		// Named testSchema (not schema) so the imported schema package is not
		// shadowed for the rest of the function.
		testSchema := schema.Schema{
			Objects: &models.Schema{
				Classes: classes,
			},
		}

		schemaGetter.schema = testSchema

		t.Run("creating the class", func(t *testing.T) {
			for _, class := range classes {
				require.Nil(t,
					migrator.AddClass(ctx, class, schemaGetter.shardState))
			}
		})

		// Kept from the original: the schema is assigned again once the
		// classes exist. NOTE(review): looks redundant unless AddClass can
		// touch the fake getter - confirm before removing.
		schemaGetter.schema = testSchema

		t.Run("import products", func(t *testing.T) {
			for i, props := range products {
				t.Run(fmt.Sprintf("importing product %d", i), func(t *testing.T) {
					fixture := models.Object{
						Class:      productClass.Class,
						ID:         productsIds[i],
						Properties: props,
					}
					require.Nil(t,
						repo.PutObject(ctx, &fixture, []float32{0.1, 0.2, 0.01, 0.2}, nil))
				})
			}
		})

		t.Run("import companies", func(t *testing.T) {
			for j := 0; j < importFactor; j++ {
				for i, props := range companies {
					t.Run(fmt.Sprintf("importing company %d", i), func(t *testing.T) {
						// Stride by the number of fixtures per round. The
						// previous stride (importFactor-1) only produced
						// distinct IDs while it was >= len(companies);
						// otherwise later rounds overwrote earlier objects.
						fixture := models.Object{
							Class:      companyClass.Class,
							ID:         companyIDs[j*len(companies)+i],
							Properties: props,
						}
						require.Nil(t,
							repo.PutObject(ctx, &fixture, []float32{0.1, 0.1, 0.1, 0.1}, nil))
					})
				}
			}
		})

		t.Run("import array types", func(t *testing.T) {
			for i, props := range arrayTypes {
				t.Run(fmt.Sprintf("importing array type %d", i), func(t *testing.T) {
					fixture := models.Object{
						Class:      arrayTypesClass.Class,
						ID:         strfmt.UUID(uuid.Must(uuid.NewRandom()).String()),
						Properties: props,
					}
					require.Nil(t,
						repo.PutObject(ctx, &fixture, []float32{0.1, 0.1, 0.1, 0.1}, nil))
				})
			}
		})

		t.Run("import customers", func(t *testing.T) {
			for i, props := range customers {
				t.Run(fmt.Sprintf("importing customer #%d", i), func(t *testing.T) {
					fixture := models.Object{
						Class:      customerClass.Class,
						ID:         strfmt.UUID(uuid.Must(uuid.NewRandom()).String()),
						Properties: props,
					}
					require.Nil(t,
						repo.PutObject(ctx, &fixture, []float32{0.1, 0.1, 0.1, 0.1}, nil))
				})
			}
		})
	}
}
// cleanupCompanyTestSchemaAndData returns the closing subtest of the suite:
// it shuts repo down and reports a failure if Shutdown returns a non-nil
// error. The migrator argument is accepted but not used.
func cleanupCompanyTestSchemaAndData(repo *DB,
	migrator *Migrator,
) func(t *testing.T) {
	shutdown := func(t *testing.T) {
		err := repo.Shutdown(context.Background())
		assert.Nil(t, err)
	}
	return shutdown
}
// testNumericalAggregationsWithGrouping returns a subtest that runs grouped
// aggregations against repo and compares them with hard-coded expectations.
//
// exact selects the tolerance used for floating-point comparisons (a smaller
// epsilon when true) and, when false, skips the two array-type subtests.
// Callers in this file pass true for the single-shard test and false for the
// multi-shard test.
//
// NOTE(review): several subtests check the number of groups with assert (not
// require) before indexing res.Groups, so a wrong group count could surface
// as an index-out-of-range panic rather than a plain failure.
func testNumericalAggregationsWithGrouping(repo *DB, exact bool) func(t *testing.T) {
return func(t *testing.T) {
// Base tolerance: passed directly to InEpsilon/InDelta, and scaled by 100
// (in one case 500) for the InDeltaMapValues comparisons below.
epsilon := 0.1
if !exact {
epsilon = 1.0
}
// Mean of dividendYield per sector, including the per-group meta count.
t.Run("single field, single aggregator", func(t *testing.T) {
params := aggregation.Params{
ClassName: schema.ClassName(companyClass.Class),
GroupBy: &filters.Path{
Class: schema.ClassName(companyClass.Class),
Property: schema.PropertyName("sector"),
},
IncludeMetaCount: true,
Properties: []aggregation.ParamProperty{
{
Name: schema.PropertyName("dividendYield"),
Aggregators: []aggregation.Aggregator{aggregation.MeanAggregator},
},
},
}
res, err := repo.Aggregate(context.Background(), params)
require.Nil(t, err)
expectedResult := &aggregation.Result{
Groups: []aggregation.Group{
{
Count: 60,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"sector"},
Value: "Food",
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 2.066666666666666,
},
},
},
},
{
Count: 30,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"sector"},
Value: "Financials",
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 2.1999999999999999,
},
},
},
},
},
}
require.Equal(t, len(expectedResult.Groups), len(res.Groups))
// Only Count and the mean are compared here; GroupedBy is not checked.
for i := 0; i <= 1; i++ {
assert.Equal(t, expectedResult.Groups[i].Count,
res.Groups[i].Count)
expectedDivYield := expectedResult.Groups[i].Properties["dividendYield"]
actualDivYield := res.Groups[i].Properties["dividendYield"]
assert.InEpsilon(t, expectedDivYield.NumericalAggregations["mean"],
actualDivYield.NumericalAggregations["mean"], epsilon)
}
})
// Groups by the boolean property listedInIndex.
t.Run("grouping by a non-numerical, non-string prop", func(t *testing.T) {
params := aggregation.Params{
ClassName: schema.ClassName(companyClass.Class),
GroupBy: &filters.Path{
Class: schema.ClassName(companyClass.Class),
Property: schema.PropertyName("listedInIndex"),
},
Properties: []aggregation.ParamProperty{
{
Name: schema.PropertyName("dividendYield"),
Aggregators: []aggregation.Aggregator{aggregation.MeanAggregator},
},
},
}
res, err := repo.Aggregate(context.Background(), params)
require.Nil(t, err)
expectedResult := &aggregation.Result{
Groups: []aggregation.Group{
{
Count: 80,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"listedInIndex"},
Value: true,
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 2.375,
},
},
},
},
{
Count: 10,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"listedInIndex"},
Value: false,
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 0.0,
},
},
},
},
},
}
// there is no way to use InEpsilon or InDelta on nested structs with
// testify, so unfortunately we have to do a manual deep equal:
assert.Equal(t, len(res.Groups), len(expectedResult.Groups))
assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count)
assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy)
assert.InEpsilon(t, expectedResult.Groups[0].Properties["dividendYield"].
NumericalAggregations["mean"],
res.Groups[0].Properties["dividendYield"].NumericalAggregations["mean"],
epsilon)
assert.Equal(t, len(res.Groups), len(expectedResult.Groups))
assert.Equal(t, expectedResult.Groups[1].Count, res.Groups[1].Count)
assert.Equal(t, expectedResult.Groups[1].GroupedBy, res.Groups[1].GroupedBy)
// The second group's mean is compared with InDelta rather than InEpsilon;
// its expected value is 0.0.
assert.InDelta(t, expectedResult.Groups[1].Properties["dividendYield"].
NumericalAggregations["mean"],
res.Groups[1].Properties["dividendYield"].NumericalAggregations["mean"],
epsilon)
})
t.Run("multiple fields, multiple aggregators, grouped by string", func(t *testing.T) {
params := aggregation.Params{
ClassName: schema.ClassName(companyClass.Class),
GroupBy: &filters.Path{
Class: schema.ClassName(companyClass.Class),
Property: schema.PropertyName("sector"),
},
Properties: []aggregation.ParamProperty{
{
Name: schema.PropertyName("dividendYield"),
Aggregators: []aggregation.Aggregator{
aggregation.MeanAggregator,
aggregation.MaximumAggregator,
aggregation.MinimumAggregator,
aggregation.SumAggregator,
aggregation.ModeAggregator,
aggregation.MedianAggregator,
aggregation.CountAggregator,
aggregation.TypeAggregator,
},
},
{
Name: schema.PropertyName("price"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.MeanAggregator,
aggregation.MaximumAggregator,
aggregation.MinimumAggregator,
aggregation.SumAggregator,
// aggregation.ModeAggregator, // ignore as there is no most common value
aggregation.MedianAggregator,
aggregation.CountAggregator,
},
},
{
Name: schema.PropertyName("listedInIndex"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.PercentageTrueAggregator,
aggregation.PercentageFalseAggregator,
aggregation.TotalTrueAggregator,
aggregation.TotalFalseAggregator,
},
},
{
Name: schema.PropertyName("location"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.NewTopOccurrencesAggregator(ptInt(5)),
},
},
},
}
res, err := repo.Aggregate(context.Background(), params)
require.Nil(t, err)
expectedResult := &aggregation.Result{
Groups: []aggregation.Group{
{
Count: 60,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"sector"},
Value: "Food",
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 2.06667,
"maximum": 8.0,
"minimum": 0.0,
"sum": 124,
"mode": 0.,
"median": 1.1,
"count": 60,
},
},
"price": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 218.33333,
"maximum": 800.,
"minimum": 10.,
"sum": 13100.,
// "mode": 70,
"median": 115,
"count": 60,
},
},
"listedInIndex": {
Type: aggregation.PropertyTypeBoolean,
BooleanAggregation: aggregation.Boolean{
TotalTrue: 50,
TotalFalse: 10,
PercentageTrue: 0.8333333333333334,
PercentageFalse: 0.16666666666666666,
Count: 60,
},
},
"location": {
Type: aggregation.PropertyTypeText,
TextAggregation: aggregation.Text{
Count: 60,
Items: []aggregation.TextOccurrence{
{
Value: "Atlanta",
Occurs: 20,
},
{
Value: "Detroit",
Occurs: 10,
},
{
Value: "Los Angeles",
Occurs: 10,
},
{
Value: "New York",
Occurs: 10,
},
{
Value: "San Francisco",
Occurs: 10,
},
},
},
},
},
},
{
Count: 30,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"sector"},
Value: "Financials",
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 2.2,
"maximum": 4.0,
"minimum": 1.3,
"sum": 66.,
"mode": 1.3,
"median": 1.3,
"count": 30,
},
},
"price": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 265.66667,
"maximum": 600.,
"minimum": 47.,
"sum": 7970.,
// "mode": 47,
"median": 150.,
"count": 30.,
},
},
"listedInIndex": {
Type: aggregation.PropertyTypeBoolean,
BooleanAggregation: aggregation.Boolean{
TotalTrue: 30,
TotalFalse: 0,
PercentageTrue: 1,
PercentageFalse: 0,
Count: 30,
},
},
"location": {
Type: aggregation.PropertyTypeText,
TextAggregation: aggregation.Text{
Count: 30,
Items: []aggregation.TextOccurrence{
{
Value: "New York",
Occurs: 20,
},
{
Value: "San Francisco",
Occurs: 10,
},
},
},
},
},
},
},
}
// there is no way to use InEpsilon or InDelta on nested structs with
// testify, so unfortunately we have to do a manual deep equal:
assert.Equal(t, len(res.Groups), len(expectedResult.Groups))
assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count)
assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy)
expectedProps := expectedResult.Groups[0].Properties
actualProps := res.Groups[0].Properties
assert.Equal(t, expectedProps["location"].TextAggregation.Count,
actualProps["location"].TextAggregation.Count)
assert.ElementsMatch(t, expectedProps["location"].TextAggregation.Items,
actualProps["location"].TextAggregation.Items)
assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"])
assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations,
actualProps["dividendYield"].NumericalAggregations, epsilon*100)
assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations,
actualProps["price"].NumericalAggregations, epsilon*100)
assert.Equal(t, len(res.Groups), len(expectedResult.Groups))
assert.Equal(t, expectedResult.Groups[1].Count, res.Groups[1].Count)
assert.Equal(t, expectedResult.Groups[1].GroupedBy, res.Groups[1].GroupedBy)
expectedProps = expectedResult.Groups[1].Properties
actualProps = res.Groups[1].Properties
// Unlike the first group, location is compared with a plain Equal here, so
// the order of the text occurrences matters.
assert.Equal(t, expectedProps["location"], actualProps["location"])
assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"])
assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations,
actualProps["dividendYield"].NumericalAggregations, epsilon*100)
// NOTE(review): this is the only comparison using epsilon*500 - confirm the
// wider tolerance is intended.
assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations,
actualProps["price"].NumericalAggregations, epsilon*500)
})
// Same grouping by sector, restricted to objects with price < 600.
t.Run("with filters, grouped by string", func(t *testing.T) {
params := aggregation.Params{
ClassName: schema.ClassName(companyClass.Class),
GroupBy: &filters.Path{
Class: schema.ClassName(companyClass.Class),
Property: schema.PropertyName("sector"),
},
Filters: &filters.LocalFilter{
Root: &filters.Clause{
Operator: filters.OperatorLessThan,
Value: &filters.Value{
Type: schema.DataTypeInt,
Value: 600,
},
On: &filters.Path{
Property: "price",
},
},
},
Properties: []aggregation.ParamProperty{
{
Name: schema.PropertyName("dividendYield"),
Aggregators: []aggregation.Aggregator{
aggregation.MeanAggregator,
aggregation.MaximumAggregator,
aggregation.MinimumAggregator,
aggregation.SumAggregator,
// aggregation.ModeAggregator,
aggregation.MedianAggregator,
aggregation.CountAggregator,
aggregation.TypeAggregator,
},
},
{
Name: schema.PropertyName("price"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.MeanAggregator,
aggregation.MaximumAggregator,
aggregation.MinimumAggregator,
aggregation.SumAggregator,
// aggregation.ModeAggregator, // ignore as there is no most common value
aggregation.MedianAggregator,
aggregation.CountAggregator,
},
},
{
Name: schema.PropertyName("listedInIndex"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.PercentageTrueAggregator,
aggregation.PercentageFalseAggregator,
aggregation.TotalTrueAggregator,
aggregation.TotalFalseAggregator,
},
},
{
Name: schema.PropertyName("location"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.NewTopOccurrencesAggregator(ptInt(5)),
},
},
},
}
res, err := repo.Aggregate(context.Background(), params)
require.Nil(t, err)
expectedResult := &aggregation.Result{
Groups: []aggregation.Group{
{
Count: 50,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"sector"},
Value: "Food",
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 2.48,
"maximum": 8.0,
"minimum": 0.0,
"sum": 124.,
"median": 1.3,
"count": 50,
},
},
"price": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 102.,
"maximum": 200.,
"minimum": 10.,
"sum": 5100.,
"median": 70.,
"count": 50.,
},
},
"listedInIndex": {
Type: aggregation.PropertyTypeBoolean,
BooleanAggregation: aggregation.Boolean{
TotalTrue: 50,
TotalFalse: 0,
PercentageTrue: 1,
PercentageFalse: 0,
Count: 50,
},
},
"location": {
Type: aggregation.PropertyTypeText,
TextAggregation: aggregation.Text{
Count: 50,
Items: []aggregation.TextOccurrence{
{
Value: "Atlanta",
Occurs: 20,
},
{
Value: "Detroit",
Occurs: 10,
},
{
Value: "New York",
Occurs: 10,
},
{
Value: "San Francisco",
Occurs: 10,
},
},
},
},
},
},
{
Count: 20,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"sector"},
Value: "Financials",
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 1.3,
"maximum": 1.3,
"minimum": 1.3,
"sum": 26.,
"median": 1.3,
"count": 20.,
},
},
"price": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 98.5,
"maximum": 150.,
"minimum": 47.,
"sum": 1970.,
"median": 98.5,
"count": 20.,
},
},
"listedInIndex": {
Type: aggregation.PropertyTypeBoolean,
BooleanAggregation: aggregation.Boolean{
TotalTrue: 20,
TotalFalse: 0,
PercentageTrue: 1,
PercentageFalse: 0,
Count: 20,
},
},
"location": {
Type: aggregation.PropertyTypeText,
TextAggregation: aggregation.Text{
Count: 20,
Items: []aggregation.TextOccurrence{
{
Value: "New York",
Occurs: 10,
},
{
Value: "San Francisco",
Occurs: 10,
},
},
},
},
},
},
},
}
// there is no way to use InEpsilon or InDelta on nested structs with
// testify, so unfortunately we have to do a manual deep equal:
assert.Equal(t, len(res.Groups), len(expectedResult.Groups))
assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count)
assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy)
expectedProps := expectedResult.Groups[0].Properties
actualProps := res.Groups[0].Properties
assert.Equal(t, expectedProps["location"].TextAggregation.Count,
actualProps["location"].TextAggregation.Count)
assert.ElementsMatch(t, expectedProps["location"].TextAggregation.Items,
actualProps["location"].TextAggregation.Items)
assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"])
assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations,
actualProps["dividendYield"].NumericalAggregations, epsilon*100)
assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations,
actualProps["price"].NumericalAggregations, epsilon*100)
assert.Equal(t, len(res.Groups), len(expectedResult.Groups))
assert.Equal(t, expectedResult.Groups[1].Count, res.Groups[1].Count)
assert.Equal(t, expectedResult.Groups[1].GroupedBy, res.Groups[1].GroupedBy)
expectedProps = expectedResult.Groups[1].Properties
actualProps = res.Groups[1].Properties
assert.Equal(t, expectedProps["location"].TextAggregation.Count,
actualProps["location"].TextAggregation.Count)
assert.ElementsMatch(t, expectedProps["location"].TextAggregation.Items,
actualProps["location"].TextAggregation.Items)
assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"])
assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations,
actualProps["dividendYield"].NumericalAggregations, epsilon*100)
assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations,
actualProps["price"].NumericalAggregations, epsilon*100)
})
// Groups by the reference property makesProduct; a single group is expected,
// keyed by the referenced object's weaviate:// URI.
t.Run("no filters, grouped by ref prop", func(t *testing.T) {
params := aggregation.Params{
ClassName: schema.ClassName(companyClass.Class),
GroupBy: &filters.Path{
Class: schema.ClassName(companyClass.Class),
Property: schema.PropertyName("makesProduct"),
},
Properties: []aggregation.ParamProperty{
{
Name: schema.PropertyName("dividendYield"),
Aggregators: []aggregation.Aggregator{
aggregation.MeanAggregator,
aggregation.MaximumAggregator,
aggregation.MinimumAggregator,
aggregation.SumAggregator,
// aggregation.ModeAggregator,
aggregation.MedianAggregator,
aggregation.CountAggregator,
aggregation.TypeAggregator,
},
},
{
Name: schema.PropertyName("price"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.MeanAggregator,
aggregation.MaximumAggregator,
aggregation.MinimumAggregator,
aggregation.SumAggregator,
// aggregation.ModeAggregator, // ignore as there is no most common value
aggregation.MedianAggregator,
aggregation.CountAggregator,
},
},
{
Name: schema.PropertyName("listedInIndex"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.PercentageTrueAggregator,
aggregation.PercentageFalseAggregator,
aggregation.TotalTrueAggregator,
aggregation.TotalFalseAggregator,
},
},
{
Name: schema.PropertyName("location"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.NewTopOccurrencesAggregator(ptInt(5)),
},
},
},
}
res, err := repo.Aggregate(context.Background(), params)
require.Nil(t, err)
expectedResult := &aggregation.Result{
Groups: []aggregation.Group{
{
Count: 10,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"makesProduct"},
Value: strfmt.URI("weaviate://localhost/1295c052-263d-4aae-99dd-920c5a370d06"),
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 8.0,
"maximum": 8.0,
"minimum": 8.0,
"sum": 80.0,
"median": 8.0,
"count": 10.,
},
},
"price": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 10.,
"maximum": 10.,
"minimum": 10.,
"sum": 100.,
"median": 10.,
"count": 10.,
},
},
"listedInIndex": {
Type: aggregation.PropertyTypeBoolean,
BooleanAggregation: aggregation.Boolean{
TotalTrue: 10,
TotalFalse: 0,
PercentageTrue: 1,
PercentageFalse: 0,
Count: 10,
},
},
"location": {
Type: aggregation.PropertyTypeText,
TextAggregation: aggregation.Text{
Count: 10,
Items: []aggregation.TextOccurrence{
{
Value: "Detroit",
Occurs: 10,
},
},
},
},
},
},
},
}
// there is no way to use InEpsilon or InDelta on nested structs with
// testify, so unfortunately we have to do a manual deep equal:
assert.Equal(t, len(res.Groups), len(expectedResult.Groups))
assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count)
assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy)
expectedProps := expectedResult.Groups[0].Properties
actualProps := res.Groups[0].Properties
assert.Equal(t, expectedProps["location"].TextAggregation.Count,
actualProps["location"].TextAggregation.Count)
assert.ElementsMatch(t, expectedProps["location"].TextAggregation.Items,
actualProps["location"].TextAggregation.Items)
assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"])
assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations,
actualProps["dividendYield"].NumericalAggregations, epsilon*100)
assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations,
actualProps["price"].NumericalAggregations, epsilon*100)
})
// Filters through the makesProduct reference on the referenced product's
// name, then groups the matching companies by sector.
t.Run("with ref filter, grouped by string", func(t *testing.T) {
params := aggregation.Params{
ClassName: schema.ClassName(companyClass.Class),
GroupBy: &filters.Path{
Class: schema.ClassName(companyClass.Class),
Property: schema.PropertyName("sector"),
},
Filters: &filters.LocalFilter{
Root: &filters.Clause{
Operator: filters.OperatorEqual,
Value: &filters.Value{
Type: schema.DataTypeText,
Value: "Superbread",
},
On: &filters.Path{
Property: "makesProduct",
Child: &filters.Path{
Class: "AggregationsTestProduct",
Property: "name",
},
},
},
},
Properties: []aggregation.ParamProperty{
{
Name: schema.PropertyName("dividendYield"),
Aggregators: []aggregation.Aggregator{
aggregation.MeanAggregator,
aggregation.MaximumAggregator,
aggregation.MinimumAggregator,
aggregation.SumAggregator,
// aggregation.ModeAggregator,
aggregation.MedianAggregator,
aggregation.CountAggregator,
aggregation.TypeAggregator,
},
},
{
Name: schema.PropertyName("price"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.MeanAggregator,
aggregation.MaximumAggregator,
aggregation.MinimumAggregator,
aggregation.SumAggregator,
// aggregation.ModeAggregator, // ignore as there is no most common value
aggregation.MedianAggregator,
aggregation.CountAggregator,
},
},
{
Name: schema.PropertyName("listedInIndex"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.PercentageTrueAggregator,
aggregation.PercentageFalseAggregator,
aggregation.TotalTrueAggregator,
aggregation.TotalFalseAggregator,
},
},
{
Name: schema.PropertyName("location"),
Aggregators: []aggregation.Aggregator{
aggregation.TypeAggregator,
aggregation.NewTopOccurrencesAggregator(ptInt(5)),
},
},
},
}
res, err := repo.Aggregate(context.Background(), params)
require.Nil(t, err)
require.NotNil(t, res)
expectedResult := &aggregation.Result{
Groups: []aggregation.Group{
{
Count: 10,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"sector"},
Value: "Food",
},
Properties: map[string]aggregation.Property{
"dividendYield": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 8.0,
"maximum": 8.0,
"minimum": 8.0,
"sum": 80.,
"median": 8.0,
"count": 10.,
},
},
"price": {
Type: aggregation.PropertyTypeNumerical,
NumericalAggregations: map[string]interface{}{
"mean": 10.,
"maximum": 10.,
"minimum": 10.,
"sum": 100.,
"median": 10.,
"count": 10.,
},
},
"listedInIndex": {
Type: aggregation.PropertyTypeBoolean,
BooleanAggregation: aggregation.Boolean{
TotalTrue: 10,
TotalFalse: 0,
PercentageTrue: 1,
PercentageFalse: 0,
Count: 10,
},
},
"location": {
Type: aggregation.PropertyTypeText,
TextAggregation: aggregation.Text{
Count: 10,
Items: []aggregation.TextOccurrence{
{
Value: "Detroit",
Occurs: 10,
},
},
},
},
},
},
},
}
// there is no way to use InEpsilon or InDelta on nested structs with
// testify, so unfortunately we have to do a manual deep equal:
assert.Equal(t, len(res.Groups), len(expectedResult.Groups))
assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count)
assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy)
expectedProps := expectedResult.Groups[0].Properties
actualProps := res.Groups[0].Properties
assert.Equal(t, expectedProps["location"], actualProps["location"])
assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"])
// This subtest uses a fixed delta of 0.001 instead of one derived from epsilon.
assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations,
actualProps["dividendYield"].NumericalAggregations, 0.001)
assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations,
actualProps["price"].NumericalAggregations, 0.001)
})
// Grouping by a text-array property; only the per-value counts are requested.
t.Run("array types, single aggregator strings", func(t *testing.T) {
// Skipped unless the caller asked for exact results.
if !exact {
t.Skip()
}
params := aggregation.Params{
ClassName: schema.ClassName(arrayTypesClass.Class),
GroupBy: &filters.Path{
Class: schema.ClassName(arrayTypesClass.Class),
Property: schema.PropertyName("strings"),
},
IncludeMetaCount: true,
}
res, err := repo.Aggregate(context.Background(), params)
require.Nil(t, err)
expectedResult := &aggregation.Result{
Groups: []aggregation.Group{
{
Count: 2,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"strings"},
Value: "a",
},
Properties: map[string]aggregation.Property{},
},
{
Count: 1,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"strings"},
Value: "b",
},
Properties: map[string]aggregation.Property{},
},
{
Count: 1,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"strings"},
Value: "c",
},
Properties: map[string]aggregation.Property{},
},
},
}
// Group order is not asserted.
assert.ElementsMatch(t, expectedResult.Groups, res.Groups)
})
// Grouping by a number-array property; only the per-value counts are requested.
t.Run("array types, single aggregator numbers", func(t *testing.T) {
// Skipped unless the caller asked for exact results.
if !exact {
t.Skip()
}
params := aggregation.Params{
ClassName: schema.ClassName(arrayTypesClass.Class),
GroupBy: &filters.Path{
Class: schema.ClassName(arrayTypesClass.Class),
Property: schema.PropertyName("numbers"),
},
IncludeMetaCount: true,
}
res, err := repo.Aggregate(context.Background(), params)
require.Nil(t, err)
expectedResult := &aggregation.Result{
Groups: []aggregation.Group{
{
Count: 2,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"numbers"},
Value: float64(1.0),
},
Properties: map[string]aggregation.Property{},
},
{
Count: 2,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"numbers"},
Value: float64(2.0),
},
Properties: map[string]aggregation.Property{},
},
{
Count: 1,
GroupedBy: &aggregation.GroupedBy{
Path: []string{"numbers"},
Value: float64(3.0),
},
Properties: map[string]aggregation.Property{},
},
},
}
// Group order is not asserted.
assert.ElementsMatch(t, expectedResult.Groups, res.Groups)
})
}
}
// testDateAggregationsWithFilters returns a subtest that aggregates the
// customer class's timeArrived property under a filter and expects exactly
// one group with exactly one property, whose only date aggregation is a
// count of zero.
func testDateAggregationsWithFilters(repo *DB) func(t *testing.T) {
	return func(t *testing.T) {
		t.Run("Aggregations with filter that matches nothing", func(t *testing.T) {
			const prop = "timeArrived"

			where := &filters.LocalFilter{
				Root: &filters.Clause{
					Operator: filters.OperatorGreaterThan,
					On:       &filters.Path{Property: prop},
					Value: &filters.Value{
						Type:  schema.DataTypeDate,
						Value: "0312-06-16T17:30:17.231346Z", // hello roman empire!
					},
				},
			}
			requested := []aggregation.Aggregator{
				aggregation.MeanAggregator,
				aggregation.CountAggregator,
				aggregation.MaximumAggregator,
				aggregation.MedianAggregator,
				aggregation.MinimumAggregator,
				aggregation.ModeAggregator,
				aggregation.TypeAggregator,
			}

			res, err := repo.Aggregate(context.Background(), aggregation.Params{
				ClassName:        schema.ClassName(customerClass.Class),
				Filters:          where,
				IncludeMetaCount: true,
				Properties: []aggregation.ParamProperty{
					{Name: schema.PropertyName(prop), Aggregators: requested},
				},
			})

			// No results match the filter, so only a count of 0 is included
			require.Nil(t, err)
			require.Equal(t, 1, len(res.Groups))
			groupProps := res.Groups[0].Properties
			require.Equal(t, 1, len(groupProps))
			dateAggs := groupProps[prop].DateAggregations
			require.Equal(t, 1, len(dateAggs))
			require.Equal(t, int64(0), dateAggs["count"].(int64))
		})
	}
}
// testNumericalAggregationsWithFilters returns a subtest that aggregates the
// company class's dividendYield property under a "price < -5" filter and
// expects exactly one group with exactly one property, whose only numerical
// aggregation is a count of zero.
func testNumericalAggregationsWithFilters(repo *DB) func(t *testing.T) {
	return func(t *testing.T) {
		t.Run("Aggregations with filter that matches nothing", func(t *testing.T) {
			const prop = "dividendYield"

			where := &filters.LocalFilter{
				Root: &filters.Clause{
					Operator: filters.OperatorLessThan,
					On:       &filters.Path{Property: "price"},
					Value: &filters.Value{
						Type:  schema.DataTypeInt,
						Value: -5, // price is positive everywhere
					},
				},
			}
			requested := []aggregation.Aggregator{
				aggregation.MeanAggregator,
				aggregation.CountAggregator,
				aggregation.MaximumAggregator,
				aggregation.MedianAggregator,
				aggregation.MinimumAggregator,
				aggregation.ModeAggregator,
				aggregation.TypeAggregator,
			}

			res, err := repo.Aggregate(context.Background(), aggregation.Params{
				ClassName:        schema.ClassName(companyClass.Class),
				Filters:          where,
				IncludeMetaCount: true,
				Properties: []aggregation.ParamProperty{
					{Name: schema.PropertyName(prop), Aggregators: requested},
				},
			})

			// No results match the filter, so only a count of 0 is included
			require.Nil(t, err)
			require.Equal(t, 1, len(res.Groups))
			groupProps := res.Groups[0].Properties
			require.Equal(t, 1, len(groupProps))
			numAggs := groupProps[prop].NumericalAggregations
			require.Equal(t, 1, len(numAggs))
			require.Equal(t, float64(0), numAggs["count"].(float64))
		})
	}
}
// testNumericalAggregationsWithoutGrouping returns a subtest suite that runs
// ungrouped (GroupBy == nil) aggregations against the company and array-types
// test classes and compares the results with hard-coded expectations.
//
// When exact is true, results are compared for strict equality. When exact is
// false, numerical results are compared with tolerances and the filter-based
// subtests are skipped (the in-code comments give multi-sharding as the
// reason: filtering happens inside a shard and need not be re-tested).
func testNumericalAggregationsWithoutGrouping(repo *DB,
	exact bool,
) func(t *testing.T) {
	return func(t *testing.T) {
		// numericalAggregators builds a fresh list of every aggregator that is
		// requested for numerical properties in the subtests below.
		numericalAggregators := func() []aggregation.Aggregator {
			return []aggregation.Aggregator{
				aggregation.MeanAggregator,
				aggregation.MaximumAggregator,
				aggregation.MinimumAggregator,
				aggregation.SumAggregator,
				aggregation.ModeAggregator,
				aggregation.MedianAggregator,
				aggregation.CountAggregator,
				aggregation.TypeAggregator, // ignored in the repo, but can't block
			}
		}

		// companyProperties builds a fresh copy of the property/aggregator
		// request shared by all "multiple fields, multiple aggregators"
		// subtests, so each subtest gets its own slice.
		companyProperties := func() []aggregation.ParamProperty {
			return []aggregation.ParamProperty{
				{
					Name:        schema.PropertyName("dividendYield"),
					Aggregators: numericalAggregators(),
				},
				{
					Name:        schema.PropertyName("price"),
					Aggregators: numericalAggregators(),
				},
				{
					Name: schema.PropertyName("listedInIndex"),
					Aggregators: []aggregation.Aggregator{
						aggregation.PercentageTrueAggregator,
						aggregation.PercentageFalseAggregator,
						aggregation.TotalTrueAggregator,
						aggregation.TotalFalseAggregator,
						aggregation.TypeAggregator, // ignored in the repo, but can't block
					},
				},
				{
					Name: schema.PropertyName("location"),
					Aggregators: []aggregation.Aggregator{
						// limit is so high, it's not really restrictive
						aggregation.NewTopOccurrencesAggregator(ptInt(10)),
						aggregation.TypeAggregator, // ignored in the repo, but can't block
					},
				},
				{
					Name: schema.PropertyName("sector"),
					Aggregators: []aggregation.Aggregator{
						// limit is very restrictive
						aggregation.NewTopOccurrencesAggregator(ptInt(1)),
						aggregation.TypeAggregator, // ignored in the repo, but can't block
					},
				},
				// The reference aggregation is included mainly to make sure it
				// neither blocks nor leads to errors. The unfiltered subtest
				// expects no entry for it in the result, whereas the filtered
				// subtests expect the pointing-to beacon.
				{
					Name: schema.PropertyName("makesProduct"),
					Aggregators: []aggregation.Aggregator{
						aggregation.PointingToAggregator,
						aggregation.TypeAggregator,
					},
				},
			}
		}

		t.Run("only meta count, no other aggregations", func(t *testing.T) {
			params := aggregation.Params{
				ClassName:        schema.ClassName(companyClass.Class),
				IncludeMetaCount: true,
				GroupBy:          nil, // explicitly set to nil
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)

			expectedResult := &aggregation.Result{
				Groups: []aggregation.Group{
					{
						GroupedBy: nil,
						Count:     90,
					},
				},
			}

			require.NotNil(t, res)
			assert.Equal(t, expectedResult.Groups, res.Groups)
		})

		t.Run("single field, single aggregator", func(t *testing.T) {
			params := aggregation.Params{
				ClassName: schema.ClassName(companyClass.Class),
				GroupBy:   nil, // explicitly set to nil
				Properties: []aggregation.ParamProperty{
					{
						Name:        schema.PropertyName("dividendYield"),
						Aggregators: []aggregation.Aggregator{aggregation.MeanAggregator},
					},
				},
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)

			if exact {
				expectedResult := &aggregation.Result{
					Groups: []aggregation.Group{
						{
							GroupedBy: nil,
							Properties: map[string]aggregation.Property{
								"dividendYield": {
									Type: aggregation.PropertyTypeNumerical,
									NumericalAggregations: map[string]interface{}{
										"mean": 2.111111111111111,
									},
								},
							},
						},
					},
				}

				assert.Equal(t, expectedResult.Groups, res.Groups)
			} else {
				require.Len(t, res.Groups, 1)
				divYield := res.Groups[0].Properties["dividendYield"]
				assert.Equal(t, aggregation.PropertyTypeNumerical, divYield.Type)
				assert.InDelta(t, 2.1111, divYield.NumericalAggregations["mean"], 2)
			}
		})

		t.Run("multiple fields, multiple aggregators", func(t *testing.T) {
			params := aggregation.Params{
				ClassName:        schema.ClassName(companyClass.Class),
				GroupBy:          nil, // explicitly set to nil,
				IncludeMetaCount: true,
				Properties:       companyProperties(),
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)

			expectedResult := &aggregation.Result{
				Groups: []aggregation.Group{
					{
						Count: 90, // because includeMetaCount was set
						Properties: map[string]aggregation.Property{
							"dividendYield": {
								Type: aggregation.PropertyTypeNumerical,
								NumericalAggregations: map[string]interface{}{
									"mean":    2.111111111111111,
									"maximum": 8.0,
									"minimum": 0.0,
									"sum":     190.,
									"mode":    1.3,
									"median":  1.3,
									"count":   90.,
								},
							},
							"price": {
								Type: aggregation.PropertyTypeNumerical,
								NumericalAggregations: map[string]interface{}{
									"mean":    234.11111111111111,
									"maximum": 800.,
									"minimum": 10.,
									"sum":     21070.,
									"mode":    70.,
									"median":  150.,
									"count":   90.,
								},
							},
							"listedInIndex": {
								Type: aggregation.PropertyTypeBoolean,
								BooleanAggregation: aggregation.Boolean{
									TotalTrue:       80,
									TotalFalse:      10,
									PercentageTrue:  0.8888888888888888,
									PercentageFalse: 0.1111111111111111,
									Count:           90,
								},
							},
							"location": {
								Type: aggregation.PropertyTypeText,
								TextAggregation: aggregation.Text{
									Count: 90,
									Items: []aggregation.TextOccurrence{
										{
											Value:  "New York",
											Occurs: 30,
										},
										{
											Value:  "Atlanta",
											Occurs: 20,
										},
										{
											Value:  "San Francisco",
											Occurs: 20,
										},
										{
											Value:  "Detroit",
											Occurs: 10,
										},
										{
											Value:  "Los Angeles",
											Occurs: 10,
										},
									},
								},
							},
							"sector": {
								Type: aggregation.PropertyTypeText,
								TextAggregation: aggregation.Text{
									Count: 90,
									Items: []aggregation.TextOccurrence{
										{
											Value:  "Food",
											Occurs: 60,
										},
									},
								},
							},
						},
					},
				},
			}

			if exact {
				assert.Equal(t, expectedResult.Groups, res.Groups)
			} else {
				// guard the res.Groups[0] accesses below so that an unexpected
				// result fails the test instead of panicking
				require.NotNil(t, res)
				require.Len(t, res.Groups, 1)

				t.Run("numerical fields", func(t *testing.T) {
					aggs := res.Groups[0].Properties["dividendYield"].NumericalAggregations
					expectedAggs := expectedResult.Groups[0].Properties["dividendYield"].NumericalAggregations

					// max, min, count, sum are always exact matches, but we need an
					// epsilon check because of floating point arithmetics
					assert.InEpsilon(t, expectedAggs["maximum"], aggs["maximum"], 0.1)
					assert.Equal(t, expectedAggs["minimum"], aggs["minimum"]) // equal because the result == 0
					assert.InEpsilon(t, expectedAggs["count"], aggs["count"], 0.1)
					assert.InEpsilon(t, expectedAggs["sum"], aggs["sum"], 0.1)

					// mean, mode, median are always fuzzy
					assert.InDelta(t, expectedAggs["mean"], aggs["mean"], 2)
					assert.InDelta(t, expectedAggs["mode"], aggs["mode"], 2)
					assert.InDelta(t, expectedAggs["median"], aggs["median"], 2)
				})

				t.Run("int fields", func(t *testing.T) {
					aggs := res.Groups[0].Properties["price"].NumericalAggregations
					expectedAggs := expectedResult.Groups[0].Properties["price"].NumericalAggregations

					// max, min, count, sum are always exact matches, but we need an
					// epsilon check because of floating point arithmetics
					assert.InEpsilon(t, expectedAggs["maximum"], aggs["maximum"], 0.1)
					assert.InEpsilon(t, expectedAggs["minimum"], aggs["minimum"], 0.1)
					assert.InEpsilon(t, expectedAggs["count"], aggs["count"], 0.1)
					assert.InEpsilon(t, expectedAggs["sum"], aggs["sum"], 0.1)

					// mean, mode, median are always fuzzy
					assert.InEpsilon(t, expectedAggs["mean"], aggs["mean"], 0.5, "mean")
					assert.InEpsilon(t, expectedAggs["mode"], aggs["mode"], 10, "mode")
					assert.InEpsilon(t, expectedAggs["median"], aggs["median"], 0.5, "median")
				})

				t.Run("boolean fields", func(t *testing.T) {
					aggs := res.Groups[0].Properties["listedInIndex"].BooleanAggregation
					expectedAggs := expectedResult.Groups[0].Properties["listedInIndex"].BooleanAggregation

					assert.InEpsilon(t, expectedAggs.TotalTrue, aggs.TotalTrue, 0.1)
					assert.InEpsilon(t, expectedAggs.TotalFalse, aggs.TotalFalse, 0.1)
					assert.InEpsilon(t, expectedAggs.PercentageTrue, aggs.PercentageTrue, 0.1)
					assert.InEpsilon(t, expectedAggs.PercentageFalse, aggs.PercentageFalse, 0.1)
					assert.InEpsilon(t, expectedAggs.Count, aggs.Count, 0.1)
				})

				t.Run("text fields (location)", func(t *testing.T) {
					aggs := res.Groups[0].Properties["location"].TextAggregation
					expectedAggs := expectedResult.Groups[0].Properties["location"].TextAggregation

					assert.Equal(t, expectedAggs.Count, aggs.Count)
					assert.ElementsMatch(t, expectedAggs.Items, aggs.Items)
				})

				t.Run("text fields (sector)", func(t *testing.T) {
					aggs := res.Groups[0].Properties["sector"].TextAggregation
					expectedAggs := expectedResult.Groups[0].Properties["sector"].TextAggregation

					assert.Equal(t, expectedAggs.Count, aggs.Count)
					assert.ElementsMatch(t, expectedAggs.Items, aggs.Items)
				})
			}
		})

		t.Run("multiple fields, multiple aggregators, single-level filter", func(t *testing.T) {
			if !exact {
				// filtering is happening inside a shard, so there is no need to test
				// this again for multi-sharding. This saves us from adapting all the
				// assertions to work with fuzzy values
				t.Skip()
			}

			params := aggregation.Params{
				ClassName:        schema.ClassName(companyClass.Class),
				GroupBy:          nil, // explicitly set to nil,
				Filters:          sectorEqualsFoodFilter(),
				IncludeMetaCount: true,
				Properties:       companyProperties(),
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)
			// guard the res.Groups[0] accesses below so that an unexpected
			// result fails the test instead of panicking
			require.NotNil(t, res)
			require.Len(t, res.Groups, 1)

			// The numerical and reference properties are taken out of the
			// result and compared individually further down; the remaining
			// properties are compared for strict equality.
			actualDivYield := res.Groups[0].Properties["dividendYield"]
			delete(res.Groups[0].Properties, "dividendYield")
			actualPrice := res.Groups[0].Properties["price"]
			delete(res.Groups[0].Properties, "price")
			actualMakesProduct := res.Groups[0].Properties["makesProduct"]
			delete(res.Groups[0].Properties, "makesProduct")

			expectedDivYield := aggregation.Property{
				Type: aggregation.PropertyTypeNumerical,
				NumericalAggregations: map[string]interface{}{
					"mean":    2.066666666666666,
					"maximum": 8.0,
					"minimum": 0.0,
					"sum":     124.,
					"mode":    0.0,
					"median":  1.2,
					"count":   60.,
				},
			}

			expectedPrice := aggregation.Property{
				Type: aggregation.PropertyTypeNumerical,
				NumericalAggregations: map[string]interface{}{
					"mean":    218.33333333333334,
					"maximum": 800.,
					"minimum": 10.,
					"sum":     13100.,
					"mode":    70.,
					"median":  115.,
					"count":   60.,
				},
			}

			expectedMakesProduct := aggregation.Property{
				Type: aggregation.PropertyTypeReference,
				ReferenceAggregation: aggregation.Reference{
					PointingTo: []string{"weaviate://localhost/1295c052-263d-4aae-99dd-920c5a370d06"},
				},
			}

			expectedResult := &aggregation.Result{
				Groups: []aggregation.Group{
					{
						Count: 60, // because includeMetaCount was set
						Properties: map[string]aggregation.Property{
							"listedInIndex": {
								Type: aggregation.PropertyTypeBoolean,
								BooleanAggregation: aggregation.Boolean{
									TotalTrue:       50,
									TotalFalse:      10,
									PercentageTrue:  0.8333333333333334,
									PercentageFalse: 0.16666666666666666,
									Count:           60,
								},
							},
							"location": {
								Type: aggregation.PropertyTypeText,
								TextAggregation: aggregation.Text{
									Count: 60,
									Items: []aggregation.TextOccurrence{
										{
											Value:  "Atlanta",
											Occurs: 20,
										},
										{
											Value:  "Detroit",
											Occurs: 10,
										},
										{
											Value:  "Los Angeles",
											Occurs: 10,
										},
										{
											Value:  "New York",
											Occurs: 10,
										},
										{
											Value:  "San Francisco",
											Occurs: 10,
										},
									},
								},
							},
							"sector": {
								Type: aggregation.PropertyTypeText,
								TextAggregation: aggregation.Text{
									Count: 60,
									Items: []aggregation.TextOccurrence{
										{
											Value:  "Food",
											Occurs: 60,
										},
									},
								},
							},
						},
					},
				},
			}

			assert.Equal(t, expectedResult.Groups, res.Groups)

			// floating point arithmetic for numerical fields
			assert.InEpsilon(t, expectedDivYield.NumericalAggregations["mean"],
				actualDivYield.NumericalAggregations["mean"], 0.1)
			assert.InEpsilon(t, expectedPrice.NumericalAggregations["mean"],
				actualPrice.NumericalAggregations["mean"], 0.1)

			assert.InEpsilon(t, expectedDivYield.NumericalAggregations["maximum"],
				actualDivYield.NumericalAggregations["maximum"], 0.1)
			assert.InEpsilon(t, expectedPrice.NumericalAggregations["maximum"],
				actualPrice.NumericalAggregations["maximum"], 0.1)

			assert.Equal(t, expectedDivYield.NumericalAggregations["minimum"],
				actualDivYield.NumericalAggregations["minimum"])
			assert.Equal(t, expectedPrice.NumericalAggregations["minimum"],
				actualPrice.NumericalAggregations["minimum"])

			// the expected sums were declared but previously never checked
			assert.InEpsilon(t, expectedDivYield.NumericalAggregations["sum"],
				actualDivYield.NumericalAggregations["sum"], 0.1)
			assert.InEpsilon(t, expectedPrice.NumericalAggregations["sum"],
				actualPrice.NumericalAggregations["sum"], 0.1)

			assert.Equal(t, expectedDivYield.NumericalAggregations["mode"],
				actualDivYield.NumericalAggregations["mode"])
			assert.Equal(t, expectedPrice.NumericalAggregations["mode"],
				actualPrice.NumericalAggregations["mode"])

			assert.InEpsilon(t, expectedDivYield.NumericalAggregations["median"],
				actualDivYield.NumericalAggregations["median"], 0.1)
			assert.InEpsilon(t, expectedPrice.NumericalAggregations["median"],
				actualPrice.NumericalAggregations["median"], 0.1)

			assert.InEpsilon(t, expectedDivYield.NumericalAggregations["count"],
				actualDivYield.NumericalAggregations["count"], 0.1)
			assert.InEpsilon(t, expectedPrice.NumericalAggregations["count"],
				actualPrice.NumericalAggregations["count"], 0.1)

			assert.Equal(t, expectedMakesProduct.ReferenceAggregation.PointingTo,
				actualMakesProduct.ReferenceAggregation.PointingTo)
		})

		t.Run("multiple fields, multiple aggregators, ref filter", func(t *testing.T) {
			if !exact {
				// filtering is happening inside a shard, so there is no need to test
				// this again for multi-sharding. This saves us from adapting all the
				// assertions to work with fuzzy values
				t.Skip()
			}

			params := aggregation.Params{
				ClassName: schema.ClassName(companyClass.Class),
				GroupBy:   nil, // explicitly set to nil,
				Filters: &filters.LocalFilter{
					Root: &filters.Clause{
						Operator: filters.OperatorEqual,
						Value: &filters.Value{
							Type:  schema.DataTypeText,
							Value: "Superbread",
						},
						On: &filters.Path{
							Property: "makesProduct",
							Child: &filters.Path{
								Class:    "AggregationsTestProduct",
								Property: "name",
							},
						},
					},
				},
				IncludeMetaCount: true,
				Properties:       companyProperties(),
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)

			expectedResult := &aggregation.Result{
				Groups: []aggregation.Group{
					{
						Count: 10,
						Properties: map[string]aggregation.Property{
							"makesProduct": {
								Type:                 aggregation.PropertyTypeReference,
								ReferenceAggregation: aggregation.Reference{PointingTo: []string{"weaviate://localhost/1295c052-263d-4aae-99dd-920c5a370d06"}},
							},
							"dividendYield": {
								Type: aggregation.PropertyTypeNumerical,
								NumericalAggregations: map[string]interface{}{
									"mean":    8.0,
									"maximum": 8.0,
									"minimum": 8.0,
									"sum":     80.,
									"mode":    8.0,
									"median":  8.0,
									"count":   10.,
								},
							},
							"price": {
								Type: aggregation.PropertyTypeNumerical,
								NumericalAggregations: map[string]interface{}{
									"mean":    10.,
									"maximum": 10.,
									"minimum": 10.,
									"sum":     100.,
									"mode":    10.,
									"median":  10.,
									"count":   10.,
								},
							},
							"listedInIndex": {
								Type: aggregation.PropertyTypeBoolean,
								BooleanAggregation: aggregation.Boolean{
									TotalTrue:       10,
									TotalFalse:      0,
									PercentageTrue:  1,
									PercentageFalse: 0,
									Count:           10,
								},
							},
							"location": {
								Type: aggregation.PropertyTypeText,
								TextAggregation: aggregation.Text{
									Count: 10,
									Items: []aggregation.TextOccurrence{
										{
											Value:  "Detroit",
											Occurs: 10,
										},
									},
								},
							},
							"sector": {
								Type: aggregation.PropertyTypeText,
								TextAggregation: aggregation.Text{
									Count: 10,
									Items: []aggregation.TextOccurrence{
										{
											Value:  "Food",
											Occurs: 10,
										},
									},
								},
							},
						},
					},
				},
			}

			assert.Equal(t, expectedResult.Groups, res.Groups)
		})

		t.Run("array types, only meta count, no other aggregations", func(t *testing.T) {
			params := aggregation.Params{
				ClassName:        schema.ClassName(arrayTypesClass.Class),
				IncludeMetaCount: true,
				GroupBy:          nil, // explicitly set to nil
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)

			expectedResult := &aggregation.Result{
				Groups: []aggregation.Group{
					{
						GroupedBy: nil,
						Count:     2,
					},
				},
			}

			require.NotNil(t, res)
			assert.Equal(t, expectedResult.Groups, res.Groups)
		})

		t.Run("array types, single aggregator numbers", func(t *testing.T) {
			params := aggregation.Params{
				ClassName: schema.ClassName(arrayTypesClass.Class),
				GroupBy:   nil, // explicitly set to nil
				Properties: []aggregation.ParamProperty{
					{
						Name:        schema.PropertyName("numbers"),
						Aggregators: numericalAggregators(),
					},
				},
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)

			expectedResult := &aggregation.Result{
				Groups: []aggregation.Group{
					{
						GroupedBy: nil,
						Properties: map[string]aggregation.Property{
							"numbers": {
								Type: aggregation.PropertyTypeNumerical,
								NumericalAggregations: map[string]interface{}{
									"mean":    2.0,
									"maximum": 3.0,
									"minimum": 1.0,
									"sum":     14.0,
									"mode":    2.0,
									"median":  2.0,
									"count":   7.,
								},
							},
						},
					},
				},
			}

			assert.Equal(t, expectedResult.Groups, res.Groups)
		})

		t.Run("array types, single aggregator strings", func(t *testing.T) {
			if !exact {
				t.Skip()
			}
			params := aggregation.Params{
				ClassName: schema.ClassName(arrayTypesClass.Class),
				GroupBy:   nil, // explicitly set to nil
				Properties: []aggregation.ParamProperty{
					{
						Name: schema.PropertyName("strings"),
						Aggregators: []aggregation.Aggregator{
							// limit is very restrictive
							aggregation.NewTopOccurrencesAggregator(ptInt(1)),
							aggregation.TypeAggregator, // ignored in the repo, but can't block
						},
					},
				},
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)

			expectedResult := &aggregation.Result{
				Groups: []aggregation.Group{
					{
						GroupedBy: nil,
						Properties: map[string]aggregation.Property{
							"strings": {
								Type: aggregation.PropertyTypeText,
								TextAggregation: aggregation.Text{
									Count: 4,
									Items: []aggregation.TextOccurrence{
										{
											Value:  "a",
											Occurs: 2,
										},
									},
								},
							},
						},
					},
				},
			}

			assert.Equal(t, expectedResult.Groups, res.Groups)
		})
	}
}
// testDateAggregationsWithGrouping returns a subtest suite that aggregates the
// `timeArrived` date property of the customer class while grouping by
// properties of differing cardinality (all unique, all identical, boolean).
//
// The exact parameter is accepted for signature parity with the other suite
// builders in this file; it is not read by this function.
func testDateAggregationsWithGrouping(repo *DB, exact bool) func(t *testing.T) {
	return func(t *testing.T) {
		// dateAggregators builds the aggregator list requested for
		// `timeArrived` in the subtests that inspect aggregation values.
		dateAggregators := func() []aggregation.Aggregator {
			return []aggregation.Aggregator{
				aggregation.CountAggregator,
				aggregation.MinimumAggregator,
				aggregation.MaximumAggregator,
				aggregation.MedianAggregator,
				aggregation.ModeAggregator,
			}
		}

		t.Run("group on only unique values", func(t *testing.T) {
			params := aggregation.Params{
				ClassName:        schema.ClassName(customerClass.Class),
				IncludeMetaCount: true,
				GroupBy: &filters.Path{
					Class: schema.ClassName(customerClass.Class),
					// Each customer obj has a unique value for the `internalId` field
					Property: schema.PropertyName("internalId"),
				},
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)
			require.NotNil(t, res)
			assert.Len(t, res.Groups, len(customers))
		})

		t.Run("group on only identical values", func(t *testing.T) {
			params := aggregation.Params{
				ClassName:        schema.ClassName(customerClass.Class),
				IncludeMetaCount: true,
				GroupBy: &filters.Path{
					Class: schema.ClassName(customerClass.Class),
					// Each customer obj has the same value for the `countryOfOrigin` field
					Property: schema.PropertyName("countryOfOrigin"),
				},
				Properties: []aggregation.ParamProperty{
					{
						Name:        "timeArrived",
						Aggregators: dateAggregators(),
					},
				},
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)
			require.NotNil(t, res)
			// must be a hard requirement: res.Groups[0] is indexed below and
			// would panic on an empty result if this were only an assert
			require.Len(t, res.Groups, 1)

			expectedProperties := map[string]interface{}{
				"count":   int64(10),
				"minimum": "2022-06-16T17:30:17.231346Z",
				"maximum": "2022-06-16T17:30:26.451235Z",
				"median":  "2022-06-16T17:30:21.1179905Z",
				"mode":    "2022-06-16T17:30:17.231346Z",
			}
			receivedProperties := res.Groups[0].Properties["timeArrived"].DateAggregations
			assert.EqualValues(t, expectedProperties, receivedProperties)
		})

		t.Run("group on some unique values", func(t *testing.T) {
			params := aggregation.Params{
				ClassName:        schema.ClassName(customerClass.Class),
				IncludeMetaCount: true,
				GroupBy: &filters.Path{
					Class: schema.ClassName(customerClass.Class),
					// should result in two groups due to bool value
					Property: schema.PropertyName("isNewCustomer"),
				},
				Properties: []aggregation.ParamProperty{
					{
						Name:        "timeArrived",
						Aggregators: dateAggregators(),
					},
				},
			}

			res, err := repo.Aggregate(context.Background(), params)
			require.Nil(t, err)
			require.NotNil(t, res)
			assert.Len(t, res.Groups, 2)

			expectedResult := []aggregation.Group{
				{
					Properties: map[string]aggregation.Property{
						"timeArrived": {
							Type: "date",
							DateAggregations: map[string]interface{}{
								"count":   int64(6),
								"maximum": "2022-06-16T17:30:25.524536Z",
								"median":  "2022-06-16T17:30:19.6718905Z",
								"minimum": "2022-06-16T17:30:17.231346Z",
								"mode":    "2022-06-16T17:30:17.231346Z",
							},
						},
					},
					GroupedBy: &aggregation.GroupedBy{
						Value: false,
						Path:  []string{"isNewCustomer"},
					},
					Count: 6,
				},
				{
					Properties: map[string]aggregation.Property{
						"timeArrived": {
							Type: "date",
							DateAggregations: map[string]interface{}{
								"count":   int64(4),
								"maximum": "2022-06-16T17:30:26.451235Z",
								"median":  "2022-06-16T17:30:22.224622Z",
								"minimum": "2022-06-16T17:30:20.123546Z",
								"mode":    "2022-06-16T17:30:20.123546Z",
							},
						},
					},
					GroupedBy: &aggregation.GroupedBy{
						Value: true,
						Path:  []string{"isNewCustomer"},
					},
					Count: 4,
				},
			}
			assert.EqualValues(t, expectedResult, res.Groups)
		})
	}
}
// testDateAggregationsWithoutGrouping returns a subtest suite that aggregates
// the `timeArrived` date property of the customer class without any grouping
// and verifies that the call succeeds and yields exactly one group.
//
// The exact parameter is not read by this function.
func testDateAggregationsWithoutGrouping(repo *DB, exact bool) func(t *testing.T) {
	return func(t *testing.T) {
		t.Run("without grouping", func(t *testing.T) {
			timeArrived := aggregation.ParamProperty{
				Name: "timeArrived",
				Aggregators: []aggregation.Aggregator{
					aggregation.CountAggregator,
					aggregation.MinimumAggregator,
					aggregation.MaximumAggregator,
					aggregation.MedianAggregator,
					aggregation.ModeAggregator,
				},
			}

			res, err := repo.Aggregate(context.Background(), aggregation.Params{
				ClassName:  schema.ClassName(customerClass.Class),
				GroupBy:    nil,
				Properties: []aggregation.ParamProperty{timeArrived},
			})

			require.Nil(t, err)
			require.NotNil(t, res)
			require.Len(t, res.Groups, 1)
		})
	}
}
// ptInt returns a pointer to a fresh int holding the value of in. Each call
// yields a distinct pointer, so callers may mutate the result independently.
func ptInt(in int) *int {
	out := new(int)
	*out = in
	return out
}
// sectorEqualsFoodFilter builds a filter with a single clause that matches
// objects of class "Company" whose `sector` text property equals "Food".
func sectorEqualsFoodFilter() *filters.LocalFilter {
	onSector := &filters.Path{
		Class:    "Company",
		Property: "sector",
	}
	food := &filters.Value{
		Value: "Food",
		Type:  schema.DataTypeText,
	}
	root := &filters.Clause{
		Operator: filters.OperatorEqual,
		On:       onSector,
		Value:    food,
	}
	return &filters.LocalFilter{Root: root}
}
// mustStringToTime parses s as an RFC 3339 timestamp (with optional
// nanoseconds), interpreting it in UTC when s carries no zone information.
// It panics if s cannot be parsed, so it is only suitable for fixtures with
// known-good literals.
func mustStringToTime(s string) time.Time {
	parsed, err := time.ParseInLocation(time.RFC3339Nano, s, time.UTC)
	if err == nil {
		return parsed
	}
	panic(fmt.Sprintf("failed to parse time: %s, %s", s, err))
}