KevinStephenson
Adding in weaviate code
b110593
raw
history blame
5.51 kB
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package compressionhelpers
import (
"encoding/binary"
"math"
"sync/atomic"
"gonum.org/v1/gonum/stat/distuv"
)
// distribution is the probability-distribution abstraction a TileEncoder uses
// to turn values into codes (via CDF) and codes back into representative
// values (via Quantile).
type distribution interface {
// Transform maps a raw value into the space in which TileEncoder.Add
// accumulates its running mean and standard deviation.
Transform(x float64) float64
// CDF returns the cumulative probability at x.
CDF(x float64) float64
// Quantile returns the value at cumulative probability x.
Quantile(x float64) float64
}
// logNormalDistribution implements distribution on top of gonum's
// distuv.LogNormal.
type logNormalDistribution struct {
// dist is the wrapped gonum distribution that answers CDF and Quantile.
dist *distuv.LogNormal
}
// newLogNormalDistribution wraps a distuv.LogNormal whose Mu is mean and whose
// Sigma is std, and returns it behind the distribution interface.
//
// NOTE(review): the encoder feeds it statistics of log-transformed values (see
// Transform), which matches gonum's parameterisation of LogNormal - confirm
// against the gonum documentation when changing this.
func newLogNormalDistribution(mean float64, std float64) distribution {
	inner := distuv.LogNormal{Mu: mean, Sigma: std}
	return &logNormalDistribution{dist: &inner}
}
// Transform returns the natural logarithm of x for strictly positive x and 0
// for every other input (zero, negatives and NaN), so the running statistics
// never receive -Inf or NaN from the logarithm.
func (d *logNormalDistribution) Transform(x float64) float64 {
	logged := 0.0
	if x > 0 {
		logged = math.Log(x)
	}
	return logged
}
// CDF returns the cumulative probability of x as reported by the wrapped
// distuv.LogNormal. x is passed through as-is; Transform is not applied here.
func (d *logNormalDistribution) CDF(x float64) float64 {
return d.dist.CDF(x)
}
// Quantile delegates to the wrapped distuv.LogNormal and returns the value
// located at cumulative probability x.
func (d *logNormalDistribution) Quantile(x float64) float64 {
return d.dist.Quantile(x)
}
// normalDistribution implements distribution on top of gonum's distuv.Normal.
type normalDistribution struct {
// dist is the wrapped gonum distribution that answers CDF and Quantile.
dist *distuv.Normal
}
// newNormalDistribution wraps a distuv.Normal whose Mu is mean and whose Sigma
// is std, and returns it behind the distribution interface.
func newNormalDistribution(mean float64, std float64) distribution {
	inner := distuv.Normal{Mu: mean, Sigma: std}
	return &normalDistribution{dist: &inner}
}
// Transform returns x unchanged: for the normal distribution the running
// statistics are accumulated on the raw values.
func (d *normalDistribution) Transform(x float64) float64 {
return x
}
// CDF returns the cumulative probability of x as reported by the wrapped
// distuv.Normal.
func (d *normalDistribution) CDF(x float64) float64 {
return d.dist.CDF(x)
}
// Quantile delegates to the wrapped distuv.Normal and returns the value
// located at cumulative probability x.
func (d *normalDistribution) Quantile(x float64) float64 {
return d.dist.Quantile(x)
}
// Centroid is one slot of a lazily filled centroid cache: the stored value
// plus a flag recording whether it has been computed.
type Centroid struct {
// Center is the cached centroid; it is only meaningful once Calculated
// reports true.
Center []float32
// Calculated is set to true after Center has been stored.
Calculated atomic.Bool
}
// EncoderDistribution identifies which probability distribution a TileEncoder
// fits. The value is written as a single byte by ExposeDataForRestore, so the
// numeric values below must stay stable.
type EncoderDistribution byte
const (
// NormalEncoderDistribution selects the normal distribution.
NormalEncoderDistribution EncoderDistribution = 0
// LogNormalEncoderDistribution selects the log-normal distribution.
LogNormalEncoderDistribution EncoderDistribution = 1
)
// TileEncoder quantises one component of a vector (the one at index segment)
// into a single-byte code using the CDF of a distribution whose parameters
// are estimated incrementally through Add.
type TileEncoder struct {
// bins is the number of codes; NewTileEncoder sets it to 2^bits.
bins float64
// mean is the running mean of the transformed values, updated by Add.
mean float64
// stdDev is the running standard deviation, updated by Add.
stdDev float64
// size counts the values seen so far; Add increments it.
size float64
// s1 is the running sum of the transformed values.
s1 float64
// s2 is the running sum of the squared transformed values.
s2 float64
// segment is the index into each input vector that this encoder reads.
segment int
// centroids caches the value returned by Centroid for each code.
centroids []Centroid
// encoderDistribution selects which distribution setEncoderDistribution
// builds.
encoderDistribution EncoderDistribution
// distribution is the distribution built from mean and stdDev by
// setEncoderDistribution.
distribution distribution
}
// NewTileEncoder creates an unfitted encoder for the vector component at index
// segment. It uses 2^bits bins, allocates one centroid cache slot per bin and
// starts all running statistics at zero. encoderDistribution selects the
// distribution that setEncoderDistribution installs before returning.
func NewTileEncoder(bits int, segment int, encoderDistribution EncoderDistribution) *TileEncoder {
	binCount := math.Pow(2, float64(bits))

	// Every statistic (mean, stdDev, size, s1, s2) keeps its zero value.
	te := new(TileEncoder)
	te.bins = binCount
	te.segment = segment
	te.centroids = make([]Centroid, int(binCount))
	te.encoderDistribution = encoderDistribution

	te.setEncoderDistribution()
	return te
}
// RestoreTileEncoder rebuilds an encoder from previously persisted state (the
// values written by ExposeDataForRestore) and installs the distribution
// selected by encoderDistribution.
//
// The centroid cache is allocated here as well, so that Centroid can be called
// on a restored encoder; without it the cache would be nil and every lookup
// would panic. One slot is allocated per bin, capped at 256 because codes are
// single bytes. A bins value that is NaN or smaller than 1 (for example from
// corrupt input) yields an empty cache rather than a panic during allocation.
func RestoreTileEncoder(bins float64, mean float64, stdDev float64, size float64, s1 float64, s2 float64, segment uint16, encoderDistribution byte) *TileEncoder {
	// A byte code can address at most this many cache slots.
	const maxCodes = 256

	slots := 0
	switch {
	case bins > maxCodes:
		slots = maxCodes
	case bins >= 1:
		slots = int(bins)
	}

	te := &TileEncoder{
		bins:                bins,
		mean:                mean,
		stdDev:              stdDev,
		size:                size,
		s1:                  s1,
		s2:                  s2,
		segment:             int(segment),
		centroids:           make([]Centroid, slots),
		encoderDistribution: EncoderDistribution(encoderDistribution),
	}
	te.setEncoderDistribution()
	return te
}
// ExposeDataForRestore serialises the encoder state into a 51-byte
// little-endian buffer:
//
//	bytes  0-47  bins, mean, stdDev, size, s1, s2 as IEEE-754 float64 bit patterns
//	bytes 48-49  segment as uint16
//	byte  50     encoderDistribution
//
// The field order matches the parameter order of RestoreTileEncoder.
func (te *TileEncoder) ExposeDataForRestore() []byte {
	const (
		floatWidth  = 8
		payloadSize = 51
	)

	floats := [...]float64{te.bins, te.mean, te.stdDev, te.size, te.s1, te.s2}
	out := make([]byte, payloadSize)

	offset := 0
	for _, v := range floats {
		binary.LittleEndian.PutUint64(out[offset:offset+floatWidth], math.Float64bits(v))
		offset += floatWidth
	}

	binary.LittleEndian.PutUint16(out[offset:offset+2], uint16(te.segment))
	out[offset+2] = byte(te.encoderDistribution)
	return out
}
// Fit rebuilds the encoder's distribution from the current mean and stdDev by
// calling setEncoderDistribution. The data argument is not read: the
// statistics themselves are accumulated through Add. It always returns nil.
func (te *TileEncoder) Fit(data [][]float32) error {
te.setEncoderDistribution()
return nil
}
// setEncoderDistribution replaces te.distribution with a fresh distribution of
// the kind selected by te.encoderDistribution, parameterised with the current
// mean and stdDev. For any other encoderDistribution value the existing
// distribution is left untouched.
func (te *TileEncoder) setEncoderDistribution() {
	var next distribution
	switch te.encoderDistribution {
	case NormalEncoderDistribution:
		next = newNormalDistribution(te.mean, te.stdDev)
	case LogNormalEncoderDistribution:
		next = newLogNormalDistribution(te.mean, te.stdDev)
	default:
		// Unknown kind: keep whatever distribution is currently installed.
		return
	}
	te.distribution = next
}
// Add folds the component x[te.segment] into the running statistics. The value
// is first passed through the distribution's Transform, then the sum (s1), the
// sum of squares (s2), the count (size), the mean and the population standard
// deviation are updated.
//
// Add does not rebuild te.distribution; Fit does that once all values have
// been added. x must have more than te.segment elements.
func (te *TileEncoder) Add(x []float32) {
	v := te.distribution.Transform(float64(x[te.segment]))
	te.s1 += v
	te.s2 += v * v
	te.size++
	te.mean = te.s1 / te.size

	// variance = (s2 - 2*mean*s1 + size*mean^2) / size, evaluated in the same
	// order as before so results stay bit-for-bit comparable.
	sum := te.s2 + te.size*te.mean*te.mean
	prod := 2 * te.mean * te.s1
	variance := (sum - prod) / te.size

	// For (nearly) constant input the subtraction can cancel to a tiny
	// negative number; clamp it so the square root never produces NaN.
	if variance < 0 {
		variance = 0
	}
	te.stdDev = math.Sqrt(variance)
}
// Encode maps the component x[te.segment] to its bin index: the CDF of the
// value is scaled by the number of bins and truncated.
//
// The result is clamped to the valid code range [0, min(bins-1, 255)]. Without
// the clamp a CDF of exactly 1 (a value far in the upper tail) produces the
// index bins, which is either out of range for a byte (256 for 8 bits) or one
// past the end of the centroid cache. A NaN CDF is encoded as 0.
// x must have more than te.segment elements.
func (te *TileEncoder) Encode(x []float32) byte {
	cdf := te.distribution.CDF(float64(x[te.segment]))
	code := math.Trunc(cdf * te.bins)

	// Converting NaN or a negative float to byte is not well defined in Go.
	if math.IsNaN(code) || code <= 0 {
		return 0
	}

	highest := math.Min(te.bins-1, math.MaxUint8)
	if code > highest {
		code = highest
	}
	return byte(code)
}
// centroid computes the representative value of code b as a one-element slice.
// It picks a probability for the code and returns the distribution's quantile
// at that probability:
//
//   - code 0 uses 1/bins,
//   - a code equal to byte(bins) uses (bins-1)/bins,
//   - every other code uses the midpoint of its bin, (b + 0.5)/bins.
//
// NOTE(review): for 256 bins byte(te.bins) is an out-of-range conversion, so
// the second case is not reliably reachable there; the last regular code
// (bins-1) takes the midpoint branch. Confirm whether bins-1 was intended.
func (te *TileEncoder) centroid(b byte) []float32 {
	var p float64
	switch {
	case b == 0:
		p = 1 / te.bins
	case b == byte(te.bins):
		p = (te.bins - 1) / te.bins
	default:
		code := float64(b)
		lower := code / te.bins
		upper := (code + 1) / te.bins
		p = (lower + upper) / 2
	}
	return []float32{float32(te.distribution.Quantile(p))}
}
// Centroid returns the representative value for code b. The value is computed
// by centroid on first use and stored in te.centroids[b]; later calls return
// the stored slice. b must be a valid index into te.centroids.
func (te *TileEncoder) Centroid(b byte) []float32 {
	slot := &te.centroids[b]
	if !slot.Calculated.Load() {
		// Store the value before raising the flag so that a reader who sees
		// the flag set also finds the value in place.
		slot.Center = te.centroid(b)
		slot.Calculated.Store(true)
	}
	return slot.Center
}