File size: 683 Bytes
21e70fe
 
 
 
544e2ab
21e70fe
544e2ab
 
21e70fe
 
 
 
 
 
 
 
 
 
 
 
 
544e2ab
21e70fe
 
ca97c6b
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from lynxkite.core.ops import op
import staticvectors
import pandas as pd

# Passed as the first argument to every ``@op`` registration in this file,
# so all operations defined here are registered under this same name.
ENV = "LynxKite Graph Analytics"


@op(ENV, "Word2vec for the top 1000 words", cache=True)
def word2vec_1000():
    """Build a table of word2vec embeddings for the words in ``wordlist.txt``.

    The word list is read from ``wordlist.txt`` (resolved against the current
    working directory), one word per line. The number of rows is whatever the
    file contains; nothing here enforces the 1000 mentioned in the title.

    Returns:
        A DataFrame with a "word" column holding each whitespace-stripped
        line and an "embedding" column holding the model output for that
        word, converted with ``.tolist()``.
    """
    model = staticvectors.StaticVectors("neuml/word2vec-quantized")
    # Decode explicitly as UTF-8 so the words read do not depend on the
    # platform's locale encoding.
    with open("wordlist.txt", encoding="utf-8") as f:
        # Strip the whole text first so leading/trailing blank lines do not
        # become empty "words"; then strip each line individually.
        words = [w.strip() for w in f.read().strip().split("\n")]
    df = pd.DataFrame(
        {
            "word": words,
            "embedding": model.embeddings(words).tolist(),
        }
    )
    return df


@op(ENV, "Take first N")
def first_n(df: pd.DataFrame, *, n=10):
    """Return the leading rows of a table.

    Args:
        df: Table to take rows from.
        n: Row count handed to ``DataFrame.head``. Defaults to 10.

    Returns:
        The result of ``df.head(n)``.
    """
    leading_rows = df.head(n)
    return leading_rows


@op(ENV, "Sample N")
def sample_n(df: pd.DataFrame, *, n=10):
    """Return up to ``n`` randomly sampled rows of a table.

    Args:
        df: Table to sample from.
        n: Number of rows requested. Defaults to 10. When the table has
            fewer rows than this, all of its rows are sampled instead of
            raising an error.

    Returns:
        A DataFrame with ``min(n, len(df))`` rows drawn by ``df.sample``.
    """
    # DataFrame.sample raises ValueError when asked for more rows than exist
    # (sampling is without replacement by default), so cap the request at the
    # table size. This mirrors how "Take first N" copes with short tables.
    sample_size = min(n, len(df))
    return df.sample(sample_size)