misinfo_detection_app / scripts /upsert_climate_test.py
stefanjwojcik's picture
add scripts
143b0d4 verified
raw
history blame contribute delete
666 Bytes
## Chunk and upload vectors from a DataFrame to Pinecone
# NOTE(review): `pd`, `Pinecone`, `sqids`, `chunk_and_embed`,
# `create_vectors_from_df` and `chunk_df_and_upsert` are not imported in this
# file; presumably the session that runs this script provides them -- confirm.
import os

## Working Example 1
df = pd.read_csv('data/climate_test.csv')

# Take the API key from the environment; never commit it to source control.
api_key = os.environ.get("PINECONE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the PINECONE_API_KEY environment variable before running this script."
    )
pc = Pinecone(api_key=api_key)

# The index handle can only be created once the client exists.
index = pc.Index("test-index")

model = "multilingual-e5-large"
df = chunk_and_embed(pc, model, df)

# One id per row, encoded by sqids from three consecutive integers.
df['id'] = [sqids.encode([i, i + 1, i + 2]) for i in range(len(df))]

# Build the upsert payload from the DataFrame.
vectors = create_vectors_from_df(df)

# Upsert only the first 12 vectors directly into the test namespace.
index.upsert(
    vectors=vectors[0:12],
    namespace="test-namespace",
)

# Then hand the whole DataFrame to the chunked upsert helper.
# NOTE(review): presumably chunk_size is the number of rows per request -- confirm.
chunk_df_and_upsert(index, df, chunk_size=100)