maya-persistence / src /indexer.py
anubhav77's picture
v0.1.3
adeac66
raw
history blame
4.63 kB
from langchain.vectorstores import Chroma
from chromadb.api.fastapi import requests
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from llm.llmFactory import LLMFactory
from datetime import datetime
# Embedding model used for every document and query in this module.
model_name = "BAAI/bge-large-en-v1.5"

# Normalised vectors are what make the similarity score a cosine similarity.
encode_kwargs = dict(normalize_embeddings=True)

# The model is loaded on the CPU.
embedding = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=dict(device="cpu"),
    encode_kwargs=encode_kwargs,
)

# Directory handed to Chroma as its persistence location.
persist_directory = "db"
# Sample wine catalogue used to seed the vector store.
# Each Document pairs a short tasting note (page_content) with metadata keys
# name, year, rating, grape, color and country.
docs = [
    Document(
        page_content="Complex, layered, rich red with dark fruit flavors",
        metadata={
            "name": "Opus One",
            "year": 2018,
            "rating": 96,
            "grape": "Cabernet Sauvignon",
            "color": "red",
            "country": "USA",
        },
    ),
    Document(
        page_content="Luxurious, sweet wine with flavors of honey, apricot, and peach",
        metadata={
            "name": "Château d'Yquem",
            "year": 2015,
            "rating": 98,
            "grape": "Sémillon",
            "color": "white",
            "country": "France",
        },
    ),
    Document(
        page_content="Full-bodied red with notes of black fruit and spice",
        metadata={
            "name": "Penfolds Grange",
            "year": 2017,
            "rating": 97,
            "grape": "Shiraz",
            "color": "red",
            "country": "Australia",
        },
    ),
    Document(
        page_content="Elegant, balanced red with herbal and berry nuances",
        metadata={
            "name": "Sassicaia",
            "year": 2016,
            "rating": 95,
            "grape": "Cabernet Franc",
            "color": "red",
            "country": "Italy",
        },
    ),
    Document(
        page_content="Highly sought-after Pinot Noir with red fruit and earthy notes",
        metadata={
            "name": "Domaine de la Romanée-Conti",
            "year": 2018,
            "rating": 100,
            "grape": "Pinot Noir",
            "color": "red",
            "country": "France",
        },
    ),
    Document(
        page_content="Crisp white with tropical fruit and citrus flavors",
        metadata={
            "name": "Cloudy Bay",
            "year": 2021,
            "rating": 92,
            "grape": "Sauvignon Blanc",
            "color": "white",
            "country": "New Zealand",
        },
    ),
    Document(
        page_content="Rich, complex Champagne with notes of brioche and citrus",
        metadata={
            "name": "Krug Grande Cuvée",
            "year": 2010,
            "rating": 93,
            "grape": "Chardonnay blend",
            "color": "sparkling",
            # Krug is a Champagne house, so the country is France; the
            # previous value ("New Zealand") contradicted the description
            # and skewed country-based filtering.
            "country": "France",
        },
    ),
    Document(
        page_content="Intense, dark fruit flavors with hints of chocolate",
        metadata={
            "name": "Caymus Special Selection",
            "year": 2018,
            "rating": 96,
            "grape": "Cabernet Sauvignon",
            "color": "red",
            "country": "USA",
        },
    ),
    Document(
        page_content="Exotic, aromatic white with stone fruit and floral notes",
        metadata={
            "name": "Jermann Vintage Tunina",
            "year": 2020,
            "rating": 91,
            "grape": "Sauvignon Blanc blend",
            "color": "white",
            "country": "Italy",
        },
    ),
]
# Embed the sample documents and load them into a Chroma store that uses
# `persist_directory` as its persistence location.
# NOTE(review): this runs on every import of the module; presumably it adds
# the sample documents to an already-persisted collection again - confirm.
vectorstore = Chroma.from_documents(
    docs,
    embedding=embedding,
    persist_directory=persist_directory,
)
# Schema of the metadata attached to each wine document, expressed as one
# AttributeInfo per field. Rows are (name, description, type).
metadata_field_info = [
    AttributeInfo(name=field, description=summary, type=kind)
    for field, summary, kind in (
        ("grape", "The grape used to make the wine", "string or list[string]"),
        ("name", "The name of the wine", "string or list[string]"),
        ("color", "The color of the wine", "string or list[string]"),
        ("year", "The year the wine was released", "integer"),
        ("country", "The name of the country the wine comes from", "string"),
        # Declared as integer; the original author's note marks float as the
        # alternative.
        ("rating", "The Robert Parker rating for the wine 0-100", "integer"),
    )
]

# Describes what each document's page_content holds; handed to the retriever.
document_content_description = "Brief description of the wine"
# Obtain the LLM registered under the name "executor2" from the project's
# factory.
lf = LLMFactory()
llm = lf.get_llm("executor2")

# Self-query retriever over the vector store, built from the LLM, the content
# description and the metadata schema defined in this module.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)
# Baseline metadata for documents added through addText.
# NOTE: both timestamps are evaluated once, when this statement runs at
# import time; "date" and "ID" come from two separate datetime.now() calls.
meta_defaults = dict(
    date=datetime.now().strftime("%Y-%m-%d %H:%M:%S::%f"),
    source="conversation",
    ID=f'{datetime.now().strftime("%Y-%m-%d %H:%M:%S::%f")}-conversation',
)
def getRelevantDocs(query: str):
    """Run ``query`` through the module-level self-query retriever.

    Args:
        query: Search text forwarded unchanged to the retriever.

    Returns:
        Whatever ``retriever.get_relevant_documents(query)`` returns.
    """
    matches = retriever.get_relevant_documents(query)
    return matches
def addText(inStr: str, metadata):
    """Store ``inStr`` in the vector store as a single document.

    The document's metadata starts from a copy of ``meta_defaults``, gets a
    ``date`` and ``ID`` stamped at call time, and is then overlaid with the
    caller's ``metadata`` (caller-supplied keys win, including ``date`` and
    ``ID``).

    Args:
        inStr: Text used as the document's page content.
        metadata: Mapping of extra metadata keys for this document. ``None``
            or an empty mapping means "defaults only".

    Returns:
        Whatever ``vectorstore.add_documents`` returns for the new document.
    """
    # Work on a copy: aliasing the module-level dict made every call mutate
    # the shared defaults, so keys from one call leaked into later documents.
    md = dict(meta_defaults)

    # The timestamps stored in ``meta_defaults`` are computed once at import,
    # which would give every document the same date and ID. Stamp per call.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S::%f")
    md["date"] = stamp
    md["ID"] = stamp + "-conversation"

    # Caller-supplied values take precedence over the defaults above.
    md.update(metadata or {})

    return vectorstore.add_documents([Document(page_content=inStr, metadata=md)])