"""Self-querying wine retriever backed by a persistent Chroma vector store.

Importing this module builds the embedding model, seeds the vector store with
a small set of sample wine documents, and wires up a ``SelfQueryRetriever``.
``getRelevantDocs`` queries the store and ``addText`` appends new text to it.
"""
from datetime import datetime

from chromadb.api.fastapi import requests  # NOTE(review): unused in the code visible here
from langchain.chains import RetrievalQA
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma

from llm.llmFactory import LLMFactory

model_name = "BAAI/bge-large-en-v1.5"
encode_kwargs = {'normalize_embeddings': True}  # set True to compute cosine similarity

embedding = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cpu'},
    encode_kwargs=encode_kwargs,
)

persist_directory = 'db'

# Sample corpus: a short tasting note as content plus structured metadata that
# the self-query retriever can filter on.
docs = [
    Document(
        page_content="Complex, layered, rich red with dark fruit flavors",
        metadata={"name": "Opus One", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color": "red", "country": "USA"},
    ),
    Document(
        page_content="Luxurious, sweet wine with flavors of honey, apricot, and peach",
        metadata={"name": "Château d'Yquem", "year": 2015, "rating": 98, "grape": "Sémillon", "color": "white", "country": "France"},
    ),
    Document(
        page_content="Full-bodied red with notes of black fruit and spice",
        metadata={"name": "Penfolds Grange", "year": 2017, "rating": 97, "grape": "Shiraz", "color": "red", "country": "Australia"},
    ),
    Document(
        page_content="Elegant, balanced red with herbal and berry nuances",
        metadata={"name": "Sassicaia", "year": 2016, "rating": 95, "grape": "Cabernet Franc", "color": "red", "country": "Italy"},
    ),
    Document(
        page_content="Highly sought-after Pinot Noir with red fruit and earthy notes",
        metadata={"name": "Domaine de la Romanée-Conti", "year": 2018, "rating": 100, "grape": "Pinot Noir", "color": "red", "country": "France"},
    ),
    Document(
        page_content="Crisp white with tropical fruit and citrus flavors",
        metadata={"name": "Cloudy Bay", "year": 2021, "rating": 92, "grape": "Sauvignon Blanc", "color": "white", "country": "New Zealand"},
    ),
    Document(
        page_content="Rich, complex Champagne with notes of brioche and citrus",
        # Country corrected from "New Zealand": Krug is a Champagne house, and
        # the wrong value made country filters return this wine incorrectly.
        metadata={"name": "Krug Grande Cuvée", "year": 2010, "rating": 93, "grape": "Chardonnay blend", "color": "sparkling", "country": "France"},
    ),
    Document(
        page_content="Intense, dark fruit flavors with hints of chocolate",
        metadata={"name": "Caymus Special Selection", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color": "red", "country": "USA"},
    ),
    Document(
        page_content="Exotic, aromatic white with stone fruit and floral notes",
        metadata={"name": "Jermann Vintage Tunina", "year": 2020, "rating": 91, "grape": "Sauvignon Blanc blend", "color": "white", "country": "Italy"},
    ),
]

# NOTE(review): this runs on every import and writes into `persist_directory`;
# presumably the sample docs are re-inserted on each run, so duplicates may
# accumulate in a reused 'db' directory -- confirm against Chroma's behavior.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    persist_directory=persist_directory,
)

# Schema description handed to the LLM so it can build metadata filters.
metadata_field_info = [
    AttributeInfo(
        name="grape",
        description="The grape used to make the wine",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="name",
        description="The name of the wine",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="color",
        description="The color of the wine",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="year",
        description="The year the wine was released",
        type="integer",
    ),
    AttributeInfo(
        name="country",
        description="The name of the country the wine comes from",
        type="string",
    ),
    AttributeInfo(
        name="rating",
        description="The Robert Parker rating for the wine 0-100",
        type="integer"  # float
    ),
]

document_content_description = "Brief description of the wine"

lf = LLMFactory()
llm = lf.get_llm("executor2")

retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    verbose=True,
)


def _default_metadata():
    """Return a fresh default-metadata dict stamped with the current time.

    A single ``datetime.now()`` reading is used for both ``date`` and ``ID``
    so the two values always agree.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S::%f")
    return {
        "date": stamp,
        "source": "conversation",
        "ID": stamp + "-conversation",
    }


# Import-time snapshot of the defaults, kept so existing references to this
# name keep working. `addText` treats it as read-only and refreshes the
# time-dependent fields on every call.
meta_defaults = _default_metadata()


def getRelevantDocs(query: str):
    """Return the documents the self-query retriever finds for *query*."""
    return retriever.get_relevant_documents(query)


def addText(inStr: str, metadata=None):
    """Add *inStr* to the vector store as a single document.

    The document's metadata is built in three layers, later layers winning:
    the module-level ``meta_defaults``, freshly generated ``date``/``ID``
    values for this call, and finally the caller-supplied *metadata*.

    Args:
        inStr: Text to store as the document's page content.
        metadata: Optional mapping of metadata overrides/additions. ``None``
            is treated as "no overrides".

    Returns:
        Whatever ``vectorstore.add_documents`` returns (presumably the list
        of IDs assigned by the store -- confirm against the LangChain docs).
    """
    # Build a new dict on every call: the previous implementation aliased and
    # mutated `meta_defaults`, so caller-supplied keys leaked into all later
    # documents and every document shared the import-time timestamp/ID.
    merged = {**meta_defaults, **_default_metadata(), **(metadata or {})}
    new_docs = [Document(page_content=inStr, metadata=merged)]
    return vectorstore.add_documents(new_docs)