Spaces:
Runtime error
Runtime error
File size: 5,899 Bytes
ebd06cc adeac66 e99c3d5 f71faa1 ebd06cc c1313e9 ebd06cc e99c3d5 0756fa3 e99c3d5 ebd06cc adeac66 e99c3d5 adeac66 f71faa1 ebd06cc f71faa1 59b904b 0756fa3 857c3ab 0756fa3 857c3ab 0756fa3 f71faa1 adeac66 e99c3d5 e3a3f56 adeac66 1ccf3d5 e99c3d5 1ccf3d5 e99c3d5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
from langchain.vectorstores import Chroma
from chromadb.api.fastapi import requests
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from llm.llmFactory import LLMFactory
from datetime import datetime
import baseInfra.dropbox_handler as dbh
from baseInfra.dbInterface import DbInterface
# Module-level setup: cache interface, embedding model, and restoration of the
# on-disk vector-store folder. All of this runs once, at import time.
db_interface = DbInterface()

model_name = "BAAI/bge-large-en-v1.5"
encode_kwargs = {'normalize_embeddings': True}  # set True to compute cosine similarity
embedding = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cpu'},
    encode_kwargs=encode_kwargs,
)

# Folder used both for the restore below and as the vector store's persist
# directory, so the two always refer to the same location.
persist_directory = 'db'
try:
    # NOTE(review): presumably pulls a previously persisted copy of the folder
    # from Dropbox (module is named dropbox_handler) — confirm.
    dbh.restoreFolder(persist_directory)
except Exception as exc:
    # Best effort: a failed restore is expected on a brand-new setup, so keep
    # going. `Exception` (not a bare except) lets Ctrl-C / SystemExit through,
    # and the reason is printed so real failures are not hidden.
    print("Probably folder doesn't exist as it is brand new setup")
    print(f"restoreFolder failed with: {exc!r}")
# Seed documents: short wine descriptions with structured metadata that the
# self-query retriever can filter on.
docs = [
    Document(
        page_content="Complex, layered, rich red with dark fruit flavors",
        metadata={"name": "Opus One", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color": "red", "country": "USA"},
    ),
    Document(
        page_content="Luxurious, sweet wine with flavors of honey, apricot, and peach",
        metadata={"name": "Château d'Yquem", "year": 2015, "rating": 98, "grape": "Sémillon", "color": "white", "country": "France"},
    ),
    Document(
        page_content="Full-bodied red with notes of black fruit and spice",
        metadata={"name": "Penfolds Grange", "year": 2017, "rating": 97, "grape": "Shiraz", "color": "red", "country": "Australia"},
    ),
    Document(
        page_content="Elegant, balanced red with herbal and berry nuances",
        metadata={"name": "Sassicaia", "year": 2016, "rating": 95, "grape": "Cabernet Franc", "color": "red", "country": "Italy"},
    ),
    Document(
        page_content="Highly sought-after Pinot Noir with red fruit and earthy notes",
        metadata={"name": "Domaine de la Romanée-Conti", "year": 2018, "rating": 100, "grape": "Pinot Noir", "color": "red", "country": "France"},
    ),
    Document(
        page_content="Crisp white with tropical fruit and citrus flavors",
        metadata={"name": "Cloudy Bay", "year": 2021, "rating": 92, "grape": "Sauvignon Blanc", "color": "white", "country": "New Zealand"},
    ),
    Document(
        page_content="Rich, complex Champagne with notes of brioche and citrus",
        # Krug is a Champagne house; the country was previously "New Zealand",
        # which made country-filtered queries return wrong results.
        metadata={"name": "Krug Grande Cuvée", "year": 2010, "rating": 93, "grape": "Chardonnay blend", "color": "sparkling", "country": "France"},
    ),
    Document(
        page_content="Intense, dark fruit flavors with hints of chocolate",
        metadata={"name": "Caymus Special Selection", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color": "red", "country": "USA"},
    ),
    Document(
        page_content="Exotic, aromatic white with stone fruit and floral notes",
        metadata={"name": "Jermann Vintage Tunina", "year": 2020, "rating": 91, "grape": "Sauvignon Blanc blend", "color": "white", "country": "Italy"},
    ),
]

# Deterministic ids are passed so that re-running this module against an
# already populated persist directory does not insert a fresh copy of every
# seed document under a new random id on each start.
# NOTE(review): relies on the installed LangChain Chroma wrapper upserting on
# existing ids — confirm against the pinned version.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    ids=[f"seed-{doc.metadata['name']}-{doc.metadata['year']}" for doc in docs],
    persist_directory=persist_directory,
)
# Schema of the metadata fields handed to the self-query retriever.
# Each row is (field name, description, type string); the "rating" field is
# declared as an integer (the original author left a "#float" note next to it).
metadata_field_info = [
    AttributeInfo(name=field, description=summary, type=kind)
    for field, summary, kind in (
        ("grape", "The grape used to make the wine", "string or list[string]"),
        ("name", "The name of the wine", "string or list[string]"),
        ("color", "The color of the wine", "string or list[string]"),
        ("year", "The year the wine was released", "integer"),
        ("country", "The name of the country the wine comes from", "string"),
        ("rating", "The Robert Parker rating for the wine 0-100", "integer"),
    )
]

# One-line description of what each document's page_content holds.
document_content_description = "Brief description of the wine"

# LLM obtained from the project factory under the "executor2" key.
lf = LLMFactory()
llm = lf.get_llm("executor2")

# Retriever built from the LLM, the vector store, the content description and
# the metadata schema above.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    verbose=True,
)
# Default metadata merged into every document added through addText().
# "timestamp" and "ID" are import-time placeholders; addText() overwrites both
# on each call.
meta_defaults = {
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S::%f"),
    "source": "conversation",
}
# Derive the ID from the stored timestamp instead of calling datetime.now() a
# second time, so the two values can never disagree by a few microseconds.
meta_defaults["ID"] = meta_defaults["timestamp"] + "-conversation"
def getRelevantDocs(query: str):
    """Retrieve documents relevant to ``query`` and cache a text summary of them.

    Each retrieved document is flattened to a string of the form
    ``"Info:<page_content> <key>:<value> <key>:<value> "`` (the metadata key
    ``"ID"`` is skipped). The list of those strings is handed to
    ``db_interface.add_to_cache`` keyed by the query.

    NOTE(review): the original docstring said "This should also post the
    result to firebase"; presumably ``add_to_cache`` is what does that —
    confirm.

    Args:
        query: Natural-language query passed to the self-query retriever.

    Returns:
        The documents returned by ``retriever.get_relevant_documents``,
        unmodified.

    Raises:
        Whatever the retriever or ``db_interface.add_to_cache`` raises; nothing
        is swallowed here.
    """
    retVal = retriever.get_relevant_documents(query)

    # Documents expose page_content/metadata as attributes, so they are read
    # directly. The previous version first tried item['page_content'], failed,
    # and redid everything inside a bare except (also retrying add_to_cache).
    value = []
    for item in retVal:
        summary = "Info:" + item.page_content + " "
        summary += "".join(
            f"{key}:{item.metadata[key]!s} "
            for key in item.metadata
            if key != "ID"
        )
        value.append(summary)

    db_interface.add_to_cache(input=query, value=value)
    return retVal
def addText(inStr: str, metadata):
    """Add a piece of text to the vector store with merged metadata.

    The stored metadata is ``meta_defaults`` overlaid with ``metadata``; the
    ``"timestamp"`` and ``"ID"`` entries are then always replaced with values
    generated for this call, and the generated ID is also used as the
    document's id in the vector store.

    Args:
        inStr: Text to store as the document's page content.
        metadata: Mapping of extra metadata for this document. ``None`` is
            treated as an empty mapping.

    Returns:
        Whatever ``vectorstore.add_documents`` returns.
    """
    # Build a fresh dict per call. The previous version aliased the shared
    # module-level meta_defaults, so one caller's keys leaked into every later
    # document (and into Document objects that had already been created).
    md = {**meta_defaults, **(metadata or {})}

    # One clock reading for both fields so the ID always matches the timestamp.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S::%f")
    md['timestamp'] = stamp
    md['ID'] = stamp + "-conversation"

    doc = Document(page_content=inStr, metadata=md)
    # md is a dict, so the id is read by key. The old `md.ID` attribute access
    # always raised and was only rescued by a bare except.
    return vectorstore.add_documents([doc], ids=[md['ID']])
def persist():
    """Call ``persist()`` on the module-level vector store.

    NOTE(review): presumably this flushes the collection to the
    ``persist_directory`` the store was created with — confirm against the
    installed Chroma wrapper.
    """
    vectorstore.persist()
|