File size: 5,899 Bytes
ebd06cc
 
 
 
 
 
 
 
adeac66
e99c3d5
f71faa1
 
 
ebd06cc
c1313e9
ebd06cc
 
 
 
 
 
 
 
 
e99c3d5
0756fa3
e99c3d5
 
ebd06cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adeac66
e99c3d5
adeac66
 
 
f71faa1
ebd06cc
f71faa1
 
59b904b
0756fa3
 
 
 
 
857c3ab
0756fa3
 
 
 
 
 
 
857c3ab
0756fa3
 
f71faa1
 
adeac66
 
 
 
 
e99c3d5
e3a3f56
 
adeac66
 
1ccf3d5
e99c3d5
1ccf3d5
 
e99c3d5
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from langchain.vectorstores import Chroma
from chromadb.api.fastapi import requests
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from llm.llmFactory import LLMFactory
from datetime import datetime
import baseInfra.dropbox_handler as dbh
from baseInfra.dbInterface import DbInterface

# Module-wide handle to the project's DB layer; getRelevantDocs() uses it to
# cache retrieval results.
db_interface = DbInterface()

# Hugging Face model identifier handed to HuggingFaceBgeEmbeddings below.
model_name = "BAAI/bge-large-en-v1.5"
# Normalised embeddings: set True to compute cosine similarity.
encode_kwargs = {"normalize_embeddings": True}

# Embedding function shared by the vector store; the model runs on CPU.
embedding = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    encode_kwargs=encode_kwargs,
    model_kwargs={"device": "cpu"},
)

# Folder the Chroma store is persisted to; the same folder is restored through
# the dropbox_handler helper before the store is opened.
persist_directory = 'db'
try:
    dbh.restoreFolder(persist_directory)
except Exception as exc:
    # Best-effort restore: a failure is expected on a brand-new setup, so keep
    # going, but only swallow ordinary errors (not KeyboardInterrupt/SystemExit)
    # and show what actually went wrong so real failures are not hidden.
    print("Probably folder doesn't exist as it is brand new setup")
    print(f"restoreFolder error: {exc!r}")
# Sample wine documents used to seed the vector store. Each Document carries a
# short tasting note as page_content plus the metadata fields (name, year,
# rating, grape, color, country) that the self-query retriever can filter on.
docs = [
    Document(
        page_content="Complex, layered, rich red with dark fruit flavors",
        metadata={"name":"Opus One", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color":"red", "country":"USA"},
    ),
    Document(
        page_content="Luxurious, sweet wine with flavors of honey, apricot, and peach",
        metadata={"name":"Château d'Yquem", "year": 2015, "rating": 98, "grape": "Sémillon", "color":"white", "country":"France"},
    ),
    Document(
        page_content="Full-bodied red with notes of black fruit and spice",
        metadata={"name":"Penfolds Grange", "year": 2017, "rating": 97, "grape": "Shiraz", "color":"red", "country":"Australia"},
    ),
    Document(
        page_content="Elegant, balanced red with herbal and berry nuances",
        metadata={"name":"Sassicaia", "year": 2016, "rating": 95, "grape": "Cabernet Franc", "color":"red", "country":"Italy"},
    ),
    Document(
        page_content="Highly sought-after Pinot Noir with red fruit and earthy notes",
        metadata={"name":"Domaine de la Romanée-Conti", "year": 2018, "rating": 100, "grape": "Pinot Noir", "color":"red", "country":"France"},
    ),
    Document(
        page_content="Crisp white with tropical fruit and citrus flavors",
        metadata={"name":"Cloudy Bay", "year": 2021, "rating": 92, "grape": "Sauvignon Blanc", "color":"white", "country":"New Zealand"},
    ),
    Document(
        page_content="Rich, complex Champagne with notes of brioche and citrus",
        # Champagne is produced in France; the country was previously recorded
        # as "New Zealand", which made country filters return wrong results.
        metadata={"name":"Krug Grande Cuvée", "year": 2010, "rating": 93, "grape": "Chardonnay blend", "color":"sparkling", "country":"France"},
    ),
    Document(
        page_content="Intense, dark fruit flavors with hints of chocolate",
        metadata={"name":"Caymus Special Selection", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color":"red", "country":"USA"},
    ),
    Document(
        page_content="Exotic, aromatic white with stone fruit and floral notes",
        metadata={"name":"Jermann Vintage Tunina", "year": 2020, "rating": 91, "grape": "Sauvignon Blanc blend", "color":"white", "country":"Italy"},
    ),
]

# Create the Chroma vector store from the sample docs, embedding them with
# `embedding` and pointing the store at `persist_directory`.
# NOTE(review): this runs on every import of the module. If the restored
# folder already contains these sample docs, they are presumably added again
# on each start -- confirm whether duplicates accumulate in the persisted store.
vectorstore = Chroma.from_documents(documents=docs,
                                  embedding=embedding,
                                  persist_directory=persist_directory)

# Schema of the filterable metadata attributes handed to the self-query
# retriever. Each row is (attribute name, description, type string); the
# resulting list keeps the row order.
metadata_field_info = [
    AttributeInfo(name=field, description=summary, type=kind)
    for field, summary, kind in (
        ("grape", "The grape used to make the wine", "string or list[string]"),
        ("name", "The name of the wine", "string or list[string]"),
        ("color", "The color of the wine", "string or list[string]"),
        ("year", "The year the wine was released", "integer"),
        ("country", "The name of the country the wine comes from", "string"),
        # Declared as integer; the original author left a "float" hint here.
        ("rating", "The Robert Parker rating for the wine 0-100", "integer"),
    )
]
# One-line description of what each document's page_content holds; passed to
# the self-query retriever together with the metadata schema.
document_content_description = "Brief description of the wine"

# The LLM comes from the project's factory under the key "executor2"
# (what that key selects is defined in llm.llmFactory, not here).
lf = LLMFactory()
llm = lf.get_llm("executor2")

# Self-query retriever over the vector store: built from the LLM, the content
# description and the metadata schema. verbose=True is passed through as-is.
retriever = SelfQueryRetriever.from_llm(
    llm, vectorstore, document_content_description, metadata_field_info, verbose=True
)

# Default metadata for documents added at runtime. The timestamp is taken once
# (at import time) and the ID is derived from that same value, so the two can
# never disagree by a few microseconds as they could with two separate
# datetime.now() calls. Key order stays timestamp, source, ID.
meta_defaults = {
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S::%f"),
    "source": "conversation",
}
meta_defaults["ID"] = meta_defaults["timestamp"] + "-conversation"

def getRelevantDocs(query: str):
    """Retrieve the documents relevant to ``query`` and cache a text summary.

    Each retrieved document is flattened into one string of the form
    ``"Info:<page_content> <key>:<value> ..."`` (every metadata entry except
    ``ID``, each followed by a space). The list of summaries is stored through
    ``db_interface.add_to_cache`` under the query; per the original note this
    is meant to also post the result to firebase.

    Args:
        query: Natural-language query passed to the self-query retriever.

    Returns:
        The documents returned by ``retriever.get_relevant_documents``,
        unchanged.

    Raises:
        Whatever the retriever or ``db_interface.add_to_cache`` raises; errors
        are no longer masked by a blanket ``except``.
    """
    retVal = retriever.get_relevant_documents(query)

    # Documents expose page_content/metadata as attributes. The previous
    # subscript access (item['page_content']) could not succeed and only worked
    # through a bare-except fallback, which could also append every summary
    # twice and write to the cache twice when the cache call itself failed.
    value = [
        "Info:" + item.page_content + " "
        + "".join(f"{key}:{val} " for key, val in item.metadata.items() if key != "ID")
        for item in retVal
    ]
    db_interface.add_to_cache(input=query, value=value)
    return retVal
    

def addText(inStr: str, metadata):
    """Add ``inStr`` to the vector store as a single document.

    The document's metadata is ``meta_defaults`` overlaid with the caller's
    ``metadata``; ``timestamp`` and ``ID`` are then always set from the current
    time, so caller-supplied values for those two keys are overwritten. The ID
    is also used as the document id in the vector store.

    Args:
        inStr: Text stored as the document's page_content.
        metadata: Mapping of extra metadata for this document; ``None`` or an
            empty mapping means "defaults only".

    Returns:
        Whatever ``vectorstore.add_documents`` returns.
    """
    # Work on a copy: the old code aliased meta_defaults and mutated it, so
    # metadata from one call leaked into the defaults of every later call.
    md = {**meta_defaults, **(metadata or {})}

    # Read the clock once so timestamp and ID always agree.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S::%f")
    md['timestamp'] = stamp
    md['ID'] = stamp + "-conversation"

    docs = [Document(page_content=inStr, metadata=md)]
    # md is a dict, so the ID is read by key. The former md.ID attribute access
    # always raised and every call went through a bare-except retry path.
    return vectorstore.add_documents(docs, ids=[md['ID']])
    
def persist() -> None:
    """Call ``persist()`` on the module-level ``vectorstore``.

    Presumably this flushes the Chroma store to ``persist_directory`` -- the
    exact effect depends on the installed Chroma/LangChain version; confirm.
    """
    vectorstore.persist()