thejagstudio committed on
Commit
21a7097
·
verified ·
1 Parent(s): 620af75

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +53 -116
main.py CHANGED
@@ -15,74 +15,32 @@ CORS(app)
15
 
16
class MyEmbeddingFunction(EmbeddingFunction):
    """Embedding function backed by the Hugging Face inference API
    (BAAI/bge-large-en-v1.5). Each call retries up to 5 times before
    giving up and returning None."""

    # Shared endpoint and browser-like request headers for every call.
    _API_URL = "https://api-inference.huggingface.co/models/BAAI/bge-large-en-v1.5"
    _HEADERS = {
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'content-type': 'application/json',
        'origin': 'https://huggingface.co',
        'priority': 'u=1, i',
        'referer': 'https://huggingface.co/',
        'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    }

    def embed_documents(self, input: Documents) -> Embeddings:
        """Embed a batch of documents.

        NOTE(review): the original forwarded the whole batch as `inputs`
        but returned only `response.json()[0][0]` — confirm the API batch
        shape before relying on multi-document input.
        """
        return self._post_with_retries(input)

    def embed_query(self, input: Documents) -> Embeddings:
        """Embed a single query string (sent as a one-element batch)."""
        return self._post_with_retries([input])

    def _post_with_retries(self, inputs, attempts=5):
        """POST `inputs` to the HF endpoint, retrying on failure.

        The original duplicated this code in both public methods and used
        a bare `except: pass`, silently swallowing every error (including
        KeyboardInterrupt) and falling off the loop to return None. Here
        only request/decoding errors are retried, each failure is logged,
        and the final None is explicit.
        """
        for attempt in range(attempts):
            try:
                response = requests.post(
                    self._API_URL,
                    headers=self._HEADERS,
                    json={"inputs": inputs},
                    timeout=30,  # original had no timeout: a stalled request blocked forever
                )
                response.raise_for_status()
                return response.json()[0][0]
            except (requests.RequestException, ValueError, IndexError, KeyError) as e:
                # ValueError: non-JSON body; Index/KeyError: unexpected payload shape.
                print("Error in Embedding :", str(e))
        return None  # all attempts failed — original (implicit) behavior kept explicit
 
 
 
 
 
 
 
 
 
 
76
 
77
try:
    # Location of the persisted Chroma store on disk.
    CHROMA_PATH = "chroma"
    # Wire the HF-API-backed embedding function into the vector store.
    custom_embeddings = MyEmbeddingFunction()
    db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=custom_embeddings,
    )
except Exception as e:
    print("Error in database :", str(e))
86
 
87
  # Initialize the database without persist_directory
88
  try:
@@ -91,27 +49,20 @@ try:
91
 
92
  # Load documents from chroma.sqlite3
93
def load_documents_from_sqlite(db_path="chroma.sqlite3"):
    """Load ids, contents and embeddings from a SQLite `documents` table
    into the module-level Chroma collection.

    Assumes the table has "id", "content" and "embedding" columns, with
    embeddings stored as JSON strings.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT id, content, embedding FROM documents")
        rows = cursor.fetchall()
    finally:
        # Original closed the handle only on the success path; an error in
        # the query (or the inserts below) leaked the connection.
        conn.close()

    collection = db.get_or_create_collection("default_collection")
    for doc_id, content, embedding_json in rows:
        embedding = json.loads(embedding_json)  # embeddings stored as JSON strings
        collection.add(
            ids=[doc_id],
            documents=[content],
            embeddings=[embedding],
        )
    print("Loaded documents into Chroma.")

load_documents_from_sqlite()  # Call to load data
117
 
@@ -121,44 +72,30 @@ except Exception as e:
121
 
122
def embeddingGen(query):
    """Return the embedding vector for `query` from the HF inference API.

    Raises (requests error / ValueError / IndexError) on network failure,
    HTTP error, or an unexpected payload, so callers fail loudly instead
    of getting a confusing downstream error.
    """
    url = "https://api-inference.huggingface.co/models/BAAI/bge-large-en-v1.5"
    payload = {
        "inputs": [query]
    }
    headers = {
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'content-type': 'application/json',
        'origin': 'https://huggingface.co',
        'priority': 'u=1, i',
        'referer': 'https://huggingface.co/',
        'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    }

    # timeout added: without it a stalled request blocked the caller forever
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    response.raise_for_status()  # fail fast on HTTP errors instead of an opaque index error below
    return response.json()[0][0]
146
 
147
 
148
def strings_ranked_by_relatedness(query, df, top_n=5):
    """Return (texts, relatednesses): the `top_n` rows of `df` most similar
    to `query` by cosine similarity of their embeddings.

    `df` is an iterable of mappings with "text" and "embedding" keys.
    Returns two empty tuples for an empty `df` (the original raised
    ValueError at the `zip(*...)` unpack).
    """
    def relatedness_fn(x, y):
        # Cosine similarity between two embedding vectors.
        x_norm = np.linalg.norm(x)
        y_norm = np.linalg.norm(y)
        return np.dot(x, y) / (x_norm * y_norm)

    query_embedding = embeddingGen(query)
    strings_and_relatednesses = [
        (row["text"], relatedness_fn(query_embedding, row["embedding"])) for row in df
    ]
    if not strings_and_relatednesses:
        return (), ()  # guard: zip(*[]) cannot be unpacked into two names
    strings_and_relatednesses.sort(key=lambda x: x[1], reverse=True)
    strings, relatednesses = zip(*strings_and_relatednesses)
    return strings[:top_n], relatednesses[:top_n]
162
 
163
 
164
  @app.route("/api/gpt", methods=["POST", "GET"])
 
15
 
16
class MyEmbeddingFunction(EmbeddingFunction):
    """Embedding function that delegates to the Hugging Face inference API
    (BAAI/bge-large-en-v1.5)."""

    def embed_documents(self, input: Documents) -> Embeddings:
        """Embed a batch of documents.

        NOTE(review): the whole batch is forwarded but `_call_hf_api`
        returns only element [0] of the response — confirm the API's
        batch-response shape before relying on multi-document input.
        """
        return self._call_hf_api(input)

    def embed_query(self, input: Documents) -> Embeddings:
        """Embed a single query string (sent as a one-element batch)."""
        return self._call_hf_api([input])

    def _call_hf_api(self, inputs):
        """POST `inputs` to the HF endpoint; return the first embedding
        from the response, or [] when the request or decoding fails."""
        url = "https://api-inference.huggingface.co/models/BAAI/bge-large-en-v1.5"
        headers = {
            'accept': '*/*',
            'content-type': 'application/json',
        }
        payload = {"inputs": inputs}
        try:
            # timeout added: without it a stalled request blocks the caller forever
            response = requests.post(url, headers=headers, json=payload, timeout=30)
            response.raise_for_status()
            return response.json()[0]
        except (requests.RequestException, ValueError, IndexError) as e:
            # Narrowed from bare `Exception`: ValueError covers a non-JSON
            # body, IndexError an empty payload; anything else should surface.
            print("Embedding API Error:", str(e))
            return []
37
 
38
try:
    # Location of the persisted Chroma store on disk.
    CHROMA_PATH = "chroma"
    # Wire the HF-API-backed embedding function into the vector store.
    custom_embeddings = MyEmbeddingFunction()
    db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=custom_embeddings,
    )
except Exception as e:
    print("Database Initialization Error:", str(e))
44
 
45
  # Initialize the database without persist_directory
46
  try:
 
49
 
50
  # Load documents from chroma.sqlite3
51
def load_documents_from_sqlite(db_path="chroma.sqlite3"):
    """Copy ids, contents and embeddings from a SQLite `documents` table
    into the module-level Chroma collection.

    Assumes "id", "content" and "embedding" columns, with embeddings
    stored as JSON strings. Errors are logged, not raised (best-effort
    startup load).
    """
    try:
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT id, content, embedding FROM documents")
            rows = cursor.fetchall()
        finally:
            # Original called close() inside the try body, so any exception
            # above (or in the inserts) leaked the connection.
            conn.close()
        collection = db.get_or_create_collection("default_collection")
        for doc_id, content, embedding_json in rows:
            embedding = json.loads(embedding_json)  # embeddings stored as JSON text
            collection.add(ids=[doc_id], documents=[content], embeddings=[embedding])
        print("Documents loaded into Chroma.")
    except Exception as e:
        print("Error loading documents:", str(e))

load_documents_from_sqlite()  # Call to load data
68
 
 
72
 
73
def embeddingGen(query):
    """Return the embedding for `query` from the HF inference API, or []
    when the request or decoding fails."""
    url = "https://api-inference.huggingface.co/models/BAAI/bge-large-en-v1.5"
    headers = {'accept': '*/*', 'content-type': 'application/json'}
    payload = {"inputs": [query]}
    try:
        # timeout added: without it a stalled request blocks the caller forever
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        return response.json()[0]
    except (requests.RequestException, ValueError, IndexError) as e:
        # Narrowed from bare `Exception`: ValueError covers a non-JSON
        # body, IndexError an empty payload; anything else should surface.
        print("Embedding Generation Error:", str(e))
        return []
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
def strings_ranked_by_relatedness(query, df, top_n=5):
    """Return (texts, scores): the `top_n` rows of `df` most similar to
    `query` by cosine similarity of their embeddings.

    `df` is an iterable of mappings with "text" and "embedding" keys.
    Returns two empty tuples for an empty `df` (the original raised
    ValueError at the `zip(*ranked)` unpack).
    """
    def cosine_similarity(x, y):
        return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))

    query_embedding = embeddingGen(query)
    ranked = sorted(
        [(row["text"], cosine_similarity(query_embedding, row["embedding"])) for row in df],
        key=lambda x: x[1],
        reverse=True,
    )
    if not ranked:
        return (), ()  # guard: zip(*[]) cannot be unpacked into two names
    strings, scores = zip(*ranked)
    return strings[:top_n], scores[:top_n]
98
+
 
99
 
100
 
101
  @app.route("/api/gpt", methods=["POST", "GET"])