alibidaran commited on
Commit
4829948
·
verified ·
1 Parent(s): 2c771c2

Upload 2 files

Browse files
Files changed (2) hide show
  1. src/RAG.py +228 -0
  2. src/database_center.py +28 -0
src/RAG.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #from texts import *
2
+ from langgraph.graph import StateGraph
3
+ from langchain_core.runnables import RunnableLambda
4
+ from openai import OpenAI
5
+ import os
6
+ import openai
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ import torch
9
+ from typing import TypedDict
10
+ import requests
11
+ import json
12
+ from typing import List, Dict, Any
13
+ from sklearn.cluster import KMeans
14
+ import numpy as np
15
+ from dotenv import load_dotenv
16
# Module-level placeholder for uploaded file text (not read by the code in this module section).
file_text = ''

# Pull variables from a local .env file into the environment before OPENAI_API_KEY is read.
load_dotenv()

# Splitter producing 1024-character chunks that overlap by 256 characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=256,
    length_function=len,
)

# Configure the module-level OpenAI key and build the shared client used by the graph nodes.
openai.api_key = os.getenv('OPENAI_API_KEY')
client = OpenAI()
26
+
27
+ # def get_embedding(text, model="text-embedding-3-small"):
28
+ # # response = openai.embeddings.create(
29
+ # # input=[text],
30
+ # # model=model
31
+ # # )
32
+ # embeddings = encoder.encode([text], convert_to_tensor=True, show_progress_bar=True)
33
+ # embeddings = embeddings.cpu().numpy()
34
+ # return embeddings
35
+ # text=chuncked_text+Focusing_text+planning_text+Focusing_text2+Evoking_text
36
+ # chunks=text_splitter.split_text(text)
37
+ # embeddings=[get_embedding(chunk) for chunk in chunks]
38
+ # embedds=np.array(embeddings)
39
+ # kmeans=KMeans(n_clusters=3,max_iter=1000)
40
+ # kmeans.fit(embedds)
41
+
42
+ # def embed_document(state,file_text):
43
+ # chunks=text_splitter.split_text(file_text)
44
+
45
+ # embeddings=encoder.encode(chunks, convert_to_tensor=True, show_progress_bar=False)
46
+ # embeddings=embeddings.cpu().numpy()
47
+ # print(len(embeddings))
48
+ # return {'single_query':state['single_query'],'embeddings':embeddings,'chunks':chunks}
49
+
50
+
51
+
52
def get_knowledge(state, embeddings, chunks, method='cosine', top_k=5):
    """Return the chunks whose embeddings are most similar to the query.

    Args:
        state: Graph state; ``state['embedded_query']`` holds the query
            embedding, either flat (``[dim]``) or as a single row
            (``[1, dim]``).
        embeddings: 2-D array-like with one embedding row per chunk.
        chunks: Sequence of chunk texts aligned with the rows of
            ``embeddings``.
        method: Similarity method. Only ``'cosine'`` is implemented.
        top_k: Maximum number of chunks to return. Defaults to 5; fewer are
            returned when fewer chunks exist.

    Returns:
        A dict with the untouched ``'embedded_query'`` and ``'knowledge'``,
        the selected chunks ordered from most to least similar.

    Raises:
        ValueError: If ``method`` is not ``'cosine'``.
    """
    if method != 'cosine':
        raise ValueError(
            f"Unsupported retrieval method {method!r}; only 'cosine' is implemented."
        )

    query_embedding = state['embedded_query']

    # Never ask topk for more items than exist; it raises otherwise.
    k = min(top_k, len(chunks))
    if k <= 0:
        return {'embedded_query': query_embedding, 'knowledge': []}

    # Force a common dtype/device: NumPy embeddings are often float64, which
    # would not multiply against a float32 query.
    query_tensor = torch.as_tensor(query_embedding, dtype=torch.float32).cpu().reshape(-1)
    embeddings_tensor = torch.as_tensor(embeddings, dtype=torch.float32).cpu()

    # L2-normalise both sides; the clamp keeps all-zero vectors from
    # producing NaN similarities.
    eps = 1e-12
    query_tensor = query_tensor / query_tensor.norm().clamp_min(eps)
    embeddings_tensor = embeddings_tensor / embeddings_tensor.norm(dim=1, keepdim=True).clamp_min(eps)

    # Cosine similarity of every chunk against the query -> shape [num_chunks].
    similarities = torch.matmul(embeddings_tensor, query_tensor)
    top_k_indices = torch.topk(similarities, k=k).indices.tolist()

    return {
        'embedded_query': query_embedding,
        'knowledge': [chunks[i] for i in top_k_indices],
    }
69
+ # elif method=='Kmeans':
70
+ # query_emb = np.array(get_embedding(state['single_query']))
71
+ # # Predict the closest cluster
72
+ # cluster_idx = kmeans.predict([query_emb])[0]
73
+ # # Find indices of documents in this cluster
74
+ # cluster_doc_indices = np.where(kmeans.labels_ == cluster_idx)[0]
75
+ # # Compute L2 (Euclidean) distance within the cluster
76
+ # cluster_embs = np.array(embeddings)[cluster_doc_indices]
77
+ # distances = np.linalg.norm(cluster_embs - query_emb, axis=1)
78
+ # top_k=5
79
+ # # Get top_k most similar documents (smallest distances)
80
+ # top_indices = distances.argsort()[:top_k]
81
+ # return {'single_query':state['single_query'],'knowledge':[chunks[cluster_doc_indices[i]] for i in top_indices]}
82
+
83
+
84
def summerise_knowledge(state):
    """Summarise the retrieved chunks with the module-level OpenAI client.

    Args:
        state: Graph state; reads ``state['knowledge']`` (a sequence of text
            chunks) and passes ``state['embedded_query']`` through.

    Returns:
        A dict with the untouched ``'embedded_query'`` and ``'summary'``, the
        model's summary text. ``'summary'`` is an empty string when there is
        nothing to summarise or the model returns no text.
    """
    prompt = (
        "\n"
        "[system]\n"
        "## Instructions:\n"
        "You are skillfull text analysist. Summerise the extracted information "
        "from uploaded file by the user. Make your summary as concice as possible.\n"
        "### Inputs:\n"
        "Extracted Knowledge:\n"
        "{}\n"
        "### Output:\n"
    )

    chunks = state['knowledge']
    if not chunks:
        # Nothing was retrieved: skip the API call rather than asking the
        # model to summarise an empty prompt.
        return {'embedded_query': state['embedded_query'], 'summary': ''}

    # Separate chunks with a blank line so text from adjacent chunks does not
    # run together mid-word.
    text = "\n\n".join(chunks)

    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[{'role': 'user', 'content': prompt.format(text)}],
    )

    # The message content can be None; keep 'summary' a string.
    summary = response.choices[0].message.content or ''
    return {'embedded_query': state['embedded_query'], 'summary': summary}
125
+
126
+ # def making_instructions(state):
127
+ # prompt="""
128
+ # [system]
129
+ # ## Instructions:
130
+ # You are skill full certificated psychologist. Create an instruction for the practitioner to help them how to behaive and respond to the client effectively.
131
+ # ## Input:
132
+ # [summary]
133
+ # {}
134
+ # ### Output:
135
+ # """
136
+
137
+ # response=client.chat.completions.create(
138
+ # model="gpt-4o-mini",
139
+ # # api_base="https://litellm.llemma.net/",
140
+ # # api_key=os.getenv("LITELLM_API_KEY"),  # hard-coded key redacted; rotate the credential that was committed here
141
+ # messages =[{'role':'user','content':prompt.format(state['summary'])}],
142
+ # temperature=0.7,
143
+ # top_p=0.95,
144
+ # max_tokens=800)
145
+ # return {'query':state['query'],'instruction':response.choices[0].message}
146
+
147
def respond(state):
    """Post the query and summary to the remote ``/predict`` endpoint.

    Only the formatted user prompt is posted (form-encoded); this function
    transmits no system prompt.

    Args:
        state: Graph state; reads ``state['single_query']`` and
            ``state['summary']``.
            NOTE(review): ``'single_query'`` is not declared in ``MyState`` --
            confirm the caller supplies it before wiring this node in.

    Returns:
        A dict with ``'final_response'`` set to the first element of the
        ``'output'`` list in the endpoint's JSON reply.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer in time.
    """
    user_prompt = (
        "\n"
        "## Instructions:\n"
        "{}\n"
        "## Summary:\n"
        "{}\n"
    )
    url = "https://8000-01jy9pekct42qjmqxcap35g81s.cloudspaces.litng.ai/predict"
    payload = {"user_prompt": user_prompt.format(state['single_query'], state['summary'])}

    # Generous limit for model inference; without one a stalled endpoint
    # would block the graph forever.
    request_timeout = 120
    response = requests.post(url, data=payload, timeout=request_timeout)
    # Surface HTTP errors here instead of failing later on an unexpected body.
    response.raise_for_status()
    return {'final_response': response.json()['output'][0]}
199
+
200
def end_node(state):
    """Terminal node: republish the summary under the ``'knowledge'`` key.

    Args:
        state: Graph state; reads ``state["summary"]``.

    Returns:
        A dict whose only key, ``'knowledge'``, carries the summary value.
    """
    summary = state["summary"]
    return {'knowledge': summary}
203
class MyState(TypedDict):
    """State dictionary passed between the nodes of the retrieval graph."""

    # Embedding of the user's query; read by get_knowledge.
    embedded_query: list
    # Chunks selected by get_knowledge. end_node later writes the summary
    # string under this same key.
    knowledge: list
    # Summary text produced by summerise_knowledge.
    summary: str
208
+
209
def load_graph(embeddings, chunks):
    """Assemble the retrieval-then-summary graph without compiling it.

    The pipeline is ``get_knowledge -> summarise -> end``, entered at
    ``get_knowledge``.

    Args:
        embeddings: Chunk embeddings forwarded to ``get_knowledge``.
        chunks: Chunk texts forwarded to ``get_knowledge``.

    Returns:
        The configured ``StateGraph`` builder; the caller is responsible for
        calling ``compile()`` on it.
    """
    graph_builder = StateGraph(state_schema=MyState)
    graph_builder.set_entry_point('get_knowledge')

    # Node name -> runnable. The retrieval node closes over the document
    # data so that it only needs the state at run time.
    node_table = (
        (
            "get_knowledge",
            RunnableLambda(lambda state: get_knowledge(state, embeddings, chunks, method='cosine')),
        ),
        ("summarise", RunnableLambda(summerise_knowledge)),
        ("end", RunnableLambda(end_node)),
    )
    for node_name, runnable in node_table:
        graph_builder.add_node(node_name, runnable)

    # Linear flow through the three nodes.
    for source, target in (("get_knowledge", "summarise"), ("summarise", "end")):
        graph_builder.add_edge(source, target)

    return graph_builder
src/database_center.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import firebase_admin
2
+ from firebase_admin import credentials
3
+ from firebase_admin import db,firestore
4
+ import uuid
5
+ import json
6
+ import dotenv
7
+ import os
8
# Load variables from a local .env file so Firebase_Credential can be read below.
dotenv.load_dotenv()

# The service-account credential is supplied as a JSON string in the environment.
cred_json = os.environ['Firebase_Credential']

# Initialise the Firebase app only once per process.
if not firebase_admin._apps:
    cred = credentials.Certificate(json.loads(cred_json))
    firebase_admin.initialize_app(cred)

# Firestore client and a handle on the 'test_transactions' collection.
db = firestore.client()

db_transaction = db.collection('test_transactions')
22
+ # db_transaction.set(
23
+ # {
24
+ # 'id':str(uuid.uuid4()),
25
+ # 'app':'sparkAnime',
26
+ # 'transaction-id':'igjepgjagpwogj'
27
+ # }
28
+ # )