Commit daf02fd ("add full tool")
Parent: 200f9e3
app.py
CHANGED
@@ -19,11 +19,144 @@ from langchain.memory import ConversationBufferWindowMemory
 from langchain.prompts import MessagesPlaceholder
 from langchain.agents import ConversationalChatAgent, AgentExecutor
 from langchain.callbacks import StreamlitCallbackHandler
-
+from langchain.chains import RetrievalQA
+import pinecone
+from langchain.vectorstores import Pinecone
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.tools import DuckDuckGoSearchRun
+from langchain.utilities import WikipediaAPIWrapper
+import soundfile as sf
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+from datasets import load_dataset
+import torch
+from langchain.chains import LLMMathChain
+from interpreter.code_interpreter import CodeInterpreter
 
 global CurrentAgent
 CurrentAgent = 'Structured Zero Short Agent'
 
+class DB_Search2(BaseTool):
+    name = "Vector Database Search"
+    description = "This is the internal vector database to search first for information (i.e. engineering data, acronyms)."
+    def _run(self, query: str) -> str:
+        response, source = QAQuery_p(query)
+        # response = "test db_search feedback"
+        return response
+
+    def _arun(self, query: str):
+        raise NotImplementedError("N/A")
+
+pinecone.init(
+    api_key = os.environ["pinecone_api_key"],
+    # environment='asia-southeast1-gcp-free',
+    environment='us-west4-gcp-free',
+    # openapi_config=openapi_config
+)
+# index_name = 'stla-baby'
+global index_name
+index_name = 'stla-back'
+index = pinecone.Index(index_name)
+# index.delete(delete_all=True, namespace='')
+print(pinecone.whoami())
+print(index.describe_index_stats())
+
+embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+device = 'cpu'
+embeddings_miniLM = HuggingFaceEmbeddings(
+    model_name=embed_model_id,
+    model_kwargs={'device': device},
+)
+
+# embeddings = embeddings_openai
+embeddings = embeddings_miniLM
+
+
+global vectordb_p
+vectordb_p = Pinecone.from_existing_index(index_name, embeddings)
+
+def QAQuery_p(question: str):
+    global vectordb_p
+    global agent
+    # global Choice
+    global CurrentAgent
+    # vectordb = Chroma(persist_directory='db', embedding_function=embeddings)
+    retriever = vectordb_p.as_retriever()
+    retriever.search_kwargs['k'] = int(os.environ["search_kwargs_k"])
+    # retriever.search_kwargs['fetch_k'] = 100
+    # if agent == agent_ZEROSHOT_REACT_2 or agent == agent_ZEROSHOT_AGENT_2:
+    if 1:
+        print("--------------- QA with Remote --------------")
+        qa = RetrievalQA.from_chain_type(llm=GPTfake, chain_type="stuff",
+                                         retriever=retriever, return_source_documents = True,
+                                         verbose = True)
+    else:
+        pass
+    # qa = VectorDBQA.from_chain_type(llm=chat, chain_type="stuff", vectorstore=vectordb, return_source_documents=True)
+    # res = qa.run(question)
+    res = qa({"query": question})
+
+    print("-" * 20)
+    # print("Question:", question)
+    # print("Answer:", res)
+    # print("Answer:", res['result'])
+    print("-" * 20)
+    # print("Source:", res['source_documents'])
+    response = res['result']
+    # response = res['source_documents']
+    source = res['source_documents']
+    return response, source
+
+Netsearch = DuckDuckGoSearchRun()
+duckduckgo_tool2 = Tool(
+    name = "Duckduckgo Internet Search",
+    func = Netsearch.run,
+    description = "Useful to search the internet for real-time information and additional information which is not available in other tools."
+)
+
+Wikipedia = WikipediaAPIWrapper()
+wikipedia_tool2 = Tool(
+    name = "Wikipedia Search",
+    func = Wikipedia.run,
+    description = "Useful to search a topic, country or person when there is no available information in the vector database."
+)
+
+
+
+
+
+def text_to_speech_loc2(Text_input):
+    global Audio_output
+    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+
+    inputs = processor(text = Text_input, return_tensors="pt")
+
+    # load xvector containing speaker's voice characteristics from a dataset
+    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+    speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+
+    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+    print("Type of speech: ", type(speech))
+
+    timestr = time.strftime("%Y%m%d-%H%M%S")
+    # sampling_rate = 16000
+    with open('sample-' + timestr + '.wav', 'wb') as audio:
+        sf.write(audio, speech.numpy(), samplerate=16000)
+    # audio = sf.write("convert1.wav", speech, samplerate=16000)
+    print("audio: ", audio)
+    Audio_output.append(audio.name)
+    return audio
+
+
+
+Text2Sound_tool_loc = Tool(
+    name = "Text To Sound API 2",
+    # func = Text2Sound,
+    func = text_to_speech_loc2,
+    description = "Useful when you need to convert text into a sound file."
+)
+
 
 class GPTRemote(LLM):
     n: int
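The hunk above wires a Pinecone-backed RetrievalQA chain into a LangChain tool. A minimal sketch of how that path could be exercised on its own, assuming the stla-back index exists and the pinecone_api_key / search_kwargs_k environment variables are configured; the query string is purely illustrative:

# Illustrative sketch only; mirrors the QAQuery_p / DB_Search2 definitions added above.
import os
os.environ.setdefault("search_kwargs_k", "3")          # assumed value for this sketch

answer, sources = QAQuery_p("What does the acronym ABS stand for?")   # hypothetical query
print(answer)
for doc in sources:                                     # source documents returned by RetrievalQA
    print(doc.metadata)

db_tool = DB_Search2()
print(db_tool.run("ABS"))                               # same lookup routed through the BaseTool interface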
@@ -88,6 +221,72 @@ class GPTRemote(LLM):
 
 GPTfake = GPTRemote(n=0)
 
+llm_math_2 = LLMMathChain.from_llm(GPTfake)
+
+
+math_tool_2 = Tool(
+    name = 'Calculator',
+    func = llm_math_2.run,
+    description = 'Useful for when you need to answer questions about math.'
+)
+
+
+class CodeBlock:
+    '''
+    CodeBlock class which can be run by the Code Runner.
+    '''
+    def __init__(self, code):
+        self.code = code
+        self.output = ""
+        self.active_line = None
+
+    def refresh(self):
+        print(f"Active line: {self.active_line}")
+        print(f"Output: {self.output}")
+
+
+def Code_Runner(code_raw: str):
+    # interpreter = CodeInterpreter(language="python", debug_mode=True)
+    global CurrentAgent
+    if CurrentAgent == "Zero Short React 2":
+        code_raw = RemoveIndent(code_raw)
+    if '!pip' in code_raw or 'pip install' in code_raw:
+        try:
+            code_raw = code_raw.replace('!pip', 'pip')
+        except Exception as e:
+            print(e)
+        interpreter = CodeInterpreter(language="shell", debug_mode=True)
+    else:
+        interpreter = CodeInterpreter(language="python", debug_mode=True)
+    # interpreter = CodeInterpreter(language=lang, debug_mode=True)
+    code_block = CodeBlock(code_raw)
+    interpreter.active_block = code_block
+    output = interpreter.run()
+    print("Real Output: \n", output)
+    try:
+        if output.strip() == "" or output == []:
+            output = "It is Done. No Error Found."
+    except Exception as e:
+        print(e)
+    return output
+
+def RemoveIndent(code_string, indentation_level=4):
+    lines = code_string.split('\n')
+    corrected_lines = []
+    for line in lines:
+        if line.strip() == "":
+            continue
+        line_without_indentation = line[indentation_level:] \
+            if line.startswith(' ' * indentation_level) else line
+        corrected_lines.append(line_without_indentation)
+    corrected_content = '\n'.join(corrected_lines)
+    return corrected_content
+
+python_tool3 = Tool(
+    name = "Code Runner",
+    func = Code_Runner,
+    description = """Code Interpreter which is able to run a code block on the local machine.\n It is capable of handling **any** task by running the code and outputting the result (i.e. analyze data, modify/create documents, draw diagrams/flowcharts ...).\n You should input detailed code with correct indentation."""
+)
 
 
 async def start_playwright(question: str):
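Code_Runner above wraps open-interpreter's CodeInterpreter and switches to a shell interpreter when the block looks like a pip install. A small sketch of how it would be driven, assuming that import resolves; the snippets are illustrative:

# Illustrative sketch only; exercises the Code_Runner wrapper added above.
snippet = (
    "import math\n"
    "print(math.sqrt(2))\n"
)
print(Code_Runner(snippet))                  # routed to CodeInterpreter(language="python", ...)
print(Code_Runner("!pip install requests"))  # '!pip' is rewritten to 'pip' and routed to the shell interpreter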
@@ -156,7 +355,7 @@ memory3 = ConversationBufferWindowMemory(memory_key="chat_history", return_messa
 
 input_variables=["input", "chat_history", "agent_scratchpad"]
 
-tools_remote = []
+tools_remote = [DB_Search2(), duckduckgo_tool2, wikipedia_tool2, python_tool3, math_tool_2, Text2Sound_tool_loc]
 
 agent_STRUCTURED_ZEROSHOT_REACT = initialize_agent(tools_remote, GPTfake,
 # agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,