OuroborosM committed on
Commit
daf02fd
·
1 Parent(s): 200f9e3

add full tool

Browse files
Files changed (1) hide show
  1. app.py +201 -2
app.py CHANGED
@@ -19,11 +19,144 @@ from langchain.memory import ConversationBufferWindowMemory
19
  from langchain.prompts import MessagesPlaceholder
20
  from langchain.agents import ConversationalChatAgent, AgentExecutor
21
  from langchain.callbacks import StreamlitCallbackHandler
22
-
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  global CurrentAgent
25
  CurrentAgent = 'Structured Zero Short Agent'
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  class GPTRemote(LLM):
29
  n: int
@@ -88,6 +221,72 @@ class GPTRemote(LLM):
88
 
89
  GPTfake = GPTRemote(n=0)
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
 
93
  async def start_playwright(question: str):
@@ -156,7 +355,7 @@ memory3 = ConversationBufferWindowMemory(memory_key="chat_history", return_messa
156
 
157
  input_variables=["input", "chat_history", "agent_scratchpad"]
158
 
159
- tools_remote = []
160
 
161
  agent_STRUCTURED_ZEROSHOT_REACT = initialize_agent(tools_remote, GPTfake,
162
  # agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
 
19
  from langchain.prompts import MessagesPlaceholder
20
  from langchain.agents import ConversationalChatAgent, AgentExecutor
21
  from langchain.callbacks import StreamlitCallbackHandler
22
+ from langchain.chains import RetrievalQA
23
+ import pinecone
24
+ from langchain.vectorstores import Pinecone
25
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
26
+ from langchain.tools import DuckDuckGoSearchRun
27
+ from langchain.utilities import WikipediaAPIWrapper
28
+ import soundfile as sf
29
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
30
+ from datasets import load_dataset
31
+ import torch
32
+ from langchain.chains import LLMMathChain
33
+ from interpreter.code_interpreter import CodeInterpreter
34
 
35
  global CurrentAgent
36
  CurrentAgent = 'Structured Zero Short Agent'
37
 
38
class DB_Search2(BaseTool):
    """Tool that answers a query through the ``QAQuery_p`` retrieval helper.

    Only the answer text returned by ``QAQuery_p`` is passed back; the
    source documents it also returns are discarded. There is no
    asynchronous implementation.
    """

    name = "Vector Database Search"
    description = "This is the internal vector database to search information firstly (i.e. engineering data, acronym.)"

    def _run(self, query: str) -> str:
        """Return the answer text produced by ``QAQuery_p`` for *query*."""
        answer, _sources = QAQuery_p(query)
        return answer

    def _arun(self, query: str):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("N/A")
48
+
49
# --- Pinecone connection ---------------------------------------------------
# Requires the ``pinecone_api_key`` environment variable; a missing key
# raises KeyError here.
pinecone.init(
    api_key=os.environ["pinecone_api_key"],
    environment="us-west4-gcp-free",
)

global index_name
index_name = "stla-back"
index = pinecone.Index(index_name)
# Startup diagnostics: print the account info and the index statistics.
print(pinecone.whoami())
print(index.describe_index_stats())

# --- Embedding model -------------------------------------------------------
embed_model_id = "sentence-transformers/all-MiniLM-L6-v2"
device = "cpu"
embeddings_miniLM = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={"device": device},
)

# MiniLM is the active embedding backend.
embeddings = embeddings_miniLM

# --- Vector store handle used by QAQuery_p ----------------------------------
global vectordb_p
vectordb_p = Pinecone.from_existing_index(index_name, embeddings)
76
+
77
def QAQuery_p(question: str):
    """Answer *question* with a "stuff" RetrievalQA chain over ``vectordb_p``.

    The number of documents to retrieve is read from the
    ``search_kwargs_k`` environment variable on every call, and the chain is
    run with ``GPTfake`` as its LLM.

    Args:
        question: Query text passed to the chain under the ``"query"`` key.

    Returns:
        A ``(response, source)`` tuple holding the chain's ``result`` and
        ``source_documents`` entries.

    Raises:
        KeyError: If ``search_kwargs_k`` is not set in the environment.
        ValueError: If ``search_kwargs_k`` cannot be parsed as an integer.
    """
    retriever = vectordb_p.as_retriever()
    retriever.search_kwargs['k'] = int(os.environ["search_kwargs_k"])

    # The former ``if 1: ... else: pass`` wrapper was dropped: its else branch
    # was unreachable and would have left ``qa`` unbound.
    print("--------------- QA with Remote --------------")
    qa = RetrievalQA.from_chain_type(
        llm=GPTfake,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        verbose=True,
    )
    res = qa({"query": question})

    print("-" * 20)
    print("-" * 20)
    response = res['result']
    source = res['source_documents']
    return response, source
108
+
109
# Internet search tool: wraps DuckDuckGoSearchRun.run.
Netsearch = DuckDuckGoSearchRun()
duckduckgo_tool2 = Tool(
    name="Duckduckgo Internet Search",
    func=Netsearch.run,
    description="Useful to search in internet for real-time information and additional information which is not available in other tools",
)

# Encyclopedia lookup tool: wraps WikipediaAPIWrapper.run.
Wikipedia = WikipediaAPIWrapper()
wikipedia_tool2 = Tool(
    name="Wikipedia Search",
    func=Wikipedia.run,
    description="Useful to search a topic, country or person when there is no availble information in vector database",
)
122
+
123
+
124
+
125
+
126
+
127
def text_to_speech_loc2(Text_input):
    """Synthesize *Text_input* with SpeechT5 and save it as a timestamped WAV.

    The audio is written to ``sample-<YYYYmmdd-HHMMSS>.wav`` at 16000 Hz and
    the file name is appended to the global ``Audio_output`` list. The
    processor, model, vocoder and speaker-embedding dataset are loaded on
    every call.

    Args:
        Text_input: Text to convert to speech.

    Returns:
        The binary file object the audio was written to (closed by the time
        the caller receives it).
    """
    global Audio_output

    tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    hifigan = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

    encoded = tts_processor(text=Text_input, return_tensors="pt")

    # The x-vector at index 7306 of the validation split sets the speaker voice.
    xvector_rows = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embeddings = torch.tensor(xvector_rows[7306]["xvector"]).unsqueeze(0)

    speech = tts_model.generate_speech(
        encoded["input_ids"], speaker_embeddings, vocoder=hifigan
    )
    print("Type of speech: ", type(speech))

    wav_name = f"sample-{time.strftime('%Y%m%d-%H%M%S')}.wav"
    with open(wav_name, 'wb') as audio:
        sf.write(audio, speech.numpy(), samplerate=16000)
    print("audio: ", audio)
    Audio_output.append(audio.name)
    return audio
150
+
151
+
152
+
153
# Exposes text_to_speech_loc2 to the agent as a tool.
Text2Sound_tool_loc = Tool(
    name="Text To Sound API 2",
    func=text_to_speech_loc2,
    description="Useful when you need to convert text into sound file.",
)
159
+
160
 
161
  class GPTRemote(LLM):
162
  n: int
 
221
 
222
  GPTfake = GPTRemote(n=0)
223
 
224
# Calculator tool: an LLMMathChain driven by GPTfake.
llm_math_2 = LLMMathChain.from_llm(GPTfake)

math_tool_2 = Tool(
    name='Calculator',
    func=llm_math_2.run,
    description='Useful for when you need to answer questions about math.',
)
232
+
233
+
234
class CodeBlock:
    """Container for a piece of code that the Code Runner can execute.

    ``code`` holds the source text given at construction; ``output`` starts
    as an empty string and ``active_line`` starts as ``None``.
    """

    def __init__(self, code):
        self.code, self.output, self.active_line = code, "", None

    def refresh(self):
        """Print the current active line followed by the current output."""
        print(f"Active line: {self.active_line}")
        print(f"Output: {self.output}")
246
+
247
+
248
def Code_Runner(code_raw: str):
    """Run *code_raw* through ``CodeInterpreter`` and return its output.

    When the global ``CurrentAgent`` equals ``"Zero Short React 2"`` the code
    is first passed through ``RemoveIndent``. Code containing ``!pip`` or
    ``pip install`` is run by a shell interpreter after ``!pip`` is rewritten
    to ``pip``; everything else is run as Python.

    Args:
        code_raw: Source text (or pip command) to execute.

    Returns:
        The interpreter's output, or ``"It is Done. No Error Found."`` when
        the output is ``None``, an empty list, or a blank string.
    """
    # SECURITY NOTE(review): this executes arbitrary code and shell commands
    # on the host. The input comes from the agent, so treat it as untrusted
    # and consider sandboxing.
    if CurrentAgent == "Zero Short React 2":
        code_raw = RemoveIndent(code_raw)

    if '!pip' in code_raw or 'pip install' in code_raw:
        # ``!pip`` is notebook syntax; a plain shell needs ``pip``.
        code_raw = code_raw.replace('!pip', 'pip')
        language = "shell"
    else:
        language = "python"

    interpreter = CodeInterpreter(language=language, debug_mode=True)
    interpreter.active_block = CodeBlock(code_raw)
    output = interpreter.run()
    print("Real Output: \n", output)

    # Check the type before calling .strip(): previously a list (or None)
    # raised AttributeError first, so the empty-list check never ran.
    is_blank_text = isinstance(output, str) and not output.strip()
    if output is None or output == [] or is_blank_text:
        output = "It is Done. No Error Found."
    return output
272
+
273
def RemoveIndent(code_string, indentation_level=4):
    """Strip one indentation level from each line and drop blank lines.

    A line loses its first *indentation_level* characters only when it starts
    with that many spaces; any other line is kept unchanged. Lines that are
    empty or whitespace-only are removed.

    Args:
        code_string: Text to process; it is split on ``'\\n'``.
        indentation_level: Number of leading spaces to remove per line.

    Returns:
        The remaining lines joined with ``'\\n'``.
    """
    prefix = ' ' * indentation_level
    return '\n'.join(
        raw[indentation_level:] if raw.startswith(prefix) else raw
        for raw in code_string.split('\n')
        if raw.strip() != ""
    )
284
+
285
# Exposes Code_Runner to the agent as a tool.
python_tool3 = Tool(
    name="Code Runner",
    func=Code_Runner,
    description="""Code Interpreter which is able to run code block in local machine.\n It is capable to treat **any** task by running the code and output the result. (i.e. analyzer data, modify/creat documents, draw diagram/flowchart ...)\n You should input detail code with right indentation.""",
)
290
 
291
 
292
  async def start_playwright(question: str):
 
355
 
356
  input_variables=["input", "chat_history", "agent_scratchpad"]
357
 
358
# Tools passed to initialize_agent for the remote agent.
tools_remote = [
    DB_Search2(),
    duckduckgo_tool2,
    wikipedia_tool2,
    python_tool3,
    math_tool_2,
    Text2Sound_tool_loc,
]
359
 
360
  agent_STRUCTURED_ZEROSHOT_REACT = initialize_agent(tools_remote, GPTfake,
361
  # agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,