wt002 committed on
Commit
9c685c5
·
verified ·
1 Parent(s): ef60401

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -14
app.py CHANGED
@@ -42,6 +42,7 @@ def init_state(question: str):
42
  }
43
 
44
 
 
45
  # ====== Tool Definitions ======
46
  @tool
47
  def duckduckgo_search(query: str) -> str:
@@ -102,6 +103,45 @@ def python_execution(code: str) -> str:
102
  except Exception as e:
103
  return f"Error: {str(e)}"
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def indent_code(code: str) -> str:
106
  return '\n '.join(code.splitlines())
107
 
@@ -149,58 +189,72 @@ def should_continue(state: AgentState) -> str:
149
 
150
  def reasoning_node(state: AgentState) -> AgentState:
151
  import os
 
152
  from langchain_google_genai import ChatGoogleGenerativeAI
153
  from langchain.schema import HumanMessage, AIMessage
154
  from langchain.prompts import ChatPromptTemplate
 
155
 
156
- # Load and verify the Google API key
157
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
158
  if not GOOGLE_API_KEY:
159
  raise ValueError("GOOGLE_API_KEY not set in environment variables.")
160
 
161
- # Ensure history is initialized and ends with a HumanMessage
162
  if "history" not in state or not isinstance(state["history"], list):
163
  state["history"] = []
164
  if not state["history"] or not isinstance(state["history"][-1], HumanMessage):
165
  state["history"].append(HumanMessage(content="Continue."))
166
 
167
- # Ensure context is a dictionary
168
- if not isinstance(state.get("context"), dict):
169
- state["context"] = {}
170
-
171
- # Ensure reasoning and iterations keys are present
172
  state.setdefault("reasoning", "")
173
  state.setdefault("iterations", 0)
174
 
175
- # Initialize Gemini model via LangChain
176
  llm = ChatGoogleGenerativeAI(
177
  model="gemini-1.5-flash",
178
  temperature=0.1,
179
  google_api_key=GOOGLE_API_KEY
180
  )
181
 
182
- # Create prompt
183
  prompt = ChatPromptTemplate.from_messages([
184
  ("system", (
185
  "You're an expert problem solver. Analyze the question, select the best tool, "
186
  "and provide reasoning. Available tools: duckduckgo_search, wikipedia_search, "
187
  "arxiv_search, document_qa, python_execution.\n\n"
 
188
  "Current Context:\n{context}\n\n"
189
  "Reasoning Steps:\n{reasoning}\n\n"
190
- "Response Format:\nReasoning: [Your analysis]\nAction: [Tool name OR 'Final Answer']\n"
 
 
191
  "Action Input: [Input for tool OR final response]"
192
  )),
193
  *state["history"]
194
  ])
195
 
196
- # Invoke model
197
  chain = prompt | llm
198
- response = chain.invoke({
 
 
 
 
 
 
 
 
 
 
 
 
199
  "context": state["context"],
200
  "reasoning": state["reasoning"],
201
  "question": state["question"]
202
  })
203
 
 
204
  content = response.content
205
  reasoning, action, action_input = parse_agent_response(content)
206
 
@@ -209,7 +263,6 @@ def reasoning_node(state: AgentState) -> AgentState:
209
  state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
210
  state["iterations"] += 1
211
 
212
- # Store either final answer or tool to call
213
  if "final answer" in action.lower():
214
  state["history"].append(AIMessage(content=f"FINAL ANSWER: {action_input}"))
215
  else:
@@ -222,6 +275,7 @@ def reasoning_node(state: AgentState) -> AgentState:
222
 
223
 
224
 
 
225
 
226
 
227
  def tool_node(state: AgentState) -> AgentState:
@@ -316,7 +370,8 @@ class BasicAgent:
316
  wikipedia_search,
317
  arxiv_search,
318
  document_qa,
319
- python_execution
 
320
  ]
321
 
322
  def __call__(self, question: str) -> str:
 
42
  }
43
 
44
 
45
+
46
  # ====== Tool Definitions ======
47
  @tool
48
  def duckduckgo_search(query: str) -> str:
 
103
  except Exception as e:
104
  return f"Error: {str(e)}"
105
 
106
+ from langchain_core.tools import BaseTool
107
+ from youtube_transcript_api import YouTubeTranscriptApi
108
+
109
class VideoTranscriptionTool(BaseTool):
    """LangChain tool that returns the transcript of a YouTube video.

    The video may be given as a ``youtube.com/watch`` URL, a ``youtu.be``
    short link, a ``/shorts/``, ``/embed/`` or ``/live/`` URL, or a bare
    11-character video ID. Failures are reported as a returned message rather
    than raised, so the calling agent can read the error and carry on.
    """

    # Annotated on purpose: pydantic-v2-based releases of langchain_core
    # reject a field override that has no type annotation.
    name: str = "transcript_video"
    description: str = "Fetch text transcript from YouTube videos using URL or ID. Optionally include timestamps."

    @staticmethod
    def _extract_video_id(url: str) -> str:
        """Return the video ID found in *url*, or "" when none is recognised."""
        import re

        # The lazy optional group lets ``v=`` appear after other query
        # parameters while still preferring the first ``v=`` in the query.
        # The ID stops at the first ``&``, ``?``, ``#``, ``/`` or whitespace,
        # so query strings and fragments never leak into it.
        match = re.search(
            r"(?:youtube\.com/watch\?(?:[^#\s]*?&)??v=|youtu\.be/|"
            r"youtube\.com/(?:shorts|embed|live)/)"
            r"([^&?#/\s]+)",
            url,
        )
        if match:
            return match.group(1)

        # Fall back to treating a bare 11-character token as the ID itself.
        candidate = url.strip()
        if len(candidate) == 11 and "http://" not in url and "https://" not in url:
            return candidate
        return ""

    def _run(self, url: str, include_timestamps: bool = False) -> str:
        """Fetch the transcript for the video identified by *url*.

        Args:
            url: A YouTube URL or a bare video ID.
            include_timestamps: When true, each transcript entry is put on
                its own line prefixed with ``[minutes:seconds]``; otherwise
                all entries are joined with single spaces.

        Returns:
            The transcript text, or a human-readable error message when the
            ID cannot be determined or the transcript cannot be fetched.
        """
        video_id = self._extract_video_id(url)
        if not video_id:
            return f"Invalid or unsupported YouTube URL/ID: {url}"

        try:
            # NOTE(review): newer youtube-transcript-api releases appear to
            # have changed this entry point; confirm against the pinned version.
            transcription = YouTubeTranscriptApi.get_transcript(video_id)

            if include_timestamps:
                formatted = []
                for part in transcription:
                    timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
                    formatted.append(f"[{timestamp}] {part['text']}")
                return "\n".join(formatted)
            return " ".join(part['text'] for part in transcription)

        except Exception as e:
            # Deliberate best-effort: the agent receives the failure as text.
            return f"Error fetching transcript: {str(e)}"

    async def _arun(self, *args, **kwargs):
        """Async entry point; not supported by this tool.

        Declared ``async`` so the error surfaces when the coroutine is
        awaited by the tool's async execution path.
        """
        raise NotImplementedError("This tool does not support async yet.")
142
+
143
+
144
+
145
  def indent_code(code: str) -> str:
146
  return '\n '.join(code.splitlines())
147
 
 
189
 
190
  def reasoning_node(state: AgentState) -> AgentState:
191
  import os
192
+ import time
193
  from langchain_google_genai import ChatGoogleGenerativeAI
194
  from langchain.schema import HumanMessage, AIMessage
195
  from langchain.prompts import ChatPromptTemplate
196
+ from google.api_core.exceptions import ResourceExhausted
197
 
198
+ # Load API key
199
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
200
  if not GOOGLE_API_KEY:
201
  raise ValueError("GOOGLE_API_KEY not set in environment variables.")
202
 
203
+ # Ensure history is well-formed
204
  if "history" not in state or not isinstance(state["history"], list):
205
  state["history"] = []
206
  if not state["history"] or not isinstance(state["history"][-1], HumanMessage):
207
  state["history"].append(HumanMessage(content="Continue."))
208
 
209
+ # Ensure context and reasoning fields
210
+ state.setdefault("context", {})
 
 
 
211
  state.setdefault("reasoning", "")
212
  state.setdefault("iterations", 0)
213
 
214
+ # Create Gemini model wrapper
215
  llm = ChatGoogleGenerativeAI(
216
  model="gemini-1.5-flash",
217
  temperature=0.1,
218
  google_api_key=GOOGLE_API_KEY
219
  )
220
 
221
+ # Create prompt chain
222
  prompt = ChatPromptTemplate.from_messages([
223
  ("system", (
224
  "You're an expert problem solver. Analyze the question, select the best tool, "
225
  "and provide reasoning. Available tools: duckduckgo_search, wikipedia_search, "
226
  "arxiv_search, document_qa, python_execution.\n\n"
227
+ "Important: You must select a tool for questions involving video, audio, or code.\n\n"
228
  "Current Context:\n{context}\n\n"
229
  "Reasoning Steps:\n{reasoning}\n\n"
230
+ "Response Format:\n"
231
+ "Reasoning: [Your analysis]\n"
232
+ "Action: [Tool name OR 'Final Answer']\n"
233
  "Action Input: [Input for tool OR final response]"
234
  )),
235
  *state["history"]
236
  ])
237
 
 
238
  chain = prompt | llm
239
+
240
+ # === Add Retry Logic ===
241
def call_with_retry(inputs, retries=3, delay=60):
    """Invoke the prompt chain, retrying when Gemini reports quota exhaustion.

    Args:
        inputs: Mapping of template variables passed to ``chain.invoke``.
        retries: Maximum number of attempts.
        delay: Seconds to wait between attempts.

    Returns:
        Whatever ``chain.invoke`` returns on the first successful attempt.

    Raises:
        RuntimeError: If every attempt fails with ``ResourceExhausted``; the
            last such error is attached as the cause.
    """
    last_error = None
    for attempt in range(1, retries + 1):
        try:
            return chain.invoke(inputs)
        except ResourceExhausted as exc:
            last_error = exc
            # No further attempt follows the last one, so waiting again
            # would only delay the failure.
            if attempt == retries:
                break
            print(f"[Retry {attempt}] Gemini rate limit hit. Waiting {delay}s...")
            time.sleep(delay)
    raise RuntimeError(
        "Failed after multiple retries due to Gemini quota limit."
    ) from last_error
249
+
250
+ # Call model with retry protection
251
+ response = call_with_retry({
252
  "context": state["context"],
253
  "reasoning": state["reasoning"],
254
  "question": state["question"]
255
  })
256
 
257
+ # Parse output
258
  content = response.content
259
  reasoning, action, action_input = parse_agent_response(content)
260
 
 
263
  state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
264
  state["iterations"] += 1
265
 
 
266
  if "final answer" in action.lower():
267
  state["history"].append(AIMessage(content=f"FINAL ANSWER: {action_input}"))
268
  else:
 
275
 
276
 
277
 
278
+
279
 
280
 
281
  def tool_node(state: AgentState) -> AgentState:
 
370
  wikipedia_search,
371
  arxiv_search,
372
  document_qa,
373
+ python_execution,
374
+ VideoTranscriptionTool()
375
  ]
376
 
377
  def __call__(self, question: str) -> str: