wt002 committed on
Commit
396779e
·
verified ·
1 Parent(s): dff9f1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -29
app.py CHANGED
@@ -148,6 +148,8 @@ class VideoTranscriptionTool(BaseTool):
148
 
149
 
150
 
 
 
151
  import os
152
  import time
153
  import json
@@ -203,20 +205,22 @@ class VideoTranscriptionTool:
203
  # --- Agent State Definition ---
204
  class AgentState(TypedDict):
205
  question: str
206
- history: List[Union[HumanMessage, AIMessage]] # History only contains proper messages
207
- context: Dict[str, Any] # Use context for internal agent state
208
  reasoning: str
209
  iterations: int
210
  final_answer: Union[str, float, int, None]
211
  current_task: str
212
  current_thoughts: str
213
- tools: List[Tool] # Pass tools into state
214
 
215
 
216
  # --- Utility Functions ---
217
  def parse_agent_response(response_content: str) -> tuple[str, str, str]:
218
  """
219
  Parses the LLM's JSON output for reasoning, action, and action input.
 
 
220
  """
221
  try:
222
  response_json = json.loads(response_content)
@@ -225,17 +229,46 @@ def parse_agent_response(response_content: str) -> tuple[str, str, str]:
225
  action_input = response_json.get("Action Input", "").strip()
226
  return reasoning, action, action_input
227
  except json.JSONDecodeError:
228
- print(f"WARNING: LLM response not perfectly JSON: {response_content[:200]}...")
229
- # Fallback heuristic parsing (less reliable but better than nothing)
230
- reasoning_match = response_content.split("Reasoning:", 1)
231
- reasoning = reasoning_match[1].split("Action:", 1)[0].strip() if len(reasoning_match) > 1 else ""
232
-
233
- action_part_match = response_content.split("Action:", 1)
234
- action_part = action_part_match[1].strip() if len(action_part_match) > 1 else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
- action_input_match = action_part.split("Action Input:", 1)
237
- action = action_input_match[0].strip()
238
- action_input = action_input_match[1].strip() if len(action_input_match) > 1 else ""
239
  return reasoning, action, action_input
240
 
241
 
@@ -247,17 +280,14 @@ def should_continue(state: AgentState) -> str:
247
  """
248
  print(f"DEBUG: Entering should_continue. Current context: {state.get('context', {})}")
249
 
250
- # End if agent has produced a final answer
251
- if state.get("final_answer") is not None: # Check for None explicitly
252
  print("DEBUG: should_continue -> END (Final Answer set in state)")
253
  return "end"
254
 
255
- # Check if a tool action is pending in context
256
  if state.get("context", {}).get("pending_action"):
257
  print("DEBUG: should_continue -> ACTION (Pending action in context)")
258
  return "action"
259
 
260
- # Otherwise, go back to reasoning (e.g., after initial question, or after tool output)
261
  print("DEBUG: should_continue -> REASON (Default to reasoning)")
262
  return "reason"
263
 
@@ -270,11 +300,9 @@ def reasoning_node(state: AgentState) -> AgentState:
270
  print(f"DEBUG: Entering reasoning_node. Iteration: {state['iterations']}")
271
  print(f"DEBUG: Current history length: {len(state.get('history', []))}")
272
 
273
- # --- CHANGE: Use HF_TOKEN environment variable ---
274
  HF_TOKEN = os.getenv("HF_TOKEN")
275
  if not HF_TOKEN:
276
  raise ValueError("HF_TOKEN not set in environment variables.")
277
- # --- END CHANGE ---
278
 
279
  state.setdefault("context", {})
280
  state.setdefault("reasoning", "")
@@ -289,9 +317,9 @@ def reasoning_node(state: AgentState) -> AgentState:
289
  llm = ChatHuggingFace(
290
  llm=HuggingFaceEndpoint(
291
  repo_id=model_id,
292
- max_new_tokens=1024, # 512
293
  temperature=0.1,
294
- huggingfacehub_api_token=HF_TOKEN, # --- CHANGE: Pass HF_TOKEN here ---
295
  )
296
  )
297
 
@@ -340,14 +368,20 @@ def reasoning_node(state: AgentState) -> AgentState:
340
  for attempt in range(retries):
341
  try:
342
  response = chain.invoke(inputs)
343
- json.loads(response.content)
 
 
 
344
  return response
345
  except json.JSONDecodeError as e:
346
- print(f"[Retry {attempt+1}/{retries}] LLM returned invalid JSON. Retrying...")
347
- print(f"Invalid JSON content: {response.content[:200]}...")
 
 
348
  time.sleep(5)
349
  except Exception as e:
350
  print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during LLM call: {e}. Waiting {delay}s...")
 
351
  time.sleep(delay)
352
  raise RuntimeError("Failed after multiple retries due to Hugging Face API issues or invalid JSON.")
353
 
@@ -362,10 +396,13 @@ def reasoning_node(state: AgentState) -> AgentState:
362
  content = response.content
363
  reasoning, action, action_input = parse_agent_response(content)
364
 
365
- print(f"DEBUG: LLM Raw Response Content: {content[:200]}...")
366
  print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
367
 
368
- state["history"].append(AIMessage(content=content))
 
 
 
 
369
  state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
370
  state["iterations"] += 1
371
  state["current_thoughts"] = reasoning
@@ -377,6 +414,8 @@ def reasoning_node(state: AgentState) -> AgentState:
377
  "tool": action,
378
  "input": action_input
379
  }
 
 
380
  state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
381
 
382
 
@@ -403,7 +442,7 @@ def tool_node(state: AgentState) -> AgentState:
403
 
404
  if not tool_name or tool_input is None:
405
  error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty. LLM needs to provide valid 'Action' and 'Action Input'."
406
- print(f"ERROR: {error_message}")
407
  state["history"].append(AIMessage(content=error_message))
408
  state["context"].pop("pending_action", None)
409
  return state
@@ -491,8 +530,14 @@ class BasicAgent:
491
  return answer
492
  else:
493
  print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
494
- raise ValueError("Agent finished without providing a final answer.")
495
-
 
 
 
 
 
 
496
 
497
 
498
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
148
 
149
 
150
 
151
+
152
+
153
  import os
154
  import time
155
  import json
 
205
# --- Agent State Definition ---
class AgentState(TypedDict):
    """State dictionary threaded through the agent graph.

    Each node of the agent loop reads and updates these keys; the graph's
    conditional edges (see ``should_continue``) route on them.
    """

    question: str                                  # the user question being answered
    history: List[Union[HumanMessage, AIMessage]]  # conversation so far (proper messages only)
    context: Dict[str, Any]                        # internal scratch state (e.g. "pending_action")
    reasoning: str                                 # accumulated step-by-step reasoning log
    iterations: int                                # number of reasoning iterations completed
    final_answer: Union[str, float, int, None]     # None until the agent produces an answer
    current_task: str                              # description of the task in progress
    current_thoughts: str                          # latest reasoning text from the LLM
    tools: List[Tool]                              # tools available to the agent
216
 
217
 
218
  # --- Utility Functions ---
219
def parse_agent_response(response_content: str) -> tuple[str, str, str]:
    """Parse the LLM's output into ``(reasoning, action, action_input)``.

    The response is expected to be a JSON object with "Reasoning",
    "Action", and "Action Input" keys.  If JSON parsing fails (or the
    top-level JSON value is not an object), falls back to a heuristic
    scan for the literal "Reasoning:", "Action:" and "Action Input:"
    labels in the raw text.

    Returns empty strings for any part that cannot be recovered.
    """
    try:
        response_json = json.loads(response_content)
        if not isinstance(response_json, dict):
            # A bare list/number/string parses as JSON but has no .get();
            # route it through the same heuristic fallback.
            raise json.JSONDecodeError("top-level JSON is not an object", response_content, 0)
        # str() guards against non-string JSON values (e.g. a numeric
        # Action Input), which would otherwise crash on .strip().
        reasoning = str(response_json.get("Reasoning", "")).strip()
        action = str(response_json.get("Action", "")).strip()
        action_input = str(response_json.get("Action Input", "")).strip()
        return reasoning, action, action_input
    except json.JSONDecodeError:
        print(f"WARNING: JSONDecodeError: LLM response was not valid JSON. Attempting heuristic parse: {response_content[:200]}...")
        reasoning = ""
        action = ""
        action_input = ""

        reasoning_idx = response_content.find("Reasoning:")
        action_idx = response_content.find("Action:")

        # Reasoning: text between the "Reasoning:" and "Action:" labels,
        # or to end-of-text when no action label follows.
        if reasoning_idx != -1:
            if action_idx != -1 and reasoning_idx < action_idx:
                reasoning = response_content[reasoning_idx + len("Reasoning:"):action_idx]
            else:
                reasoning = response_content[reasoning_idx + len("Reasoning:"):]
            reasoning = _strip_json_artifacts(reasoning)

        # Action / Action Input: everything after their respective labels.
        if action_idx != -1:
            action_input_idx = response_content.find("Action Input:", action_idx)
            if action_input_idx != -1:
                action = response_content[action_idx + len("Action:"):action_input_idx]
                action_input = response_content[action_input_idx + len("Action Input:"):]
            else:
                action = response_content[action_idx + len("Action:"):]
            action = _strip_json_artifacts(action)
            # BUG FIX: the previous cleanup did action_input.split('"', 1)[0],
            # which truncated any legitimate input containing a double quote
            # (e.g. a search query with quoted phrases).  Only trailing JSON
            # debris and one pair of *wrapping* quotes are removed now.
            action_input = _strip_json_artifacts(action_input)

        return reasoning, action, action_input


def _strip_json_artifacts(text: str) -> str:
    """Strip whitespace, trailing JSON punctuation, and one pair of wrapping quotes.

    Used by the heuristic branch of ``parse_agent_response`` to clean values
    extracted from partially-valid JSON (e.g. ``"tool_name",`` -> ``tool_name``)
    without mangling interior quotes.
    """
    text = text.strip()
    # Remove trailing JSON debris like '",', '"}', ',' or '}' left over when
    # a label was embedded in a partially-valid JSON blob.
    text = text.rstrip(",}").rstrip()
    if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
        text = text[1:-1]
    return text
273
 
274
 
 
280
  """
281
  print(f"DEBUG: Entering should_continue. Current context: {state.get('context', {})}")
282
 
283
+ if state.get("final_answer") is not None:
 
284
  print("DEBUG: should_continue -> END (Final Answer set in state)")
285
  return "end"
286
 
 
287
  if state.get("context", {}).get("pending_action"):
288
  print("DEBUG: should_continue -> ACTION (Pending action in context)")
289
  return "action"
290
 
 
291
  print("DEBUG: should_continue -> REASON (Default to reasoning)")
292
  return "reason"
293
 
 
300
  print(f"DEBUG: Entering reasoning_node. Iteration: {state['iterations']}")
301
  print(f"DEBUG: Current history length: {len(state.get('history', []))}")
302
 
 
303
  HF_TOKEN = os.getenv("HF_TOKEN")
304
  if not HF_TOKEN:
305
  raise ValueError("HF_TOKEN not set in environment variables.")
 
306
 
307
  state.setdefault("context", {})
308
  state.setdefault("reasoning", "")
 
317
  llm = ChatHuggingFace(
318
  llm=HuggingFaceEndpoint(
319
  repo_id=model_id,
320
+ max_new_tokens=1024, # Increased max_new_tokens
321
  temperature=0.1,
322
+ huggingfacehub_api_token=HF_TOKEN,
323
  )
324
  )
325
 
 
368
  for attempt in range(retries):
369
  try:
370
  response = chain.invoke(inputs)
371
+ # --- NEW DEBUGGING PRINT ---
372
+ print(f"DEBUG: RAW LLM Response (Attempt {attempt+1}):\n---\n{response.content}\n---")
373
+ # --- END NEW DEBUGGING PRINT ---
374
+ json.loads(response.content) # Attempt to parse to validate structure
375
  return response
376
  except json.JSONDecodeError as e:
377
+ print(f"[Retry {attempt+1}/{retries}] LLM returned invalid JSON. Error: {e}. Retrying...")
378
+ print(f"Invalid JSON content (partial): {response.content[:200]}...")
379
+ # Add specific error message to history to guide LLM
380
+ state["history"].append(AIMessage(content=f"[Parsing Error] The previous LLM output was not valid JSON. Expected format: ```json{{\"Reasoning\": \"...\", \"Action\": \"...\", \"Action Input\": \"...\"}}```. Please ensure your response is ONLY valid JSON and strictly follows the format. Error: {e}"))
381
  time.sleep(5)
382
  except Exception as e:
383
  print(f"[Retry {attempt+1}/{retries}] An unexpected error occurred during LLM call: {e}. Waiting {delay}s...")
384
+ state["history"].append(AIMessage(content=f"[API Error] Failed to get a response from the LLM due to an API error: {e}. Trying again."))
385
  time.sleep(delay)
386
  raise RuntimeError("Failed after multiple retries due to Hugging Face API issues or invalid JSON.")
387
 
 
396
  content = response.content
397
  reasoning, action, action_input = parse_agent_response(content)
398
 
 
399
  print(f"DEBUG: Parsed Action: '{action}', Action Input: '{action_input[:100]}...'")
400
 
401
+ # Only append the LLM's raw output if it was valid JSON and processed successfully
402
+ # Otherwise, the specific error message from the retry loop will already be in history.
403
+ if isinstance(response, AIMessage) and content == response.content: # Check if it's the original response, not an error message
404
+ state["history"].append(AIMessage(content=content))
405
+
406
  state["reasoning"] += f"\nStep {state['iterations'] + 1}: {reasoning}"
407
  state["iterations"] += 1
408
  state["current_thoughts"] = reasoning
 
414
  "tool": action,
415
  "input": action_input
416
  }
417
+ # Add a message to history to indicate the agent's intent for the LLM
418
+ # This will be shown to the LLM in the next turn.
419
  state["history"].append(AIMessage(content=f"Agent decided to use tool: {action} with input: {action_input}"))
420
 
421
 
 
442
 
443
  if not tool_name or tool_input is None:
444
  error_message = f"[Tool Error] Invalid action request from LLM: Tool name '{tool_name}' or input '{tool_input}' was empty. LLM needs to provide valid 'Action' and 'Action Input'."
445
+ print(f"ERROR: {error_output}") # Print the error message for debugging
446
  state["history"].append(AIMessage(content=error_message))
447
  state["context"].pop("pending_action", None)
448
  return state
 
530
  return answer
531
  else:
532
  print(f"--- ERROR: Agent finished without setting 'final_answer' for question: {question} ---")
533
+ # You might want to return the last message from history if no final answer was set
534
+ if final_state["history"]:
535
+ last_message = final_state["history"][-1].content
536
+ print(f"Last message in history: {last_message}")
537
+ return f"Agent could not fully answer. Last message: {last_message}"
538
+ else:
539
+ raise ValueError("Agent finished without providing a final answer and no history messages.")
540
+
541
 
542
 
543
  def run_and_submit_all( profile: gr.OAuthProfile | None):