sanjeed5 committed on
Commit
15e92df
·
1 Parent(s): fdba468

Refactor agent.py to use LiteLLMModel with a vision-capable model for enhanced image processing, so that images can be passed directly to the agent. Implement improved file handling for various file types, including images, Excel files, and MP3/other binary files, allowing the agent to process content pre-loaded from the provided files. Update the task descriptions for clarity, and add error handling for file-reading failures.

Browse files
Files changed (1) hide show
  1. agent.py +103 -50
agent.py CHANGED
@@ -1,20 +1,26 @@
1
- from smolagents import CodeAgent, InferenceClientModel, WebSearchTool
 
 
2
  import os
3
  from dotenv import load_dotenv
 
 
 
4
 
5
  load_dotenv()
6
 
7
- # Configure the underlying LLM model for the agent
8
- smol_model = InferenceClientModel(token=os.getenv("HF_TOKEN")) # You can choose to not pass any model_id to InferenceClientModel to use a default model
9
- # you can also specify a particular provider e.g. provider="together" or provider="sambanova"
10
 
11
- # Instantiate the CodeAgent with authorized imports for file operations
12
- # WebSearchTool is kept as an example, you might want to add/remove tools as needed.
13
  smol_code_agent_instance = CodeAgent(
14
- tools=[WebSearchTool()],
15
  model=smol_model,
16
- add_base_tools=True,
17
- additional_authorized_imports=["os", "io"] # Authorize os and io for file access
 
 
18
  )
19
 
20
  # This is the agent class that app.py will instantiate and use.
@@ -22,55 +28,102 @@ smol_code_agent_instance = CodeAgent(
22
  class BasicAgent:
23
  def __init__(self):
24
  self.smol_agent = smol_code_agent_instance
25
- print("BasicAgent (using smolagents.CodeAgent) initialized.")
26
 
27
  def __call__(self, question: str, file_path: str | None = None) -> str:
28
  print(f"BasicAgent received question (first 100 chars): {question[:100]}...")
29
 
30
- task_description = question
31
- run_additional_args = {}
 
32
 
33
- if file_path:
34
  print(f"BasicAgent received file_path: {file_path}")
35
- # Augment the task description to inform the agent about the file
36
- # and how to access it via additional_args.
37
- task_description += (
38
- f"\n\nA file relevant to this task has been provided. "
39
- f"You can access this file's content using standard Python file operations (e.g., open(), read()). "
40
- f"The path to this file is available in the 'additional_args' dictionary, passed to the run method, under the key 'task_file'. "
41
- f"The value is: '{file_path}'."
42
- )
43
- run_additional_args["additional_args"] = {"task_file": file_path}
44
- print(f"Passing to smolagent: task='{task_description[:150]}...', additional_args={run_additional_args['additional_args']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  else:
46
- print("BasicAgent received no file_path.")
47
- print(f"Passing to smolagent: task='{task_description[:150]}...'")
 
 
 
 
 
 
 
48
 
49
  try:
50
- # The CodeAgent's .run() method returns the final answer.
51
- # Pass additional_args if they exist.
52
- if run_additional_args:
53
- answer = self.smol_agent.run(task_description, **run_additional_args)
54
- else:
55
- answer = self.smol_agent.run(task_description)
56
-
57
- print(f"smolagents.CodeAgent returned: {str(answer)[:200]}...") # Log a snippet of the answer
58
- return str(answer) # Ensure the output is a string
59
  except Exception as e:
60
  print(f"Error during smolagents.CodeAgent run: {e}")
61
- # It's often good to return a more informative error message
62
- return f"AGENT_ERROR: An error occurred while processing with CodeAgent: {str(e)}"
63
-
64
- # Example test run (optional, can be commented out for deployment)
65
- # if __name__ == '__main__':
66
- # print("Testing CodeAgent with a simple task...")
67
- # test_agent = BasicAgent()
68
- # # Test without file
69
- # # response = test_agent("What is the capital of France?")
70
- # # print(f"Test response (no file): {response}")
71
- # # To test with a file, you'd create a dummy file and pass its path:
72
- # # with open("dummy_test_file.txt", "w") as f:
73
- # # f.write("This is a test file for the agent.")
74
- # # response_with_file = test_agent("Summarize the content of the provided file.", file_path="dummy_test_file.txt")
75
- # # print(f"Test response (with file): {response_with_file}")
76
- # # os.remove("dummy_test_file.txt")
 
1
+ import json
2
+ from smolagents import CodeAgent, LiteLLMModel
3
+ from tools import perplexity_search_tool
4
  import os
5
  from dotenv import load_dotenv
6
+ from PIL import Image # Added for loading images
7
+ import pandas as pd # Added for reading Excel
8
+ import io # Added for StringIO
9
 
10
  load_dotenv()
11
 
12
+ # Configure the underlying LLM model for the agent - using a vision-capable model
13
+ smol_model = LiteLLMModel(model_id="gemini/gemini-2.5-flash-preview-04-17", api_key=os.getenv("GEMINI_API_KEY"))
 
14
 
15
+ # Instantiate the CodeAgent with authorized imports
16
+ # Removed base64, litellm. Kept PIL for now, though framework might handle image display.
17
  smol_code_agent_instance = CodeAgent(
18
+ tools=[perplexity_search_tool],
19
  model=smol_model,
20
+ add_base_tools=False,
21
+ # Authorized imports for the agent's generated code if it needs to manipulate images/files.
22
+ # PIL might be needed if agent code directly interacts with image objects.
23
+ additional_authorized_imports=["os", "io", "requests", "pandas", "PIL", "pydub", "subprocess"]
24
  )
25
 
26
  # This is the agent class that app.py will instantiate and use.
 
28
  class BasicAgent:
29
  def __init__(self):
30
  self.smol_agent = smol_code_agent_instance
31
+ print("BasicAgent (using smolagents.CodeAgent with direct image passing and content pre-loading) initialized.")
32
 
33
  def __call__(self, question: str, file_path: str | None = None) -> str:
34
  print(f"BasicAgent received question (first 100 chars): {question[:100]}...")
35
 
36
+ task_description = question # Default to original question
37
+ pil_images = []
38
+ run_kwargs = {} # Will hold {'images': pil_images} or {'additional_args': ...} or be empty
39
 
40
+ if file_path and os.path.exists(file_path):
41
  print(f"BasicAgent received file_path: {file_path}")
42
+ file_extension = os.path.splitext(file_path)[1].lower()
43
+ try:
44
+ # Attempt to load as image first
45
+ img = Image.open(file_path).convert("RGB")
46
+ pil_images.append(img)
47
+ run_kwargs["images"] = pil_images
48
+
49
+ task_description = f"""Please answer the following question: "{question}"
50
+
51
+ An image has been provided to help answer this question. The image is directly available to you for analysis. Focus on interpreting the visual content of the image in relation to the question."""
52
+ print(f"Passing PIL image to smolagent. Task: '{task_description[:150]}...'")
53
+
54
+ except IOError: # Not a standard image or unreadable by PIL
55
+ print(f"File {file_path} is not a PIL-compatible image. Attempting other file type processing.")
56
+ try:
57
+ if file_extension == ".xlsx":
58
+ df = pd.read_excel(file_path)
59
+ # Convert dataframe to CSV string to pass as content
60
+ excel_content_str = df.to_csv(index=False)
61
+ run_kwargs["additional_args"] = {"task_file_content": excel_content_str, "file_type": "excel_csv_string"}
62
+ task_description = f"""Please answer the following question: "{question}"
63
+
64
+ An Excel file's content (converted to CSV format) has been provided in 'additional_args' under the key 'task_file_content'.
65
+ You can process this CSV string, for example, by using pandas: pandas.read_csv(io.StringIO(task_file_content)).
66
+ The original file name was: {os.path.basename(file_path)}"""
67
+ print(f"Passing Excel content (as CSV string) via additional_args. Task: '{task_description[:200]}...'")
68
+ elif file_extension == ".mp3": # Example for mp3, passing as bytes
69
+ with open(file_path, "rb") as f:
70
+ file_bytes = f.read()
71
+ run_kwargs["additional_args"] = {"task_file_content": file_bytes, "file_type": "mp3_bytes"}
72
+ task_description = f"""Please answer the following question: "{question}"
73
+
74
+ An MP3 file's content (as bytes) has been provided in 'additional_args' under the key 'task_file_content'.
75
+ The original file name was: {os.path.basename(file_path)}.
76
+ Note: You will need appropriate tools/capabilities to process raw audio bytes for tasks like transcription."""
77
+ print(f"Passing MP3 file content (as bytes) via additional_args. Task: '{task_description[:200]}...'")
78
+ else: # Generic binary/text file
79
+ with open(file_path, "rb") as f: # Read as binary by default
80
+ file_bytes = f.read()
81
+ run_kwargs["additional_args"] = {"task_file_content": file_bytes, "file_type": "generic_bytes"}
82
+ # Try to decode as text for the task description if it's small, otherwise indicate binary
83
+ try:
84
+ preview_text = file_bytes.decode('utf-8', errors='ignore')[:200]
85
+ file_content_description = f"The file content (as bytes, starting with: '{preview_text}...') has been provided"
86
+ except:
87
+ file_content_description = "The file content (as binary bytes) has been provided"
88
+
89
+ task_description = f"""Please answer the following question: "{question}"
90
+
91
+ A file's content has been provided in 'additional_args' under the key 'task_file_content' (type: {run_kwargs['additional_args']['file_type']}).
92
+ {file_content_description}.
93
+ The original file name was: {os.path.basename(file_path)}."""
94
+ print(f"Passing generic file content (as bytes) via additional_args. Task: '{task_description[:200]}...'")
95
+
96
+ except Exception as e_read:
97
+ print(f"Error reading content from {file_path} after PIL failure: {e_read}")
98
+ task_description = f"""Regarding the question: "{question}"
99
+ A file was provided at '{file_path}', but an error occurred while trying to read its content: {e_read}.
100
+ Please attempt to answer based on the text query alone. If the file was essential, state it could not be processed."""
101
+ # run_kwargs remains as is, or cleared if preferred. Let's clear additional_args if read fails.
102
+ if "additional_args" in run_kwargs: del run_kwargs["additional_args"]
103
+
104
+ except Exception as e_pil: # Catch other PIL errors if any beyond IOError
105
+ print(f"An unexpected PIL error occurred while processing file {file_path}: {e_pil}")
106
+ task_description = f"""Regarding the question: "{question}"
107
+ A file was initially provided at '{file_path}', but an error occurred during its image processing: {e_pil}.
108
+ Please attempt to answer the question based on the text query alone if possible. If the file was essential, please state that the file could not be processed."""
109
  else:
110
+ if file_path: # File path provided but does not exist
111
+ print(f"BasicAgent received file_path: {file_path}, but it does not exist. Proceeding without file content.")
112
+ task_description = f"""Regarding the question: "{question}"
113
+ A file was expected at '{file_path}', but it was not found.
114
+ Please answer the question based on the text query alone if possible. If the file was essential, please state that the file was missing."""
115
+ else: # No file path provided at all
116
+ print("BasicAgent received no file_path. Task is based on question only.")
117
+
118
+ print(f"Final task description for smolagent (first 300 chars): {task_description[:300]}...")
119
 
120
  try:
121
+ answer = self.smol_agent.run(task_description, **run_kwargs)
122
+ print(f"smolagents.CodeAgent returned: {str(answer)[:200]}...")
123
+ return str(answer)
 
 
 
 
 
 
124
  except Exception as e:
125
  print(f"Error during smolagents.CodeAgent run: {e}")
126
+ # Be more specific about the error type if possible
127
+ return f"AGENT_ERROR: An error occurred in CodeAgent ({type(e).__name__}): {str(e)}"
128
+
129
+