File size: 8,067 Bytes
15e92df
 
 
a2cbab6
 
15e92df
 
 
a2cbab6
 
 
15e92df
2683590
a2cbab6
15e92df
 
097f999
15e92df
097f999
15e92df
 
 
 
a2cbab6
 
097f999
 
a2cbab6
 
097f999
15e92df
097f999
 
 
 
15e92df
 
 
097f999
15e92df
097f999
15e92df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
097f999
15e92df
 
 
 
 
 
 
 
 
097f999
 
15e92df
 
 
097f999
 
15e92df
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import json
from smolagents import CodeAgent, LiteLLMModel
from tools import perplexity_search_tool
import os
from dotenv import load_dotenv
from PIL import Image # Added for loading images
import pandas as pd # Added for reading Excel
import io # Added for StringIO

# Load environment configuration before anything reads it (the API key below
# is looked up with os.getenv).
load_dotenv()

# LLM backend shared by the agent; a vision-capable model is selected because
# images may be handed to the agent alongside the question.
smol_model = LiteLLMModel(
    api_key=os.getenv("OPENAI_API_KEY"),
    model_id="openai/gpt-4.1",
)

# Single module-level CodeAgent that BasicAgent wraps. Only the Perplexity
# search tool is registered; the framework's base tools are switched off.
# NOTE(review): "os" and "subprocess" are authorized for agent-generated code,
# which presumably lets that code touch the filesystem and spawn processes —
# confirm this is intended for the deployment environment.
smol_code_agent_instance = CodeAgent(
    model=smol_model,
    tools=[perplexity_search_tool],
    additional_authorized_imports=[
        "os",
        "io",
        "requests",
        "pandas",
        "PIL",
        "pydub",
        "subprocess",
    ],
    add_base_tools=False,
)

# This is the agent class that app.py will instantiate and use.
# It wraps the smolagents.CodeAgent.
class BasicAgent:
    def __init__(self):
        self.smol_agent = smol_code_agent_instance
        print("BasicAgent (using smolagents.CodeAgent with direct image passing and content pre-loading) initialized.")

    def __call__(self, question: str, file_path: str | None = None) -> str:
        print(f"BasicAgent received question (first 100 chars): {question[:100]}...")
        
        task_description = question # Default to original question
        pil_images = []
        run_kwargs = {} # Will hold {'images': pil_images} or {'additional_args': ...} or be empty

        if file_path and os.path.exists(file_path):
            print(f"BasicAgent received file_path: {file_path}")
            file_extension = os.path.splitext(file_path)[1].lower()
            try:
                # Attempt to load as image first
                img = Image.open(file_path).convert("RGB")
                pil_images.append(img)
                run_kwargs["images"] = pil_images
                
                task_description = f"""Please answer the following question: "{question}"

An image has been provided to help answer this question. The image is directly available to you for analysis. Focus on interpreting the visual content of the image in relation to the question."""
                print(f"Passing PIL image to smolagent. Task: '{task_description[:150]}...'")

            except IOError: # Not a standard image or unreadable by PIL
                print(f"File {file_path} is not a PIL-compatible image. Attempting other file type processing.")
                try:
                    if file_extension == ".xlsx":
                        df = pd.read_excel(file_path)
                        # Convert dataframe to CSV string to pass as content
                        excel_content_str = df.to_csv(index=False)
                        run_kwargs["additional_args"] = {"task_file_content": excel_content_str, "file_type": "excel_csv_string"}
                        task_description = f"""Please answer the following question: "{question}"

An Excel file's content (converted to CSV format) has been provided in 'additional_args' under the key 'task_file_content'.
You can process this CSV string, for example, by using pandas: pandas.read_csv(io.StringIO(task_file_content)).
The original file name was: {os.path.basename(file_path)}"""
                        print(f"Passing Excel content (as CSV string) via additional_args. Task: '{task_description[:200]}...'")
                    elif file_extension == ".mp3": # Example for mp3, passing as bytes
                        with open(file_path, "rb") as f:
                            file_bytes = f.read()
                        run_kwargs["additional_args"] = {"task_file_content": file_bytes, "file_type": "mp3_bytes"}
                        task_description = f"""Please answer the following question: "{question}"

An MP3 file's content (as bytes) has been provided in 'additional_args' under the key 'task_file_content'.
The original file name was: {os.path.basename(file_path)}.
Note: You will need appropriate tools/capabilities to process raw audio bytes for tasks like transcription."""
                        print(f"Passing MP3 file content (as bytes) via additional_args. Task: '{task_description[:200]}...'")
                    else: # Generic binary/text file
                        with open(file_path, "rb") as f: # Read as binary by default
                            file_bytes = f.read()
                        run_kwargs["additional_args"] = {"task_file_content": file_bytes, "file_type": "generic_bytes"}
                        # Try to decode as text for the task description if it's small, otherwise indicate binary
                        try:
                            preview_text = file_bytes.decode('utf-8', errors='ignore')[:200]
                            file_content_description = f"The file content (as bytes, starting with: '{preview_text}...') has been provided"
                        except:
                            file_content_description = "The file content (as binary bytes) has been provided"

                        task_description = f"""Please answer the following question: "{question}"

A file's content has been provided in 'additional_args' under the key 'task_file_content' (type: {run_kwargs['additional_args']['file_type']}).
{file_content_description}.
The original file name was: {os.path.basename(file_path)}."""
                        print(f"Passing generic file content (as bytes) via additional_args. Task: '{task_description[:200]}...'")

                except Exception as e_read:
                    print(f"Error reading content from {file_path} after PIL failure: {e_read}")
                    task_description = f"""Regarding the question: "{question}"
A file was provided at '{file_path}', but an error occurred while trying to read its content: {e_read}.
Please attempt to answer based on the text query alone. If the file was essential, state it could not be processed."""
                    # run_kwargs remains as is, or cleared if preferred. Let's clear additional_args if read fails.
                    if "additional_args" in run_kwargs: del run_kwargs["additional_args"]

            except Exception as e_pil: # Catch other PIL errors if any beyond IOError
                print(f"An unexpected PIL error occurred while processing file {file_path}: {e_pil}")
                task_description = f"""Regarding the question: "{question}"
A file was initially provided at '{file_path}', but an error occurred during its image processing: {e_pil}.
Please attempt to answer the question based on the text query alone if possible. If the file was essential, please state that the file could not be processed."""
        else:
            if file_path: # File path provided but does not exist
                print(f"BasicAgent received file_path: {file_path}, but it does not exist. Proceeding without file content.")
                task_description = f"""Regarding the question: "{question}"
A file was expected at '{file_path}', but it was not found.
Please answer the question based on the text query alone if possible. If the file was essential, please state that the file was missing."""
            else: # No file path provided at all
                print("BasicAgent received no file_path. Task is based on question only.")
        
        print(f"Final task description for smolagent (first 300 chars): {task_description[:300]}...")

        try:
            answer = self.smol_agent.run(task_description, **run_kwargs)
            print(f"smolagents.CodeAgent returned: {str(answer)[:200]}...")
            return str(answer)
        except Exception as e:
            print(f"Error during smolagents.CodeAgent run: {e}")
            # Be more specific about the error type if possible
            return f"AGENT_ERROR: An error occurred in CodeAgent ({type(e).__name__}): {str(e)}"