# Source: Hugging Face Space "Aedelon/GAIA_Agent" (status: Running; file size: 14,712 bytes)

import os
import logging

from llama_index.core.agent.workflow import CodeActAgent, ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.llms.openai import OpenAI
from llama_index.tools.code_interpreter import CodeInterpreterToolSpec

# Setup logging
logger = logging.getLogger(__name__)

# Helper function to load prompt from file
def load_prompt_from_file(filename: str, default_prompt: str) -> str:
    """Load a prompt from a text file, falling back to a default on failure.

    Args:
        filename: Path of the prompt file. It is joined onto the directory of
            this script, so a relative name is looked up next to the script
            and an absolute path is used as-is.
        default_prompt: Text returned when the file cannot be read.

    Returns:
        The file's full contents, or ``default_prompt`` if the file is missing
        or any other error occurs while locating or reading it.
    """
    try:
        # Assuming the prompt file is in the same directory as the agent script
        script_dir = os.path.dirname(__file__)
        prompt_path = os.path.join(script_dir, filename)
        # Decode explicitly as UTF-8: the prompts used by this module contain
        # non-ASCII punctuation, and the platform's locale encoding is not
        # guaranteed to be UTF-8.
        with open(prompt_path, "r", encoding="utf-8") as f:
            prompt = f.read()
    except FileNotFoundError:
        # Only open() raises this, so prompt_path is always bound here.
        logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
        return default_prompt
    except Exception as e:
        # Best-effort loader: any other failure (permissions, decoding, ...)
        # is logged with its traceback and the default is used instead.
        logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
        return default_prompt

    logger.info(f"Successfully loaded prompt from {prompt_path}")
    return prompt

def generate_python_code(prompt: str) -> str:
    """
    Turn a natural-language request into Python source text using the code-generation LLM.

    The model name is read from ``CODE_GEN_LLM_MODEL`` (default ``o4-mini``) and the
    credential from ``OPENAI_API_KEY``. The instruction template is loaded from
    ``../prompts/code_gen_prompt.txt`` when available; otherwise a built-in template is used.

    Args:
        prompt (str): A clear description of the desired Python code functionality.
    Returns:
        str: The text of the LLM completion, expected to be Python code.
    Raises:
        ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        Exception: Any error raised while creating or calling the LLM (logged, then re-raised).
    """
    logger.info("Generating Python code for prompt: %s...", prompt[:100])

    # Resolve the model and credential before doing any other work.
    model_name = os.getenv("CODE_GEN_LLM_MODEL", "o4-mini")
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable is not set.")

    # Built-in instruction template, used when no template file can be loaded.
    fallback_template = "".join(
        [
            "You are a helpful assistant that writes Python code. ",
            "You will be given a prompt and you must generate Python code based on that prompt. ",
            "You must only generate Python code and nothing else. ",
            "Do not include any explanations or any other text. ",
            "Do not use any markdown. \n",
            "Prompt: {prompt} \n",
            "Code:\n",
        ]
    )
    template = load_prompt_from_file("../prompts/code_gen_prompt.txt", fallback_template)
    rendered_prompt = template.format(prompt=prompt)

    llm_options = {
        "model": model_name,
        "api_key": api_key,
        "reasoning_effort": "high",
        "temperature": 0.1,
        "max_tokens": 16384,
    }
    try:
        llm = OpenAI(**llm_options)
        logger.info("Using code generation LLM: %s", model_name)
        completion = llm.complete(rendered_prompt)
        logger.info("Code generation successful.")
        return completion.text
    except Exception as e:
        logger.error("LLM call failed during code generation: %s", e, exc_info=True)
        # Propagate unchanged so the calling agent/workflow decides how to react.
        raise

# --- Tool Definitions ---

# Expose `generate_python_code` to the agent as a named tool.
python_code_generator_tool = FunctionTool.from_defaults(
    name="python_code_generator",
    description="Generates executable Python code based on a natural language prompt. "
    "Input: prompt string. Output: Python code string.",
    fn=generate_python_code,
)

# Build the code-execution tool from LlamaIndex's built-in CodeInterpreterToolSpec.
# This runs at import time: if the spec cannot be created, or exposes no tool named
# "code_interpreter", importing this module fails with RuntimeError.
# NOTE(review): the original comment described this as "safe execution" that assumes a
# docker-like environment. Nothing in this file configures a sandbox, and the tool spec
# may run code through a local subprocess — confirm its isolation before relying on it.
try:
    code_interpreter_spec = CodeInterpreterToolSpec()
    # Get the tool(s) from the spec. It might return multiple tools.
    code_interpreter_tools = code_interpreter_spec.to_tool_list()
    if not code_interpreter_tools:
        raise RuntimeError("CodeInterpreterToolSpec did not return any tools.")
    # Select the tool by its metadata name rather than by position; None if absent.
    code_interpreter_tool = next((t for t in code_interpreter_tools if t.metadata.name == "code_interpreter"), None)
    if code_interpreter_tool is None:
         raise RuntimeError("Could not find 'code_interpreter' tool in CodeInterpreterToolSpec results.")
    logger.info("CodeInterpreterToolSpec initialized successfully.")
except Exception as e:
    # This handler also catches the RuntimeErrors raised above; each failure is logged
    # with its traceback and then wrapped in a single RuntimeError.
    logger.error(f"Failed to initialize CodeInterpreterToolSpec: {e}", exc_info=True)
    # No fallback tool is defined: initialization fails outright if the interpreter
    # tool is unavailable, so the agent cannot be created without it.
    raise RuntimeError("CodeInterpreterToolSpec failed to initialize. Cannot create code_agent.") from e

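# --- REMOVED SimpleCodeExecutor ---
# The SimpleCodeExecutor class, which ran code via subprocess, has been removed
# entirely because of severe security risks. All execution MUST go through the
# `code_interpreter` tool obtained from CodeInterpreterToolSpec.
# NOTE(review): verify how CodeInterpreterToolSpec isolates the code it runs
# before treating it as a sandbox.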

# --- Agent Initialization ---

def initialize_code_agent() -> ReActAgent:
    """Build the ``code_agent`` ReActAgent that generates and executes Python code.

    The agent's LLM is a GoogleGenAI model whose name is read from
    ``CODE_AGENT_LLM_MODEL`` (default ``gemini-2.5-pro-preview-03-25``) and whose
    credential is read from ``GEMINI_API_KEY``. The system prompt is loaded from
    ``code_agent_system_prompt.txt`` next to this script when available;
    otherwise the built-in default below is used.

    Returns:
        ReActAgent: An agent named ``code_agent`` equipped with the
        ``python_code_generator`` and ``code_interpreter`` tools, allowed to
        hand off to ``planner_agent`` and ``reasoning_agent``.

    Raises:
        ValueError: If ``GEMINI_API_KEY`` is unset or empty.
        Exception: Any error raised while constructing the LLM or the agent
            (logged with traceback, then re-raised unchanged).
    """
    # Function-scope import keeps this block self-contained.
    import textwrap

    logger.info("Initializing CodeAgent...")

    # Configuration for the agent's main LLM
    agent_llm_model = os.getenv("CODE_AGENT_LLM_MODEL", "gemini-2.5-pro-preview-03-25")
    gemini_api_key = os.getenv("GEMINI_API_KEY")

    if not gemini_api_key:
        logger.error("GEMINI_API_KEY not found in environment variables for CodeAgent.")
        raise ValueError("GEMINI_API_KEY must be set for CodeAgent")

    try:
        llm = GoogleGenAI(
            api_key=gemini_api_key,
            model=agent_llm_model,
            temperature=0.10
        )
        logger.info("Using agent LLM: %s", agent_llm_model)

        # Built-in system prompt, used when no prompt file can be loaded.
        # dedent() strips the source-code indentation so the LLM receives the
        # text flush-left instead of with 12 leading spaces on every line.
        default_system_prompt = textwrap.dedent("""\
            You are CodeAgent, a specialist in generating and executing Python code. Your mission:
            
            1. **Thought**: Think step-by-step before acting and state your reasoning.
            2. **Code Generation**: To produce code, call `python_code_generator` with a concise, unambiguous prompt. Review the generated code for correctness and safety.
            3. **Execution & Testing**: To execute or test code, call `code_interpreter`. Provide the complete code snippet. Analyze its output (stdout, stderr, result) to verify functionality and debug errors.
            4. **Iteration**: If execution fails or the result is incorrect, analyze the error, think about the fix, generate corrected code using `python_code_generator`, and execute again using `code_interpreter`.
            5. **Tool Use**: Always adhere strictly to each tool’s input/output format.
            6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
            7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
            
            **Special Instructions for Chess-Related Tasks**:
            - Prioritize using the Stockfish engine to solve chess problems. Ubuntu installation: `sudo apt-get install stockfish` so path is `/usr/games/stockfish`
            - Use `python-chess` to represent boards, generate and validate moves, and parse PGN/FEN.
            
            **Available Python Packages**:
            
            - beautifulsoup4: HTML/XML parsing and lightweight web scraping  
            - certifi: Mozilla CA bundle for secure TLS/SSL requests  
            - datasets: Hugging Face dataset loading and streaming  
            - duckdb: In‑process OLAP SQL engine (analytics, Parquet, Arrow)  
            - ffmpeg-python: Wrapper around FFmpeg for audio/video operations  
            - gradio[oauth]: Rapid web‑UI prototyping with optional OAuth  
            - helium: High‑level Selenium / browser automation toolkit  
            - huggingface: Interact with Hugging Face Hub models, datasets, spaces  
            - imageio: Read and write images, GIFs, MP4s, volumes, etc.  
            - matplotlib: 2‑D plotting (figures, axes, annotations)  
            - numpy: N‑dimensional arrays and vectorized math  
            - openai-whisper: Speech‑to‑text transcription  
            - opencv-python: Computer vision, image/video processing  
            - openpyxl: Excel .xlsx read/write, styles, formulas  
            - pandas: DataFrames, time series, CSV/Parquet I/O  
            - pyarrow: Apache Arrow tables, Parquet, Flight RPC  
            - pygame: Simple 2‑D game/graphics engine (SDL based)  
            - python-chess: Chess move generation, PGN/FEN handling, engine UCI integration  
            - requests: HTTP/HTTPS client with sessions and retries  
            - scikit-learn: Machine‑learning algorithms, preprocessing, pipelines  
            - scipy: Scientific computing, optimization, signal processing  
            - seaborn: Statistical visualization on top of matplotlib  
            - sqlalchemy: SQL ORM and core engine for many databases  
            - statsmodels: Econometrics and statistical modeling (GLM, ARIMA)  
            - stockfish: UCI interface to Stockfish chess engine  
            - sympy: Symbolic math, algebra, calculus CAS  
            - youtube-transcript-api: Fetch YouTube video transcripts via API  
            - yt-dlp: Download videos/playlists from YouTube and other sites  
            """)

        system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", default_system_prompt)

        agent = ReActAgent(
            name="code_agent",
            description=(
                "Generates Python code using `python_code_generator` and executes it safely with "
                "`code_interpreter`, then iteratively debugs and refines the code from run-time feedback.\n\n"
                "The agent can leverage the following pre-installed packages:\n"
                "- beautifulsoup4>=4.13.4 : HTML/XML parsing and lightweight web scraping\n"
                "- certifi>=2025.4.26     : Mozilla CA bundle for secure TLS/SSL requests\n"
                "- datasets>=3.5.1        : Hugging Face dataset loading and streaming\n"
                "- duckdb>=1.2.2          : In‑process OLAP SQL engine (analytics, Parquet, Arrow)\n"
                "- ffmpeg-python>=0.2.0   : Wrapper around FFmpeg for audio/video operations\n"
                "- gradio[oauth]>=5.28.0  : Rapid web‑UI prototyping with optional OAuth\n"
                "- helium>=5.1.1          : High‑level Selenium / browser automation toolkit\n"
                "- huggingface>=0.0.1     : Interact with Hugging Face Hub models, datasets, spaces\n"
                "- imageio>=2.37.0        : Read and write images, GIFs, MP4s, volumes, etc.\n"
                "- matplotlib>=3.10.1     : 2‑D plotting (figures, axes, annotations)\n"
                "- numpy>=2.2.5           : N‑dimensional arrays and vectorized math\n"
                "- openai-whisper>=20240930 : Speech‑to‑text transcription\n"
                "- opencv-python>=4.11.0.86 : Computer vision, image/video processing\n"
                "- openpyxl>=3.1.5        : Excel .xlsx read/write, styles, formulas\n"
                "- pandas>=2.2.3          : DataFrames, time series, CSV/Parquet I/O\n"
                "- pyarrow>=20.0.0        : Apache Arrow tables, Parquet, Flight RPC\n"
                "- pygame>=2.6.1          : Simple 2‑D game/graphics engine (SDL based)\n"
                "- python-chess>=1.999    : Chess move generation, PGN/FEN handling, engines\n"
                "- requests>=2.32.3       : HTTP/HTTPS client with sessions and retries\n"
                "- scikit-learn>=1.6.1    : Machine‑learning algorithms, preprocessing, pipelines\n"
                "- scipy>=1.15.2          : Scientific computing, optimization, signal processing\n"
                "- seaborn>=0.13.2        : Statistical visualization on top of matplotlib\n"
                "- sqlalchemy>=2.0.40     : SQL ORM and core engine for many databases\n"
                "- statsmodels>=0.14.4    : Econometrics and statistical modeling (GLM, ARIMA)\n"
                "- stockfish==3.28.0      : UCI interface to Stockfish chess engine\n"
                "- sympy>=1.14.0          : Symbolic math, algebra, calculus CAS\n"
                "- youtube-transcript-api>=1.0.3 : Fetch YouTube video transcripts via API\n"
                "- yt-dlp>=2025.3.31      : Download videos/playlists from YouTube and other sites\n\n"
                "Additionally, the `stockfish` package enables the agent to solve chess problems by analyzing positions, "
                "identifying tactical motifs, and calculating optimal move sequences, making it a valuable tool for chess training and analysis."
            ),
            # REMOVED: code_execute_fn - Execution is handled by the code_interpreter tool via the agent loop.
            tools=[
                python_code_generator_tool,
                code_interpreter_tool,  # Tool obtained from CodeInterpreterToolSpec
            ],
            llm=llm,
            system_prompt=system_prompt,
            can_handoff_to=["planner_agent", "reasoning_agent"],
        )
        logger.info("CodeAgent initialized successfully.")

        return agent

    except Exception as e:
        # Log with traceback for diagnosis, then let the caller handle it.
        logger.error("Error during CodeAgent initialization: %s", e, exc_info=True)
        raise

# Example usage (for testing if run directly)
if __name__ == "__main__":
    # Manual smoke test: configure logging, verify credentials, build the agent.
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.info("Running code_agent.py directly for testing...")

    # Check exactly the variables this module reads: GEMINI_API_KEY for the
    # agent LLM and OPENAI_API_KEY for the code-generation LLM.
    required_keys = ["GEMINI_API_KEY", "OPENAI_API_KEY"]
    missing_keys = [key for key in required_keys if not os.getenv(key)]
    if missing_keys:
        print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
    else:
        try:
            test_agent = initialize_code_agent()
            print("Code Agent initialized successfully for testing.")
            # Example test (requires user interaction or pre-defined task)
            # NOTE(review): confirm the agent's invocation API before enabling;
            # workflow agents may need to be driven with `await test_agent.run(...)`.
            # result = test_agent.chat("Write and execute python code to print 'hello world'")
            # print(f"Test query result: {result}")
        except Exception as e:
            print(f"Error during testing: {e}")