Duibonduil committed
Commit d7949de · verified · 1 Parent(s): 2928e27

Upload 17 files
src/smolagents/__init__.py ADDED
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+
4
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ __version__ = "1.20.0.dev0"
18
+
19
+ from .agent_types import * # noqa: I001
20
+ from .agents import * # Above noqa avoids a circular dependency due to cli.py
21
+ from .default_tools import *
22
+ from .gradio_ui import *
23
+ from .local_python_executor import *
24
+ from .mcp_client import *
25
+ from .memory import *
26
+ from .models import *
27
+ from .monitoring import *
28
+ from .remote_executors import *
29
+ from .tools import *
30
+ from .utils import *
31
+ from .cli import *
src/smolagents/_function_type_hints_utils.py ADDED
@@ -0,0 +1,431 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+
4
+ # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ """This module contains utilities exclusively taken from `transformers` repository.
18
+
19
+ Since they are not specific to `transformers`, and `transformers` is a heavy dependency, these helpers have
20
+ been duplicated.
21
+
22
+ TODO: move them to `huggingface_hub` to avoid code duplication.
23
+ """
24
+
25
+ import inspect
26
+ import json
27
+ import re
28
+ import types
29
+ from collections.abc import Callable
30
+ from copy import copy
31
+ from typing import (
32
+ Any,
33
+ Literal,
34
+ Union,
35
+ get_args,
36
+ get_origin,
37
+ get_type_hints,
38
+ )
39
+
40
+
41
+ IMPORT_TO_PACKAGE_MAPPING = {
42
+ "wikipediaapi": "wikipedia-api",
43
+ }
44
+
45
+
46
+ def get_package_name(import_name: str) -> str:
47
+ """
48
+ Return the package name for a given import name.
49
+
50
+ Args:
51
+ import_name (`str`): Import name to get the package name for.
52
+
53
+ Returns:
54
+ `str`: Package name for the given import name.
55
+ """
56
+ return IMPORT_TO_PACKAGE_MAPPING.get(import_name, import_name)
57
+
58
+
59
+ def get_imports(code: str) -> list[str]:
60
+ """
61
+ Extracts all the libraries (not relative imports) that are imported in a code.
62
+
63
+ Args:
64
+ code (`str`): Code text to inspect.
65
+
66
+ Returns:
67
+ `list[str]`: List of all packages required to use the input code.
68
+ """
69
+ # Filter out try/except blocks so that custom code can keep imports inside try/except blocks
70
+ code = re.sub(r"\s*try\s*:.*?except.*?:", "", code, flags=re.DOTALL)
71
+
72
+ # Filter out imports under an is_flash_attn_2_available block to avoid import issues in CPU-only environments
73
+ code = re.sub(
74
+ r"if is_flash_attn[a-zA-Z0-9_]+available\(\):\s*(from flash_attn\s*.*\s*)+",
75
+ "",
76
+ code,
77
+ flags=re.MULTILINE,
78
+ )
79
+
80
+ # Imports of the form `import xxx` or `import xxx as yyy`
81
+ imports = re.findall(r"^\s*import\s+(\S+?)(?:\s+as\s+\S+)?\s*$", code, flags=re.MULTILINE)
82
+ # Imports of the form `from xxx import yyy`
83
+ imports += re.findall(r"^\s*from\s+(\S+)\s+import", code, flags=re.MULTILINE)
84
+ # Only keep the top-level module
85
+ imports = [imp.split(".")[0] for imp in imports if not imp.startswith(".")]
86
+ return [get_package_name(import_name) for import_name in set(imports)]
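For illustration only (not part of the committed file): a minimal sketch of `get_imports` together with the `get_package_name` mapping, assuming the package is installed so this module path resolves.

```python
# Hypothetical usage sketch; the import path assumes this commit is installed.
from smolagents._function_type_hints_utils import get_imports

code = """
import numpy as np
from torch import Tensor
from .sibling import helper
import wikipediaapi
"""
# The relative import is dropped, and "wikipediaapi" is mapped to its
# PyPI package name; ordering is unspecified because of the set() dedup.
assert sorted(get_imports(code)) == ["numpy", "torch", "wikipedia-api"]
```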
87
+
88
+
89
+ class TypeHintParsingException(Exception):
90
+ """Exception raised for errors in parsing type hints to generate JSON schemas"""
91
+
92
+
93
+ class DocstringParsingException(Exception):
94
+ """Exception raised for errors in parsing docstrings to generate JSON schemas"""
95
+
96
+
97
+ def get_json_schema(func: Callable) -> dict:
98
+ """
99
+ This function generates a JSON schema for a given function, based on its docstring and type hints. This is
100
+ mostly used for passing lists of tools to a chat template. The JSON schema contains the name and description of
101
+ the function, as well as the names, types and descriptions for each of its arguments. `get_json_schema()` requires
102
+ that the function has a docstring, and that each argument has a description in the docstring, in the standard
103
+ Google docstring format shown below. It also requires that all the function arguments have a valid Python type hint.
104
+
105
+ Although it is not required, a `Returns` block can also be added, which will be included in the schema. This is
106
+ optional because most chat templates ignore the return value of the function.
107
+
108
+ Args:
109
+ func: The function to generate a JSON schema for.
110
+
111
+ Returns:
112
+ A dictionary containing the JSON schema for the function.
113
+
114
+ Examples:
115
+ ```python
116
+ >>> def multiply(x: float, y: float):
117
+ >>> '''
118
+ >>> A function that multiplies two numbers
119
+ >>>
120
+ >>> Args:
121
+ >>> x: The first number to multiply
122
+ >>> y: The second number to multiply
123
+ >>> '''
124
+ >>> return x * y
125
+ >>>
126
+ >>> print(get_json_schema(multiply))
127
+ {
128
+ "name": "multiply",
129
+ "description": "A function that multiplies two numbers",
130
+ "parameters": {
131
+ "type": "object",
132
+ "properties": {
133
+ "x": {"type": "number", "description": "The first number to multiply"},
134
+ "y": {"type": "number", "description": "The second number to multiply"}
135
+ },
136
+ "required": ["x", "y"]
137
+ }
138
+ }
139
+ ```
140
+
141
+ The general use for these schemas is that they are used to generate tool descriptions for chat templates that
142
+ support them, like so:
143
+
144
+ ```python
145
+ >>> from transformers import AutoTokenizer
146
+ >>> from transformers.utils import get_json_schema
147
+ >>>
148
+ >>> def multiply(x: float, y: float):
149
+ >>> '''
150
+ >>> A function that multiplies two numbers
151
+ >>>
152
+ >>> Args:
153
+ >>> x: The first number to multiply
154
+ >>> y: The second number to multiply
155
+ >>> '''
156
+ >>> return x * y
157
+ >>>
158
+ >>> multiply_schema = get_json_schema(multiply)
159
+ >>> tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-v01")
160
+ >>> messages = [{"role": "user", "content": "What is 179 x 4571?"}]
161
+ >>> formatted_chat = tokenizer.apply_chat_template(
162
+ >>> messages,
163
+ >>> tools=[multiply_schema],
164
+ >>> chat_template="tool_use",
165
+ >>> return_dict=True,
166
+ >>> return_tensors="pt",
167
+ >>> add_generation_prompt=True
168
+ >>> )
169
+ >>> # The formatted chat can now be passed to model.generate()
170
+ ```
171
+
172
+ Each argument description can also have an optional `(choices: ...)` block at the end, such as
173
+ `(choices: ["tea", "coffee"])`, which will be parsed into an `enum` field in the schema. Note that this will
174
+ only be parsed correctly if it is at the end of the line:
175
+
176
+ ```python
177
+ >>> def drink_beverage(beverage: str):
178
+ >>> '''
179
+ >>> A function that drinks a beverage
180
+ >>>
181
+ >>> Args:
182
+ >>> beverage: The beverage to drink (choices: ["tea", "coffee"])
183
+ >>> '''
184
+ >>> pass
185
+ >>>
186
+ >>> print(get_json_schema(drink_beverage))
187
+ {
188
+ 'name': 'drink_beverage',
189
+ 'description': 'A function that drinks a beverage',
190
+ 'parameters': {
191
+ 'type': 'object',
192
+ 'properties': {
193
+ 'beverage': {
194
+ 'type': 'string',
195
+ 'enum': ['tea', 'coffee'],
196
+ 'description': 'The beverage to drink'
197
+ }
198
+ },
199
+ 'required': ['beverage']
200
+ }
201
+ }
202
+ ```
203
+ """
204
+ doc = inspect.getdoc(func)
205
+ if not doc:
206
+ raise DocstringParsingException(
207
+ f"Cannot generate JSON schema for {func.__name__} because it has no docstring!"
208
+ )
209
+ doc = doc.strip()
210
+ main_doc, param_descriptions, return_doc = _parse_google_format_docstring(doc)
211
+
212
+ json_schema = _convert_type_hints_to_json_schema(func)
213
+ if (return_dict := json_schema["properties"].pop("return", None)) is not None:
214
+ if return_doc is not None: # We allow a missing return docstring since most templates ignore it
215
+ return_dict["description"] = return_doc
216
+ for arg, schema in json_schema["properties"].items():
217
+ if arg not in param_descriptions:
218
+ raise DocstringParsingException(
219
+ f"Cannot generate JSON schema for {func.__name__} because the docstring has no description for the argument '{arg}'"
220
+ )
221
+ desc = param_descriptions[arg]
222
+ enum_choices = re.search(r"\(choices:\s*(.*?)\)\s*$", desc, flags=re.IGNORECASE)
223
+ if enum_choices:
224
+ schema["enum"] = [c.strip() for c in json.loads(enum_choices.group(1))]
225
+ desc = enum_choices.string[: enum_choices.start()].strip()
226
+ schema["description"] = desc
227
+
228
+ output = {"name": func.__name__, "description": main_doc, "parameters": json_schema}
229
+ if return_dict is not None:
230
+ output["return"] = return_dict
231
+ return {"type": "function", "function": output}
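One detail worth noting: the docstring examples above print the inner `function` dict, while the code actually wraps it in a `{"type": "function", ...}` envelope on the last line. A minimal sketch of that behavior (illustrative, assuming the module is importable under this path):

```python
from smolagents._function_type_hints_utils import get_json_schema

def get_weather(city: str) -> str:
    """
    Get the weather for a city.

    Args:
        city: The city to look up.
    """
    return f"sunny in {city}"

schema = get_json_schema(get_weather)
assert schema["type"] == "function"  # the envelope added by the return above
assert schema["function"]["name"] == "get_weather"
assert schema["function"]["parameters"]["required"] == ["city"]
assert schema["function"]["parameters"]["properties"]["city"] == {
    "type": "string",
    "description": "The city to look up.",
}
```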
232
+
233
+
234
+ # Extracts the initial segment of the docstring, containing the function description
235
+ description_re = re.compile(r"^(.*?)(?=\n\s*(Args:|Returns:|Raises:)|\Z)", re.DOTALL)
236
+ # Extracts the Args: block from the docstring
237
+ args_re = re.compile(r"\n\s*Args:\n\s*(.*?)[\n\s]*(Returns:|Raises:|\Z)", re.DOTALL)
238
+ # Splits the Args: block into individual arguments
239
+ args_split_re = re.compile(
240
+ r"(?:^|\n)" # Match the start of the args block, or a newline
241
+ r"\s*(\w+)\s*(?:\([^)]*?\))?:\s*" # Capture the argument name (ignore the type) and strip spacing
242
+ r"(.*?)\s*" # Capture the argument description, which can span multiple lines, and strip trailing spacing
243
+ r"(?=\n\s*\w+\s*(?:\([^)]*?\))?:|\Z)", # Stop when you hit the next argument (with or without type) or the end of the block
244
+ re.DOTALL | re.VERBOSE,
245
+ )
246
+ # Extracts the Returns: block from the docstring, if present. Note that most chat templates ignore the return type/doc!
247
+ returns_re = re.compile(
248
+ r"\n\s*Returns:\n\s*"
249
+ r"(?:[^)]*?:\s*)?" # Ignore the return type if present
250
+ r"(.*?)" # Capture the return description
251
+ r"[\n\s]*(Raises:|\Z)",
252
+ re.DOTALL,
253
+ )
254
+
255
+
256
+ def _parse_google_format_docstring(
257
+ docstring: str,
258
+ ) -> tuple[str | None, dict | None, str | None]:
259
+ """
260
+ Parses a Google-style docstring to extract the function description,
261
+ argument descriptions, and return description.
262
+
263
+ Args:
264
+ docstring (str): The docstring to parse.
265
+
266
+ Returns:
267
+ The function description, arguments, and return description.
268
+ """
269
+
270
+ # Extract the sections
271
+ description_match = description_re.search(docstring)
272
+ args_match = args_re.search(docstring)
273
+ returns_match = returns_re.search(docstring)
274
+
275
+ # Clean and store the sections
276
+ description = description_match.group(1).strip() if description_match else None
277
+ docstring_args = args_match.group(1).strip() if args_match else None
278
+ returns = returns_match.group(1).strip() if returns_match else None
279
+
280
+ # Parsing the arguments into a dictionary
281
+ if docstring_args is not None:
282
+ docstring_args = "\n".join([line for line in docstring_args.split("\n") if line.strip()]) # Remove blank lines
283
+ matches = args_split_re.findall(docstring_args)
284
+ args_dict = {match[0]: re.sub(r"\s*\n+\s*", " ", match[1].strip()) for match in matches}
285
+ else:
286
+ args_dict = {}
287
+
288
+ return description, args_dict, returns
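A small sketch of the parser on a Google-style docstring (illustrative, calling the private helper directly); note how a multi-line argument description is collapsed onto one line:

```python
from smolagents._function_type_hints_utils import _parse_google_format_docstring

doc = """Adds two numbers.

Args:
    a: The first number.
    b: The second number,
        possibly described over two lines.

Returns:
    The sum of a and b.
"""
description, args, returns = _parse_google_format_docstring(doc)
assert description == "Adds two numbers."
assert args["b"] == "The second number, possibly described over two lines."
assert returns == "The sum of a and b."
```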
289
+
290
+
291
+ def _convert_type_hints_to_json_schema(func: Callable, error_on_missing_type_hints: bool = True) -> dict:
292
+ type_hints = get_type_hints(func)
293
+ signature = inspect.signature(func)
294
+
295
+ properties = {}
296
+ for param_name, param_type in type_hints.items():
297
+ properties[param_name] = _parse_type_hint(param_type)
298
+
299
+ required = []
300
+ for param_name, param in signature.parameters.items():
301
+ if param.annotation == inspect.Parameter.empty and error_on_missing_type_hints:
302
+ raise TypeHintParsingException(f"Argument {param.name} is missing a type hint in function {func.__name__}")
303
+ if param_name not in properties:
304
+ properties[param_name] = {}
305
+
306
+ if param.default == inspect.Parameter.empty:
307
+ required.append(param_name)
308
+ else:
309
+ properties[param_name]["nullable"] = True
310
+
311
+ # Return: multi-type union -> treat as any
312
+ if (
313
+ "return" in properties
314
+ and (return_type := properties["return"].get("type"))
315
+ and not isinstance(return_type, str)
316
+ ):
317
+ properties["return"]["type"] = "any"
318
+
319
+ schema = {"type": "object", "properties": properties}
320
+ if required:
321
+ schema["required"] = required
322
+
323
+ return schema
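Illustrative sketch of the conversion above: parameters with defaults are left out of `required` and marked `nullable`, and the return annotation shows up as a `return` property (popped off later by `get_json_schema`):

```python
from smolagents._function_type_hints_utils import _convert_type_hints_to_json_schema

def greet(name: str, excited: bool = False) -> str:
    return f"Hi {name}{'!' if excited else '.'}"

schema = _convert_type_hints_to_json_schema(greet)
assert schema["required"] == ["name"]
assert schema["properties"]["excited"] == {"type": "boolean", "nullable": True}
assert schema["properties"]["return"] == {"type": "string"}
```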
324
+
325
+
326
+ def _parse_type_hint(hint: type) -> dict:
327
+ origin = get_origin(hint)
328
+ args = get_args(hint)
329
+
330
+ if origin is None:
331
+ try:
332
+ return _get_json_schema_type(hint)
333
+ except KeyError:
334
+ raise TypeHintParsingException(
335
+ "Couldn't parse this type hint, likely due to a custom class or object: ",
336
+ hint,
337
+ )
338
+
339
+ elif origin is Union or (hasattr(types, "UnionType") and origin is types.UnionType):
340
+ return _parse_union_type(args)
341
+
342
+ elif origin is list:
343
+ if not args:
344
+ return {"type": "array"}
345
+ else:
346
+ # Lists can only have a single type argument, so recurse into it
347
+ return {"type": "array", "items": _parse_type_hint(args[0])}
348
+
349
+ elif origin is tuple:
350
+ if not args:
351
+ return {"type": "array"}
352
+ if len(args) == 1:
353
+ raise TypeHintParsingException(
354
+ f"The type hint {str(hint).replace('typing.', '')} is a Tuple with a single element, which "
355
+ "we do not automatically convert to JSON schema as it is rarely necessary. If this input can contain "
356
+ "more than one element, we recommend "
357
+ "using a List[] type instead, or if it really is a single element, remove the Tuple[] wrapper and just "
358
+ "pass the element directly."
359
+ )
360
+ if ... in args:
361
+ raise TypeHintParsingException(
362
+ "Conversion of '...' is not supported in Tuple type hints. "
363
+ "Use List[] types for variable-length"
364
+ " inputs instead."
365
+ )
366
+ return {"type": "array", "prefixItems": [_parse_type_hint(t) for t in args]}
367
+
368
+ elif origin is dict:
369
+ # The JSON equivalent to a dict is 'object', which mandates that all keys are strings
370
+ # However, we can specify the type of the dict values with "additionalProperties"
371
+ out = {"type": "object"}
372
+ if len(args) == 2:
373
+ out["additionalProperties"] = _parse_type_hint(args[1])
374
+ return out
375
+
376
+ elif origin is Literal:
377
+ literal_types = set(type(arg) for arg in args)
378
+ final_type = _parse_union_type(literal_types)
379
+
380
+ # None literal value is represented by 'nullable' field set by _parse_union_type
381
+ final_type.update({"enum": [arg for arg in args if arg is not None]})
382
+ return final_type
383
+
384
+ raise TypeHintParsingException("Couldn't parse this type hint, likely due to a custom class or object: ", hint)
385
+
386
+
387
+ def _parse_union_type(args: tuple[Any, ...]) -> dict:
388
+ subtypes = [_parse_type_hint(t) for t in args if t is not type(None)]
389
+ if len(subtypes) == 1:
390
+ # A single non-null type can be expressed directly
391
+ return_dict = subtypes[0]
392
+ elif all(isinstance(subtype["type"], str) for subtype in subtypes):
393
+ # A union of basic types can be expressed as a list in the schema
394
+ return_dict = {"type": sorted([subtype["type"] for subtype in subtypes])}
395
+ else:
396
+ # A union of more complex types requires "anyOf"
397
+ return_dict = {"anyOf": subtypes}
398
+ if type(None) in args:
399
+ return_dict["nullable"] = True
400
+ return return_dict
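A sketch of the union handling (illustrative, Python 3.10+ `X | Y` syntax): `None` members become a `nullable` flag, and a union of basic types becomes a sorted list of type names:

```python
from smolagents._function_type_hints_utils import _parse_type_hint

assert _parse_type_hint(int | None) == {"type": "integer", "nullable": True}
assert _parse_type_hint(int | str) == {"type": ["integer", "string"]}
```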
401
+
402
+
403
+ _BASE_TYPE_MAPPING = {
404
+ int: {"type": "integer"},
405
+ float: {"type": "number"},
406
+ str: {"type": "string"},
407
+ bool: {"type": "boolean"},
408
+ list: {"type": "array"},
409
+ dict: {"type": "object"},
410
+ Any: {"type": "any"},
411
+ types.NoneType: {"type": "null"},
412
+ }
413
+
414
+
415
+ def _get_json_schema_type(param_type: type) -> dict[str, str]:
416
+ if param_type in _BASE_TYPE_MAPPING:
417
+ return copy(_BASE_TYPE_MAPPING[param_type])
418
+ if str(param_type) == "Image":
419
+ from PIL.Image import Image
420
+
421
+ if param_type == Image:
422
+ return {"type": "image"}
423
+ if str(param_type) == "Tensor":
424
+ try:
425
+ from torch import Tensor
426
+
427
+ if param_type == Tensor:
428
+ return {"type": "audio"}
429
+ except ModuleNotFoundError:
430
+ pass
431
+ return {"type": "object"}
src/smolagents/agent_types.py ADDED
@@ -0,0 +1,283 @@
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import logging
16
+ import os
17
+ import pathlib
18
+ import tempfile
19
+ import uuid
20
+ from io import BytesIO
21
+
22
+ import PIL.Image
23
+ import requests
24
+
25
+ from .utils import _is_package_available
26
+
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ class AgentType:
32
+ """
33
+ Abstract class to be reimplemented to define types that can be returned by agents.
34
+
35
+ These objects serve three purposes:
36
+
37
+ - They behave as if they were the type they're meant to be, e.g., a string for text, a PIL.Image.Image for images
38
+ - They can be stringified: calling str(object) returns a string describing the object
39
+ - They should display correctly in IPython notebooks/Colab/Jupyter
40
+ """
41
+
42
+ def __init__(self, value):
43
+ self._value = value
44
+
45
+ def __str__(self):
46
+ return self.to_string()
47
+
48
+ def to_raw(self):
49
+ logger.error(
50
+ "This is a raw AgentType of unknown type. Display in notebooks and string conversion will be unreliable"
51
+ )
52
+ return self._value
53
+
54
+ def to_string(self) -> str:
55
+ logger.error(
56
+ "This is a raw AgentType of unknown type. Display in notebooks and string conversion will be unreliable"
57
+ )
58
+ return str(self._value)
59
+
60
+
61
+ class AgentText(AgentType, str):
62
+ """
63
+ Text type returned by the agent. Behaves as a string.
64
+ """
65
+
66
+ def to_raw(self):
67
+ return self._value
68
+
69
+ def to_string(self):
70
+ return str(self._value)
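Illustrative sketch of the three-purpose contract described above, on the simplest subclass:

```python
from smolagents.agent_types import AgentText

t = AgentText("hello")
assert isinstance(t, str)      # behaves as the underlying type
assert t.to_raw() == "hello"   # raw value
assert str(t) == "hello"       # stringification goes through to_string()
```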
71
+
72
+
73
+ class AgentImage(AgentType, PIL.Image.Image):
74
+ """
75
+ Image type returned by the agent. Behaves as a PIL.Image.Image.
76
+ """
77
+
78
+ def __init__(self, value):
79
+ AgentType.__init__(self, value)
80
+ PIL.Image.Image.__init__(self)
81
+
82
+ self._path = None
83
+ self._raw = None
84
+ self._tensor = None
85
+
86
+ if isinstance(value, AgentImage):
87
+ self._raw, self._path, self._tensor = value._raw, value._path, value._tensor
88
+ elif isinstance(value, PIL.Image.Image):
89
+ self._raw = value
90
+ elif isinstance(value, bytes):
91
+ self._raw = PIL.Image.open(BytesIO(value))
92
+ elif isinstance(value, (str, pathlib.Path)):
93
+ self._path = value
94
+ else:
95
+ try:
96
+ import torch
97
+
98
+ if isinstance(value, torch.Tensor):
99
+ self._tensor = value
100
+ import numpy as np
101
+
102
+ if isinstance(value, np.ndarray):
103
+ self._tensor = torch.from_numpy(value)
104
+ except ModuleNotFoundError:
105
+ pass
106
+
107
+ if self._path is None and self._raw is None and self._tensor is None:
108
+ raise TypeError(f"Unsupported type for {self.__class__.__name__}: {type(value)}")
109
+
110
+ def _ipython_display_(self, include=None, exclude=None):
111
+ """
112
+ Correctly displays this type in an IPython notebook (IPython, Colab, Jupyter, ...)
113
+ """
114
+ from IPython.display import Image, display
115
+
116
+ display(Image(self.to_string()))
117
+
118
+ def to_raw(self):
119
+ """
120
+ Returns the "raw" version of that object. In the case of an AgentImage, it is a PIL.Image.Image.
121
+ """
122
+ if self._raw is not None:
123
+ return self._raw
124
+
125
+ if self._path is not None:
126
+ self._raw = PIL.Image.open(self._path)
127
+ return self._raw
128
+
129
+ if self._tensor is not None:
130
+ import numpy as np
131
+
132
+ array = self._tensor.cpu().detach().numpy()
133
+ return PIL.Image.fromarray((255 - array * 255).astype(np.uint8))
134
+
135
+ def to_string(self):
136
+ """
137
+ Returns the stringified version of that object. In the case of an AgentImage, it is a path to the serialized
138
+ version of the image.
139
+ """
140
+ if self._path is not None:
141
+ return self._path
142
+
143
+ if self._raw is not None:
144
+ directory = tempfile.mkdtemp()
145
+ self._path = os.path.join(directory, str(uuid.uuid4()) + ".png")
146
+ self._raw.save(self._path, format="png")
147
+ return self._path
148
+
149
+ if self._tensor is not None:
150
+ import numpy as np
151
+
152
+ array = self._tensor.cpu().detach().numpy()
153
+
154
+ # There is likely a simpler way than loading into an image and then saving it
155
+ img = PIL.Image.fromarray((255 - array * 255).astype(np.uint8))
156
+
157
+ directory = tempfile.mkdtemp()
158
+ self._path = os.path.join(directory, str(uuid.uuid4()) + ".png")
159
+ img.save(self._path, format="png")
160
+
161
+ return self._path
162
+
163
+ def save(self, output_bytes, format: str = None, **params):
164
+ """
165
+ Saves the image to a file.
166
+ Args:
167
+ output_bytes: The output file object or buffer to save the image to (forwarded to PIL.Image.save).
168
+ format (str): The format to use for the output image. The format is the same as in PIL.Image.save.
169
+ **params: Additional parameters to pass to PIL.Image.save.
170
+ """
171
+ img = self.to_raw()
172
+ img.save(output_bytes, format=format, **params)
173
+
174
+
175
+ class AgentAudio(AgentType, str):
176
+ """
177
+ Audio type returned by the agent.
178
+ """
179
+
180
+ def __init__(self, value, samplerate=16_000):
181
+ if not _is_package_available("soundfile") or not _is_package_available("torch"):
182
+ raise ModuleNotFoundError(
183
+ "Please install 'audio' extra to use AgentAudio: `pip install 'smolagents[audio]'`"
184
+ )
185
+ import numpy as np
186
+ import torch
187
+
188
+ super().__init__(value)
189
+
190
+ self._path = None
191
+ self._tensor = None
192
+
193
+ self.samplerate = samplerate
194
+ if isinstance(value, (str, pathlib.Path)):
195
+ self._path = value
196
+ elif isinstance(value, torch.Tensor):
197
+ self._tensor = value
198
+ elif isinstance(value, tuple):
199
+ self.samplerate = value[0]
200
+ if isinstance(value[1], np.ndarray):
201
+ self._tensor = torch.from_numpy(value[1])
202
+ else:
203
+ self._tensor = torch.tensor(value[1])
204
+ else:
205
+ raise ValueError(f"Unsupported audio type: {type(value)}")
206
+
207
+ def _ipython_display_(self, include=None, exclude=None):
208
+ """
209
+ Correctly displays this type in an IPython notebook (IPython, Colab, Jupyter, ...)
210
+ """
211
+ from IPython.display import Audio, display
212
+
213
+ display(Audio(self.to_string(), rate=self.samplerate))
214
+
215
+ def to_raw(self):
216
+ """
217
+ Returns the "raw" version of that object. It is a `torch.Tensor` object.
218
+ """
219
+ import soundfile as sf
220
+
221
+ if self._tensor is not None:
222
+ return self._tensor
223
+
224
+ import torch
225
+
226
+ if self._path is not None:
227
+ if "://" in str(self._path):
228
+ response = requests.get(self._path)
229
+ response.raise_for_status()
230
+ tensor, self.samplerate = sf.read(BytesIO(response.content))
231
+ else:
232
+ tensor, self.samplerate = sf.read(self._path)
233
+ self._tensor = torch.tensor(tensor)
234
+ return self._tensor
235
+
236
+ def to_string(self):
237
+ """
238
+ Returns the stringified version of that object. In the case of an AgentAudio, it is a path to the serialized
239
+ version of the audio.
240
+ """
241
+ import soundfile as sf
242
+
243
+ if self._path is not None:
244
+ return self._path
245
+
246
+ if self._tensor is not None:
247
+ directory = tempfile.mkdtemp()
248
+ self._path = os.path.join(directory, str(uuid.uuid4()) + ".wav")
249
+ sf.write(self._path, self._tensor, samplerate=self.samplerate)
250
+ return self._path
251
+
252
+
253
+ _AGENT_TYPE_MAPPING = {"string": AgentText, "image": AgentImage, "audio": AgentAudio}
254
+
255
+
256
+ def handle_agent_input_types(*args, **kwargs):
257
+ args = [(arg.to_raw() if isinstance(arg, AgentType) else arg) for arg in args]
258
+ kwargs = {k: (v.to_raw() if isinstance(v, AgentType) else v) for k, v in kwargs.items()}
259
+ return args, kwargs
260
+
261
+
262
+ def handle_agent_output_types(output, output_type=None):
263
+ if output_type in _AGENT_TYPE_MAPPING:
264
+ # If the class has defined outputs, we can map directly according to the class definition
265
+ decoded_outputs = _AGENT_TYPE_MAPPING[output_type](output)
266
+ return decoded_outputs
267
+
268
+ # If the class does not have defined outputs, then we map according to the type
269
+ if isinstance(output, str):
270
+ return AgentText(output)
271
+ if isinstance(output, PIL.Image.Image):
272
+ return AgentImage(output)
273
+ try:
274
+ import torch
275
+
276
+ if isinstance(output, torch.Tensor):
277
+ return AgentAudio(output)
278
+ except ModuleNotFoundError:
279
+ pass
280
+ return output
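A minimal sketch of the output mapping (illustrative): a declared `output_type` takes precedence, otherwise the runtime type decides via the isinstance checks above:

```python
from smolagents.agent_types import AgentText, handle_agent_output_types

declared = handle_agent_output_types("done", output_type="string")
inferred = handle_agent_output_types("done")  # no declared type: inferred from str
assert isinstance(declared, AgentText) and isinstance(inferred, AgentText)
```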
281
+
282
+
283
+ __all__ = ["AgentType", "AgentImage", "AgentText", "AgentAudio"]
src/smolagents/agents.py ADDED
@@ -0,0 +1,1725 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+
4
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ import importlib
18
+ import inspect
19
+ import json
20
+ import os
21
+ import re
22
+ import tempfile
23
+ import textwrap
24
+ import time
25
+ import warnings
26
+ from abc import ABC, abstractmethod
27
+ from collections.abc import Callable, Generator
28
+ from concurrent.futures import ThreadPoolExecutor, as_completed
29
+ from dataclasses import dataclass
30
+ from logging import getLogger
31
+ from pathlib import Path
32
+ from typing import TYPE_CHECKING, Any, Literal, TypeAlias, TypedDict, Union
33
+
34
+ import jinja2
35
+ import yaml
36
+ from huggingface_hub import create_repo, metadata_update, snapshot_download, upload_folder
37
+ from jinja2 import StrictUndefined, Template
38
+ from rich.console import Group
39
+ from rich.live import Live
40
+ from rich.markdown import Markdown
41
+ from rich.panel import Panel
42
+ from rich.rule import Rule
43
+ from rich.text import Text
44
+
45
+
46
+ if TYPE_CHECKING:
47
+ import PIL.Image
48
+
49
+ from .agent_types import AgentAudio, AgentImage, handle_agent_output_types
50
+ from .default_tools import TOOL_MAPPING, FinalAnswerTool
51
+ from .local_python_executor import BASE_BUILTIN_MODULES, LocalPythonExecutor, PythonExecutor, fix_final_answer_code
52
+ from .memory import (
53
+ ActionStep,
54
+ AgentMemory,
55
+ FinalAnswerStep,
56
+ PlanningStep,
57
+ SystemPromptStep,
58
+ TaskStep,
59
+ Timing,
60
+ TokenUsage,
61
+ ToolCall,
62
+ )
63
+ from .models import (
64
+ CODEAGENT_RESPONSE_FORMAT,
65
+ ChatMessage,
66
+ ChatMessageStreamDelta,
67
+ ChatMessageToolCall,
68
+ MessageRole,
69
+ Model,
70
+ agglomerate_stream_deltas,
71
+ parse_json_if_needed,
72
+ )
73
+ from .monitoring import (
74
+ YELLOW_HEX,
75
+ AgentLogger,
76
+ LogLevel,
77
+ Monitor,
78
+ )
79
+ from .remote_executors import DockerExecutor, E2BExecutor
80
+ from .tools import Tool, validate_tool_arguments
81
+ from .utils import (
82
+ AGENT_GRADIO_APP_TEMPLATE,
83
+ AgentError,
84
+ AgentExecutionError,
85
+ AgentGenerationError,
86
+ AgentMaxStepsError,
87
+ AgentParsingError,
88
+ AgentToolCallError,
89
+ AgentToolExecutionError,
90
+ extract_code_from_text,
91
+ is_valid_name,
92
+ make_init_file,
93
+ parse_code_blobs,
94
+ truncate_content,
95
+ )
96
+
97
+
98
+ logger = getLogger(__name__)
99
+
100
+
101
+ def get_variable_names(template: str) -> set[str]:
102
+ pattern = re.compile(r"\{\{([^{}]+)\}\}")
103
+ return {match.group(1).strip() for match in pattern.finditer(template)}
104
+
105
+
106
+ def populate_template(template: str, variables: dict[str, Any]) -> str:
107
+ compiled_template = Template(template, undefined=StrictUndefined)
108
+ try:
109
+ return compiled_template.render(**variables)
110
+ except Exception as e:
111
+ raise Exception(f"Error during jinja template rendering: {type(e).__name__}: {e}")
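Illustrative sketch of `populate_template` (import path assumed): `StrictUndefined` makes a missing variable fail loudly instead of silently rendering as an empty string:

```python
from smolagents.agents import populate_template

print(populate_template("Solve: {{ task }}", variables={"task": "2 ** 3.7384"}))
# Solve: 2 ** 3.7384

try:
    populate_template("Solve: {{ task }}", variables={})
except Exception as e:
    print(e)  # Error during jinja template rendering: UndefinedError: 'task' is undefined
```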
112
+
113
+
114
+ @dataclass
115
+ class ActionOutput:
116
+ output: Any
117
+ is_final_answer: bool
118
+
119
+
120
+ @dataclass
121
+ class ToolOutput:
122
+ id: str
123
+ output: Any
124
+ is_final_answer: bool
125
+ observation: str
126
+ tool_call: ToolCall
127
+
128
+
129
+ class PlanningPromptTemplate(TypedDict):
130
+ """
131
+ Prompt templates for the planning step.
132
+
133
+ Args:
134
+ initial_plan (`str`): Initial plan prompt.
135
+ update_plan_pre_messages (`str`): Update plan pre-messages prompt.
136
+ update_plan_post_messages (`str`): Update plan post-messages prompt.
137
+ """
138
+
139
+ initial_plan: str
140
+ update_plan_pre_messages: str
141
+ update_plan_post_messages: str
142
+
143
+
144
+ class ManagedAgentPromptTemplate(TypedDict):
145
+ """
146
+ Prompt templates for the managed agent.
147
+
148
+ Args:
149
+ task (`str`): Task prompt.
150
+ report (`str`): Report prompt.
151
+ """
152
+
153
+ task: str
154
+ report: str
155
+
156
+
157
+ class FinalAnswerPromptTemplate(TypedDict):
158
+ """
159
+ Prompt templates for the final answer.
160
+
161
+ Args:
162
+ pre_messages (`str`): Pre-messages prompt.
163
+ post_messages (`str`): Post-messages prompt.
164
+ """
165
+
166
+ pre_messages: str
167
+ post_messages: str
168
+
169
+
170
+ class PromptTemplates(TypedDict):
171
+ """
172
+ Prompt templates for the agent.
173
+
174
+ Args:
175
+ system_prompt (`str`): System prompt.
176
+ planning ([`~agents.PlanningPromptTemplate`]): Planning prompt templates.
177
+ managed_agent ([`~agents.ManagedAgentPromptTemplate`]): Managed agent prompt templates.
178
+ final_answer ([`~agents.FinalAnswerPromptTemplate`]): Final answer prompt templates.
179
+ """
180
+
181
+ system_prompt: str
182
+ planning: PlanningPromptTemplate
183
+ managed_agent: ManagedAgentPromptTemplate
184
+ final_answer: FinalAnswerPromptTemplate
185
+
186
+
187
+ EMPTY_PROMPT_TEMPLATES = PromptTemplates(
188
+ system_prompt="",
189
+ planning=PlanningPromptTemplate(
190
+ initial_plan="",
191
+ update_plan_pre_messages="",
192
+ update_plan_post_messages="",
193
+ ),
194
+ managed_agent=ManagedAgentPromptTemplate(task="", report=""),
195
+ final_answer=FinalAnswerPromptTemplate(pre_messages="", post_messages=""),
196
+ )
197
+
198
+
199
+ @dataclass
200
+ class RunResult:
201
+ """Holds extended information about an agent run.
202
+
203
+ Attributes:
204
+ output (Any | None): The final output of the agent run, if available.
205
+ state (Literal["success", "max_steps_error"]): The final state of the agent after the run.
206
+ messages (list[dict]): The agent's memory, as a list of messages.
207
+ token_usage (TokenUsage | None): Count of tokens used during the run.
208
+ timing (Timing): Timing details of the agent run: start time, end time, duration.
209
+ """
210
+
211
+ output: Any | None
212
+ state: Literal["success", "max_steps_error"]
213
+ messages: list[dict]
214
+ token_usage: TokenUsage | None
215
+ timing: Timing
216
+
217
+
218
+ StreamEvent: TypeAlias = Union[
219
+ ChatMessageStreamDelta,
220
+ ChatMessageToolCall,
221
+ ActionOutput,
222
+ ToolCall,
223
+ ToolOutput,
224
+ PlanningStep,
225
+ ActionStep,
226
+ FinalAnswerStep,
227
+ ]
228
+
229
+
230
+ class MultiStepAgent(ABC):
231
+ """
232
+ Agent class that solves the given task step by step, using the ReAct framework:
233
+ Until the objective is reached, the agent performs a cycle of action (given by the LLM) and observation (obtained from the environment).
234
+
235
+ Args:
236
+ tools (`list[Tool]`): [`Tool`]s that the agent can use.
237
+ model (`Callable[[list[dict[str, str]]], ChatMessage]`): Model that will generate the agent's actions.
238
+ prompt_templates ([`~agents.PromptTemplates`], *optional*): Prompt templates.
239
+ instructions (`str`, *optional*): Custom instructions for the agent; they will be inserted into the system prompt.
240
+ max_steps (`int`, default `20`): Maximum number of steps the agent can take to solve the task.
241
+ add_base_tools (`bool`, default `False`): Whether to add the base tools to the agent's tools.
242
+ verbosity_level (`LogLevel`, default `LogLevel.INFO`): Level of verbosity of the agent's logs.
243
+ grammar (`dict[str, str]`, *optional*): Grammar used to parse the LLM output.
244
+ <Deprecated version="1.17.0">
245
+ Parameter `grammar` is deprecated and will be removed in version 1.20.
246
+ </Deprecated>
247
+ managed_agents (`list`, *optional*): Managed agents that the agent can call.
248
+ step_callbacks (`list[Callable]`, *optional*): Callbacks that will be called at each step.
249
+ planning_interval (`int`, *optional*): Interval at which the agent will run a planning step.
250
+ name (`str`, *optional*): Necessary for a managed agent only - the name by which this agent can be called.
251
+ description (`str`, *optional*): Necessary for a managed agent only - the description of this agent.
252
+ provide_run_summary (`bool`, *optional*): Whether to provide a run summary when called as a managed agent.
253
+ final_answer_checks (`list[Callable]`, *optional*): List of validation functions to run before accepting a final answer.
254
+ Each function should:
255
+ - Take the final answer and the agent's memory as arguments.
256
+ - Return a boolean indicating whether the final answer is valid.
257
+ """
258
+
259
+ def __init__(
260
+ self,
261
+ tools: list[Tool],
262
+ model: Model,
263
+ prompt_templates: PromptTemplates | None = None,
264
+ instructions: str | None = None,
265
+ max_steps: int = 20,
266
+ add_base_tools: bool = False,
267
+ verbosity_level: LogLevel = LogLevel.INFO,
268
+ grammar: dict[str, str] | None = None,
269
+ managed_agents: list | None = None,
270
+ step_callbacks: list[Callable] | None = None,
271
+ planning_interval: int | None = None,
272
+ name: str | None = None,
273
+ description: str | None = None,
274
+ provide_run_summary: bool = False,
275
+ final_answer_checks: list[Callable] | None = None,
276
+ return_full_result: bool = False,
277
+ logger: AgentLogger | None = None,
278
+ ):
279
+ self.agent_name = self.__class__.__name__
280
+ self.model = model
281
+ self.prompt_templates = prompt_templates or EMPTY_PROMPT_TEMPLATES
282
+ if prompt_templates is not None:
283
+ missing_keys = set(EMPTY_PROMPT_TEMPLATES.keys()) - set(prompt_templates.keys())
284
+ assert not missing_keys, (
285
+ f"Some prompt templates are missing from your custom `prompt_templates`: {missing_keys}"
286
+ )
287
+ for key, value in EMPTY_PROMPT_TEMPLATES.items():
288
+ if isinstance(value, dict):
289
+ for subkey in value.keys():
290
+ assert key in prompt_templates.keys() and (subkey in prompt_templates[key].keys()), (
291
+ f"Some prompt templates are missing from your custom `prompt_templates`: {subkey} under {key}"
292
+ )
293
+
294
+ self.max_steps = max_steps
295
+ self.step_number = 0
296
+ if grammar is not None:
297
+ warnings.warn(
298
+ "Parameter 'grammar' is deprecated and will be removed in version 1.20.",
299
+ FutureWarning,
300
+ )
301
+ self.grammar = grammar
302
+ self.planning_interval = planning_interval
303
+ self.state: dict[str, Any] = {}
304
+ self.name = self._validate_name(name)
305
+ self.description = description
306
+ self.provide_run_summary = provide_run_summary
307
+ self.final_answer_checks = final_answer_checks if final_answer_checks is not None else []
308
+ self.return_full_result = return_full_result
309
+ self.instructions = instructions
310
+ self._setup_managed_agents(managed_agents)
311
+ self._setup_tools(tools, add_base_tools)
312
+ self._validate_tools_and_managed_agents(tools, managed_agents)
313
+
314
+ self.task: str | None = None
315
+ self.memory = AgentMemory(self.system_prompt)
316
+
317
+ if logger is None:
318
+ self.logger = AgentLogger(level=verbosity_level)
319
+ else:
320
+ self.logger = logger
321
+
322
+ self.monitor = Monitor(self.model, self.logger)
323
+ self.step_callbacks = step_callbacks if step_callbacks is not None else []
324
+ self.step_callbacks.append(self.monitor.update_metrics)
325
+ self.stream_outputs = False
326
+
327
+ @property
328
+ def system_prompt(self) -> str:
329
+ return self.initialize_system_prompt()
330
+
331
+ @system_prompt.setter
332
+ def system_prompt(self, value: str):
333
+ raise AttributeError(
334
+ """The 'system_prompt' property is read-only. Use 'self.prompt_templates["system_prompt"]' instead."""
335
+ )
336
+
337
+ def _validate_name(self, name: str | None) -> str | None:
338
+ if name is not None and not is_valid_name(name):
339
+ raise ValueError(f"Agent name '{name}' must be a valid Python identifier and not a reserved keyword.")
340
+ return name
341
+
342
+ def _setup_managed_agents(self, managed_agents: list | None = None) -> None:
343
+ """Setup managed agents with proper logging."""
344
+ self.managed_agents = {}
345
+ if managed_agents:
346
+ assert all(agent.name and agent.description for agent in managed_agents), (
347
+ "All managed agents need both a name and a description!"
348
+ )
349
+ self.managed_agents = {agent.name: agent for agent in managed_agents}
350
+ # Ensure managed agents can be called as tools by the model: set their inputs and output_type
351
+ for agent in self.managed_agents.values():
352
+ agent.inputs = {
353
+ "task": {"type": "string", "description": "Long detailed description of the task."},
354
+ "additional_args": {
355
+ "type": "object",
356
+ "description": "Dictionary of extra inputs to pass to the managed agent, e.g. images, dataframes, or any other contextual data it may need.",
357
+ },
358
+ }
359
+ agent.output_type = "string"
360
+
361
+ def _setup_tools(self, tools, add_base_tools):
362
+ assert all(isinstance(tool, Tool) for tool in tools), "All elements must be instances of Tool (or a subclass)"
363
+ self.tools = {tool.name: tool for tool in tools}
364
+ if add_base_tools:
365
+ self.tools.update(
366
+ {
367
+ name: cls()
368
+ for name, cls in TOOL_MAPPING.items()
369
+ if name != "python_interpreter" or self.__class__.__name__ == "ToolCallingAgent"
370
+ }
371
+ )
372
+ self.tools.setdefault("final_answer", FinalAnswerTool())
373
+
374
+ def _validate_tools_and_managed_agents(self, tools, managed_agents):
375
+ tool_and_managed_agent_names = [tool.name for tool in tools]
376
+ if managed_agents is not None:
377
+ tool_and_managed_agent_names += [agent.name for agent in managed_agents]
378
+ if self.name:
379
+ tool_and_managed_agent_names.append(self.name)
380
+ if len(tool_and_managed_agent_names) != len(set(tool_and_managed_agent_names)):
381
+ raise ValueError(
382
+ "Each tool or managed_agent should have a unique name! You passed these duplicate names: "
383
+ f"{[name for name in tool_and_managed_agent_names if tool_and_managed_agent_names.count(name) > 1]}"
384
+ )
385
+
386
+ def run(
387
+ self,
388
+ task: str,
389
+ stream: bool = False,
390
+ reset: bool = True,
391
+ images: list["PIL.Image.Image"] | None = None,
392
+ additional_args: dict | None = None,
393
+ max_steps: int | None = None,
394
+ ):
395
+ """
396
+ Run the agent for the given task.
397
+
398
+ Args:
399
+ task (`str`): Task to perform.
400
+ stream (`bool`): Whether to run in streaming mode.
401
+ If `True`, returns a generator that yields each step as it is executed. You must iterate over this generator to process the individual steps (e.g., using a for loop or `next()`).
402
+ If `False`, executes all steps internally and returns only the final answer after completion.
403
+ reset (`bool`): Whether to reset the conversation or keep it going from previous run.
404
+ images (`list[PIL.Image.Image]`, *optional*): Image objects.
405
+ additional_args (`dict`, *optional*): Any other variables that you want to pass to the agent run, for instance images or dataframes. Give them clear names!
406
+ max_steps (`int`, *optional*): Maximum number of steps the agent can take to solve the task. If not provided, the agent's default value is used.
407
+
408
+ Example:
409
+ ```py
410
+ from smolagents import CodeAgent
411
+ agent = CodeAgent(tools=[])
412
+ agent.run("What is the result of 2 power 3.7384?")
413
+ ```
414
+ """
415
+ max_steps = max_steps or self.max_steps
416
+ self.task = task
417
+ self.interrupt_switch = False
418
+ if additional_args is not None:
419
+ self.state.update(additional_args)
420
+ self.task += f"""
421
+ You have been provided with these additional arguments, which you can access using the keys as variables in your Python code:
422
+ {str(additional_args)}."""
423
+
424
+ self.memory.system_prompt = SystemPromptStep(system_prompt=self.system_prompt)
425
+ if reset:
426
+ self.memory.reset()
427
+ self.monitor.reset()
428
+
429
+ self.logger.log_task(
430
+ content=self.task.strip(),
431
+ subtitle=f"{type(self.model).__name__} - {(self.model.model_id if hasattr(self.model, 'model_id') else '')}",
432
+ level=LogLevel.INFO,
433
+ title=self.name if hasattr(self, "name") else None,
434
+ )
435
+ self.memory.steps.append(TaskStep(task=self.task, task_images=images))
436
+
437
+ if getattr(self, "python_executor", None):
438
+ self.python_executor.send_variables(variables=self.state)
439
+ self.python_executor.send_tools({**self.tools, **self.managed_agents})
440
+
441
+ if stream:
442
+ # The steps are returned as they are executed through a generator to iterate on.
443
+ return self._run_stream(task=self.task, max_steps=max_steps, images=images)
444
+ run_start_time = time.time()
445
+ # Outputs are returned only at the end. We only look at the last step.
446
+
447
+ steps = list(self._run_stream(task=self.task, max_steps=max_steps, images=images))
448
+ assert isinstance(steps[-1], FinalAnswerStep)
449
+ output = steps[-1].output
450
+
451
+ if self.return_full_result:
452
+ total_input_tokens = 0
453
+ total_output_tokens = 0
454
+ correct_token_usage = True
455
+ for step in self.memory.steps:
456
+ if isinstance(step, (ActionStep, PlanningStep)):
457
+ if step.token_usage is None:
458
+ correct_token_usage = False
459
+ break
460
+ else:
461
+ total_input_tokens += step.token_usage.input_tokens
462
+ total_output_tokens += step.token_usage.output_tokens
463
+ if correct_token_usage:
464
+ token_usage = TokenUsage(input_tokens=total_input_tokens, output_tokens=total_output_tokens)
465
+ else:
466
+ token_usage = None
467
+
468
+ if self.memory.steps and isinstance(getattr(self.memory.steps[-1], "error", None), AgentMaxStepsError):
469
+ state = "max_steps_error"
470
+ else:
471
+ state = "success"
472
+
473
+ messages = self.memory.get_full_steps()
474
+
475
+ return RunResult(
476
+ output=output,
477
+ token_usage=token_usage,
478
+ messages=messages,
479
+ timing=Timing(start_time=run_start_time, end_time=time.time()),
480
+ state=state,
481
+ )
482
+
483
+ return output
484
+
485
+ def _run_stream(
486
+ self, task: str, max_steps: int, images: list["PIL.Image.Image"] | None = None
487
+ ) -> Generator[ActionStep | PlanningStep | FinalAnswerStep | ChatMessageStreamDelta]:
488
+ self.step_number = 1
489
+ returned_final_answer = False
490
+ while not returned_final_answer and self.step_number <= max_steps:
491
+ if self.interrupt_switch:
492
+ raise AgentError("Agent interrupted.", self.logger)
493
+
494
+ # Run a planning step if scheduled
495
+ if self.planning_interval is not None and (
496
+ self.step_number == 1 or (self.step_number - 1) % self.planning_interval == 0
497
+ ):
498
+ planning_start_time = time.time()
499
+ planning_step = None
500
+ for element in self._generate_planning_step(
501
+ task, is_first_step=len(self.memory.steps) == 1, step=self.step_number
502
+ ): # Don't use the attribute step_number here, because there can be steps from previous runs
503
+ yield element
504
+ planning_step = element
505
+ assert isinstance(planning_step, PlanningStep) # Last yielded element should be a PlanningStep
506
+ self.memory.steps.append(planning_step)
507
+ planning_end_time = time.time()
508
+ planning_step.timing = Timing(
509
+ start_time=planning_start_time,
510
+ end_time=planning_end_time,
511
+ )
512
+
513
+ # Start action step!
514
+ action_step_start_time = time.time()
515
+ action_step = ActionStep(
516
+ step_number=self.step_number,
517
+ timing=Timing(start_time=action_step_start_time),
518
+ observations_images=images,
519
+ )
520
+ self.logger.log_rule(f"Step {self.step_number}", level=LogLevel.INFO)
521
+ try:
522
+ for output in self._step_stream(action_step):
523
+ # Yield all
524
+ yield output
525
+
526
+ if isinstance(output, ActionOutput) and output.is_final_answer:
527
+ final_answer = output.output
528
+ self.logger.log(
529
+ Text(f"Final answer: {final_answer}", style=f"bold {YELLOW_HEX}"),
530
+ level=LogLevel.INFO,
531
+ )
532
+
533
+ if self.final_answer_checks:
534
+ self._validate_final_answer(final_answer)
535
+ returned_final_answer = True
536
+ action_step.is_final_answer = True
537
+
538
+ except AgentGenerationError as e:
539
+ # Agent generation errors are not caused by a Model error but by an implementation error, so we should raise them and exit.
540
+ raise e
541
+ except AgentError as e:
542
+ # Other AgentError types are caused by the Model, so we should log them and iterate.
543
+ action_step.error = e
544
+ finally:
545
+ self._finalize_step(action_step)
546
+ self.memory.steps.append(action_step)
547
+ yield action_step
548
+ self.step_number += 1
549
+
550
+ if not returned_final_answer and self.step_number == max_steps + 1:
551
+ final_answer = self._handle_max_steps_reached(task, images)
552
+ yield action_step
553
+ yield FinalAnswerStep(handle_agent_output_types(final_answer))
554
+
555
+ def _validate_final_answer(self, final_answer: Any):
556
+ for check_function in self.final_answer_checks:
557
+ try:
558
+ assert check_function(final_answer, self.memory)
559
+ except Exception as e:
560
+ raise AgentError(f"Check {check_function.__name__} failed with error: {e}", self.logger)
561
+
562
+ def _finalize_step(self, memory_step: ActionStep):
563
+ memory_step.timing.end_time = time.time()
564
+ for callback in self.step_callbacks:
565
+ # For compatibility with old callbacks that don't take the agent as an argument
566
+ callback(memory_step) if len(inspect.signature(callback).parameters) == 1 else callback(
567
+ memory_step, agent=self
568
+ )
569
+
570
+ def _handle_max_steps_reached(self, task: str, images: list["PIL.Image.Image"]) -> Any:
571
+ action_step_start_time = time.time()
572
+ final_answer = self.provide_final_answer(task, images)
573
+ final_memory_step = ActionStep(
574
+ step_number=self.step_number,
575
+ error=AgentMaxStepsError("Reached max steps.", self.logger),
576
+ timing=Timing(start_time=action_step_start_time, end_time=time.time()),
577
+ token_usage=final_answer.token_usage,
578
+ )
579
+ final_memory_step.action_output = final_answer.content
580
+ self._finalize_step(final_memory_step)
581
+ self.memory.steps.append(final_memory_step)
582
+ return final_answer.content
583
+
584
+ def _generate_planning_step(
585
+ self, task, is_first_step: bool, step: int
586
+ ) -> Generator[ChatMessageStreamDelta | PlanningStep]:
587
+ start_time = time.time()
588
+ if is_first_step:
589
+ input_messages = [
590
+ ChatMessage(
591
+ role=MessageRole.USER,
592
+ content=[
593
+ {
594
+ "type": "text",
595
+ "text": populate_template(
596
+ self.prompt_templates["planning"]["initial_plan"],
597
+ variables={"task": task, "tools": self.tools, "managed_agents": self.managed_agents},
598
+ ),
599
+ }
600
+ ],
601
+ )
602
+ ]
603
+ if self.stream_outputs and hasattr(self.model, "generate_stream"):
604
+ plan_message_content = ""
605
+ output_stream = self.model.generate_stream(input_messages, stop_sequences=["<end_plan>"]) # type: ignore
606
+ input_tokens, output_tokens = 0, 0
607
+ with Live("", console=self.logger.console, vertical_overflow="visible") as live:
608
+ for event in output_stream:
609
+ if event.content is not None:
610
+ plan_message_content += event.content
611
+ live.update(Markdown(plan_message_content))
612
+ if event.token_usage:
613
+ output_tokens += event.token_usage.output_tokens
614
+ input_tokens = event.token_usage.input_tokens
615
+ yield event
616
+ else:
617
+ plan_message = self.model.generate(input_messages, stop_sequences=["<end_plan>"])
618
+ plan_message_content = plan_message.content
619
+ input_tokens, output_tokens = (
620
+ (
621
+ plan_message.token_usage.input_tokens,
622
+ plan_message.token_usage.output_tokens,
623
+ )
624
+ if plan_message.token_usage
625
+ else (None, None)
626
+ )
627
+ plan = textwrap.dedent(
628
+ f"""Here are the facts I know and the plan of action that I will follow to solve the task:\n```\n{plan_message_content}\n```"""
629
+ )
630
+ else:
631
+ # Summary mode removes the system prompt and previous planning messages output by the model.
632
+ # Removing previous planning messages avoids overly influencing the new plan.
633
+ memory_messages = self.write_memory_to_messages(summary_mode=True)
634
+ plan_update_pre = ChatMessage(
635
+ role=MessageRole.SYSTEM,
636
+ content=[
637
+ {
638
+ "type": "text",
639
+ "text": populate_template(
640
+ self.prompt_templates["planning"]["update_plan_pre_messages"], variables={"task": task}
641
+ ),
642
+ }
643
+ ],
644
+ )
645
+ plan_update_post = ChatMessage(
646
+ role=MessageRole.USER,
647
+ content=[
648
+ {
649
+ "type": "text",
650
+ "text": populate_template(
651
+ self.prompt_templates["planning"]["update_plan_post_messages"],
652
+ variables={
653
+ "task": task,
654
+ "tools": self.tools,
655
+ "managed_agents": self.managed_agents,
656
+ "remaining_steps": (self.max_steps - step),
657
+ },
658
+ ),
659
+ }
660
+ ],
661
+ )
662
+ input_messages = [plan_update_pre] + memory_messages + [plan_update_post]
663
+ if self.stream_outputs and hasattr(self.model, "generate_stream"):
664
+ plan_message_content = ""
665
+ input_tokens, output_tokens = 0, 0
666
+ with Live("", console=self.logger.console, vertical_overflow="visible") as live:
667
+ for event in self.model.generate_stream(
668
+ input_messages,
669
+ stop_sequences=["<end_plan>"],
670
+ ): # type: ignore
671
+ if event.content is not None:
672
+ plan_message_content += event.content
673
+ live.update(Markdown(plan_message_content))
674
+ if event.token_usage:
675
+ output_tokens += event.token_usage.output_tokens
676
+ input_tokens = event.token_usage.input_tokens
677
+ yield event
678
+ else:
679
+ plan_message = self.model.generate(input_messages, stop_sequences=["<end_plan>"])
680
+ plan_message_content = plan_message.content
681
+ if plan_message.token_usage is not None:
682
+ input_tokens, output_tokens = (
683
+ plan_message.token_usage.input_tokens,
684
+ plan_message.token_usage.output_tokens,
685
+ )
686
+ plan = textwrap.dedent(
687
+ f"""I still need to solve the task I was given:\n```\n{self.task}\n```\n\nHere are the facts I know and my new/updated plan of action to solve the task:\n```\n{plan_message_content}\n```"""
688
+ )
689
+ log_headline = "Initial plan" if is_first_step else "Updated plan"
690
+ self.logger.log(Rule(f"[bold]{log_headline}", style="orange"), Text(plan), level=LogLevel.INFO)
691
+ yield PlanningStep(
692
+ model_input_messages=input_messages,
693
+ plan=plan,
694
+ model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content=plan_message_content),
695
+ token_usage=TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens),
696
+ timing=Timing(start_time=start_time, end_time=time.time()),
697
+ )
698
+
699
+ @property
700
+ def logs(self):
701
+ logger.warning(
702
+ "The 'logs' attribute is deprecated and will soon be removed. Please use 'self.memory.steps' instead."
703
+ )
704
+ return [self.memory.system_prompt] + self.memory.steps
705
+
706
+ @abstractmethod
707
+ def initialize_system_prompt(self) -> str:
708
+ """To be implemented in child classes"""
709
+ ...
710
+
711
+ def interrupt(self):
712
+ """Interrupts the agent execution."""
713
+ self.interrupt_switch = True
714
+
715
+ def write_memory_to_messages(
716
+ self,
717
+ summary_mode: bool = False,
718
+ ) -> list[ChatMessage]:
719
+ """
720
+ Reads past LLM outputs, actions, and observations or errors from the memory into a series of messages
721
+ that can be used as input to the LLM. Adds a number of keywords (such as PLAN, error, etc.) to help
722
+ the LLM.
723
+ """
724
+ messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
725
+ for memory_step in self.memory.steps:
726
+ messages.extend(memory_step.to_messages(summary_mode=summary_mode))
727
+ return messages
728
+
729
+ def _step_stream(
730
+ self, memory_step: ActionStep
731
+ ) -> Generator[ChatMessageStreamDelta | ToolCall | ToolOutput | ActionOutput]:
732
+ """
733
+ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
734
+ Yields ChatMessageStreamDelta during the run if streaming is enabled.
735
+ At the end, yields either None if the step is not final, or the final answer.
736
+ """
737
+ raise NotImplementedError("This method should be implemented in child classes")
738
+
739
+ def step(self, memory_step: ActionStep) -> Any:
740
+ """
741
+ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
742
+ Returns either None if the step is not final, or the final answer.
743
+ """
744
+ return list(self._step_stream(memory_step))[-1]
745
+
746
+ def extract_action(self, model_output: str, split_token: str) -> tuple[str, str]:
747
+ """
748
+ Parse action from the LLM output
749
+
750
+ Args:
751
+ model_output (`str`): Output of the LLM
752
+ split_token (`str`): Separator for the action. Should match the example in the system prompt.
753
+ """
754
+ try:
755
+ split = model_output.split(split_token)
756
+ rationale, action = (
757
+ split[-2],
758
+ split[-1],
759
+ ) # NOTE: indexing from the end handles the case where split_token appears more than once in the output
760
+ except Exception:
761
+ raise AgentParsingError(
762
+ f"No '{split_token}' token provided in your output.\nYour output:\n{model_output}\n. Be sure to include an action, prefaced with '{split_token}'!",
763
+ self.logger,
764
+ )
765
+ return rationale.strip(), action.strip()
766
+
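+
+ # Illustrative sketch of the split logic above (hypothetical model output;
+ # not part of the original file):
+ #
+ #     output = "Thought: search the web.\nAction:\nweb_search('leopard speed')"
+ #     rationale, action = agent.extract_action(output, split_token="Action:")
+ #     # rationale == "Thought: search the web."
+ #     # action == "web_search('leopard speed')"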
767
+ def provide_final_answer(self, task: str, images: list["PIL.Image.Image"] | None = None) -> ChatMessage:
768
+ """
769
+ Provide the final answer to the task, based on the logs of the agent's interactions.
770
+
771
+ Args:
772
+ task (`str`): Task to perform.
773
+ images (`list[PIL.Image.Image]`, *optional*): Image object(s).
774
+
775
+ Returns:
776
+ `ChatMessage`: The final answer to the task, as a chat message.
777
+ """
778
+ messages = [
779
+ ChatMessage(
780
+ role=MessageRole.SYSTEM,
781
+ content=[
782
+ {
783
+ "type": "text",
784
+ "text": self.prompt_templates["final_answer"]["pre_messages"],
785
+ }
786
+ ],
787
+ )
788
+ ]
789
+ if images:
790
+ messages[0].content += [{"type": "image", "image": image} for image in images]
791
+ messages += self.write_memory_to_messages()[1:]
792
+ messages.append(
793
+ ChatMessage(
794
+ role=MessageRole.USER,
795
+ content=[
796
+ {
797
+ "type": "text",
798
+ "text": populate_template(
799
+ self.prompt_templates["final_answer"]["post_messages"], variables={"task": task}
800
+ ),
801
+ }
802
+ ],
803
+ )
804
+ )
805
+ try:
806
+ chat_message: ChatMessage = self.model.generate(messages)
807
+ return chat_message
808
+ except Exception as e:
809
+ return ChatMessage(role=MessageRole.ASSISTANT, content=f"Error in generating final LLM output:\n{e}")
810
+
811
+ def visualize(self):
812
+ """Creates a rich tree visualization of the agent's structure."""
813
+ self.logger.visualize_agent_tree(self)
814
+
815
+ def replay(self, detailed: bool = False):
816
+ """Prints a pretty replay of the agent's steps.
817
+
818
+ Args:
819
+ detailed (bool, optional): If True, also displays the memory at each step. Defaults to False.
820
+ Careful: this will greatly increase log length. Use only for debugging.
821
+ """
822
+ self.memory.replay(self.logger, detailed=detailed)
823
+
824
+ def __call__(self, task: str, **kwargs):
825
+ """Adds additional prompting for the managed agent, runs it, and wraps the output.
826
+ This method is called only by a managed agent.
827
+ """
828
+ full_task = populate_template(
829
+ self.prompt_templates["managed_agent"]["task"],
830
+ variables=dict(name=self.name, task=task),
831
+ )
832
+ result = self.run(full_task, **kwargs)
833
+ if isinstance(result, RunResult):
834
+ report = result.output
835
+ else:
836
+ report = result
837
+ answer = populate_template(
838
+ self.prompt_templates["managed_agent"]["report"], variables=dict(name=self.name, final_answer=report)
839
+ )
840
+ if self.provide_run_summary:
841
+ answer += "\n\nFor more detail, find below a summary of this agent's work:\n<summary_of_work>\n"
842
+ for message in self.write_memory_to_messages(summary_mode=True):
843
+ content = message["content"]
844
+ answer += "\n" + truncate_content(str(content)) + "\n---"
845
+ answer += "\n</summary_of_work>"
846
+ return answer
847
+
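+
+ # Illustrative sketch of how a manager agent reaches this __call__ (names are
+ # hypothetical; assumes `model` is any configured Model):
+ #
+ #     web_agent = ToolCallingAgent(tools=[DuckDuckGoSearchTool()], model=model,
+ #                                  name="web_agent", description="Runs web searches.")
+ #     manager = CodeAgent(tools=[], model=model, managed_agents=[web_agent])
+ #     # During manager.run(...), calling web_agent(task) wraps the task with the
+ #     # managed_agent prompt templates and returns the formatted report.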
848
+ def save(self, output_dir: str | Path, relative_path: str | None = None):
849
+ """
850
+ Saves the relevant code files for your agent. This will copy the code of your agent in `output_dir` as well as autogenerate:
851
+
852
+ - a `tools` folder containing the logic for each of the tools under `tools/{tool_name}.py`.
853
+ - a `managed_agents` folder containing the logic for each of the managed agents.
854
+ - an `agent.json` file containing a dictionary representing your agent.
855
+ - a `prompts.yaml` file containing the prompt templates used by your agent.
856
+ - an `app.py` file providing a UI for your agent when it is exported to a Space with `agent.push_to_hub()`
857
+ - a `requirements.txt` containing the names of the modules used by your agent (as detected when inspecting its
858
+ code)
859
+
860
+ Args:
861
+ output_dir (`str` or `Path`): The folder in which you want to save your agent.
+ relative_path (`str`, *optional*): Internal import-path prefix used when recursively saving managed agents.
862
+ """
863
+ make_init_file(output_dir)
864
+
865
+ # Recursively save managed agents
866
+ if self.managed_agents:
867
+ make_init_file(os.path.join(output_dir, "managed_agents"))
868
+ for agent_name, agent in self.managed_agents.items():
869
+ agent_suffix = f"managed_agents.{agent_name}"
870
+ if relative_path:
871
+ agent_suffix = relative_path + "." + agent_suffix
872
+ agent.save(os.path.join(output_dir, "managed_agents", agent_name), relative_path=agent_suffix)
873
+
874
+ class_name = self.__class__.__name__
875
+
876
+ # Save tools to different .py files
877
+ for tool in self.tools.values():
878
+ make_init_file(os.path.join(output_dir, "tools"))
879
+ tool.save(os.path.join(output_dir, "tools"), tool_file_name=tool.name, make_gradio_app=False)
880
+
881
+ # Save prompts to yaml
882
+ yaml_prompts = yaml.safe_dump(
883
+ self.prompt_templates,
884
+ default_style="|", # This forces block literals for all strings
885
+ default_flow_style=False,
886
+ width=float("inf"),
887
+ sort_keys=False,
888
+ allow_unicode=True,
889
+ indent=2,
890
+ )
891
+
892
+ with open(os.path.join(output_dir, "prompts.yaml"), "w", encoding="utf-8") as f:
893
+ f.write(yaml_prompts)
894
+
895
+ # Save agent dictionary to json
896
+ agent_dict = self.to_dict()
897
+ agent_dict["tools"] = [tool.name for tool in self.tools.values()]
898
+ agent_dict["managed_agents"] = {agent.name: agent.__class__.__name__ for agent in self.managed_agents.values()}
899
+ with open(os.path.join(output_dir, "agent.json"), "w", encoding="utf-8") as f:
900
+ json.dump(agent_dict, f, indent=4)
901
+
902
+ # Save requirements
903
+ with open(os.path.join(output_dir, "requirements.txt"), "w", encoding="utf-8") as f:
904
+ f.writelines(f"{r}\n" for r in agent_dict["requirements"])
905
+
906
+ # Make agent.py file with Gradio UI
907
+ agent_name = f"agent_{self.name}" if getattr(self, "name", None) else "agent"
908
+ managed_agent_relative_path = relative_path + "." if relative_path is not None else ""
909
+ app_template = AGENT_GRADIO_APP_TEMPLATE
910
+ template_env = jinja2.Environment(loader=jinja2.BaseLoader(), undefined=jinja2.StrictUndefined)
911
+ template_env.filters["repr"] = repr
912
+ template_env.filters["camelcase"] = lambda value: "".join(word.capitalize() for word in value.split("_"))
913
+ template = template_env.from_string(app_template)
914
+
915
+ # Render the app.py file from Jinja2 template
916
+ app_text = template.render(
917
+ {
918
+ "agent_name": agent_name,
919
+ "class_name": class_name,
920
+ "agent_dict": agent_dict,
921
+ "tools": self.tools,
922
+ "managed_agents": self.managed_agents,
923
+ "managed_agent_relative_path": managed_agent_relative_path,
924
+ }
925
+ )
926
+
927
+ with open(os.path.join(output_dir, "app.py"), "w", encoding="utf-8") as f:
928
+ f.write(app_text + "\n") # Append newline at the end
929
+
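+
+ # Illustrative usage sketch (hypothetical directory name):
+ #
+ #     agent.save("my_agent")
+ #     # -> my_agent/agent.json, my_agent/prompts.yaml, my_agent/app.py,
+ #     #    my_agent/requirements.txt, plus tools/ and managed_agents/ subfolders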
930
+ def to_dict(self) -> dict[str, Any]:
931
+ """Convert the agent to a dictionary representation.
932
+
933
+ Returns:
934
+ `dict`: Dictionary representation of the agent.
935
+ """
936
+ # TODO: handle serializing step_callbacks and final_answer_checks
937
+ for attr in ["final_answer_checks", "step_callbacks"]:
938
+ if getattr(self, attr, None):
939
+ self.logger.log(f"This agent has {attr}: they will be ignored by this method.", LogLevel.INFO)
940
+
941
+ tool_dicts = [tool.to_dict() for tool in self.tools.values()]
942
+ tool_requirements = {req for tool in self.tools.values() for req in tool.to_dict()["requirements"]}
943
+ managed_agents_requirements = {
944
+ req for managed_agent in self.managed_agents.values() for req in managed_agent.to_dict()["requirements"]
945
+ }
946
+ requirements = tool_requirements | managed_agents_requirements
947
+ if hasattr(self, "authorized_imports"):
948
+ requirements.update(
949
+ {package.split(".")[0] for package in self.authorized_imports if package not in BASE_BUILTIN_MODULES}
950
+ )
951
+
952
+ agent_dict = {
953
+ "class": self.__class__.__name__,
954
+ "tools": tool_dicts,
955
+ "model": {
956
+ "class": self.model.__class__.__name__,
957
+ "data": self.model.to_dict(),
958
+ },
959
+ "managed_agents": [managed_agent.to_dict() for managed_agent in self.managed_agents.values()],
960
+ "prompt_templates": self.prompt_templates,
961
+ "max_steps": self.max_steps,
962
+ "verbosity_level": int(self.logger.level),
963
+ "grammar": self.grammar,
964
+ "planning_interval": self.planning_interval,
965
+ "name": self.name,
966
+ "description": self.description,
967
+ "requirements": sorted(requirements),
968
+ }
969
+ return agent_dict
970
+
971
+ @classmethod
972
+ def from_dict(cls, agent_dict: dict[str, Any], **kwargs) -> "MultiStepAgent":
973
+ """Create agent from a dictionary representation.
974
+
975
+ Args:
976
+ agent_dict (`dict[str, Any]`): Dictionary representation of the agent.
977
+ **kwargs: Additional keyword arguments that will override agent_dict values.
978
+
979
+ Returns:
980
+ `MultiStepAgent`: Instance of the agent class.
981
+ """
982
+ # Load model
983
+ model_info = agent_dict["model"]
984
+ model_class = getattr(importlib.import_module("smolagents.models"), model_info["class"])
985
+ model = model_class.from_dict(model_info["data"])
986
+ # Load tools
987
+ tools = []
988
+ for tool_info in agent_dict["tools"]:
989
+ tools.append(Tool.from_code(tool_info["code"]))
990
+ # Load managed agents
991
+ managed_agents = []
992
+ for managed_agent_name, managed_agent_class_name in agent_dict["managed_agents"].items():
993
+ managed_agent_class = getattr(importlib.import_module("smolagents.agents"), managed_agent_class_name)
994
+ managed_agents.append(managed_agent_class.from_dict(agent_dict["managed_agents"][managed_agent_name]))
995
+ # Extract base agent parameters
996
+ agent_args = {
997
+ "model": model,
998
+ "tools": tools,
999
+ "prompt_templates": agent_dict.get("prompt_templates"),
1000
+ "max_steps": agent_dict.get("max_steps"),
1001
+ "verbosity_level": agent_dict.get("verbosity_level"),
1002
+ "grammar": agent_dict.get("grammar"),
1003
+ "planning_interval": agent_dict.get("planning_interval"),
1004
+ "name": agent_dict.get("name"),
1005
+ "description": agent_dict.get("description"),
1006
+ }
1007
+ # Filter out None values to use defaults from __init__
1008
+ agent_args = {k: v for k, v in agent_args.items() if v is not None}
1009
+ # Update with any additional kwargs
1010
+ agent_args.update(kwargs)
1011
+ # Create agent instance
1012
+ return cls(**agent_args)
1013
+
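+
+ # Illustrative round-trip sketch, pairing to_dict with from_dict (assumes an
+ # existing `agent` instance; the kwarg override is the documented behavior):
+ #
+ #     spec = agent.to_dict()
+ #     clone = type(agent).from_dict(spec, max_steps=5)  # kwargs override stored values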
1014
+ @classmethod
1015
+ def from_hub(
1016
+ cls,
1017
+ repo_id: str,
1018
+ token: str | None = None,
1019
+ trust_remote_code: bool = False,
1020
+ **kwargs,
1021
+ ):
1022
+ """
1023
+ Loads an agent defined on the Hub.
1024
+
1025
+ <Tip warning={true}>
1026
+
1027
+ Loading an agent from the Hub means that you'll download the agent and execute it locally.
1028
+ ALWAYS inspect the agent you're downloading before loading it within your runtime, as you would do when
1029
+ installing a package using pip/npm/apt.
1030
+
1031
+ </Tip>
1032
+
1033
+ Args:
1034
+ repo_id (`str`):
1035
+ The name of the repo on the Hub where your agent is defined.
1036
+ token (`str`, *optional*):
1037
+ The token to identify you on hf.co. If unset, will use the token generated when running
1038
+ `huggingface-cli login` (stored in `~/.huggingface`).
1039
+ trust_remote_code(`bool`, *optional*, defaults to False):
1040
+ This flag marks that you understand the risk of running remote code and that you trust this agent.
1041
+ If this is not set to True, loading the agent from the Hub will fail.
1042
+ kwargs (additional keyword arguments, *optional*):
1043
+ Additional keyword arguments that will be split in two: all arguments relevant to the Hub (such as
1044
+ `cache_dir`, `revision`, `subfolder`) will be used when downloading the files for your agent, and the
1045
+ others will be passed along to its init.
1046
+ """
1047
+ if not trust_remote_code:
1048
+ raise ValueError(
1049
+ "Loading an agent from Hub requires to acknowledge you trust its code: to do so, pass `trust_remote_code=True`."
1050
+ )
1051
+
1052
+ # Get the agent's Hub folder.
1053
+ download_kwargs = {"token": token, "repo_type": "space"} | {
1054
+ key: kwargs.pop(key)
1055
+ for key in [
1056
+ "cache_dir",
1057
+ "force_download",
1058
+ "proxies",
1059
+ "revision",
1060
+ "local_files_only",
1061
+ ]
1062
+ if key in kwargs
1063
+ }
1064
+
1065
+ download_folder = Path(snapshot_download(repo_id=repo_id, **download_kwargs))
1066
+ return cls.from_folder(download_folder, **kwargs)
1067
+
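+
+ # Illustrative usage sketch (hypothetical repo id):
+ #
+ #     agent = CodeAgent.from_hub("username/my-agent", trust_remote_code=True)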
1068
+ @classmethod
1069
+ def from_folder(cls, folder: str | Path, **kwargs):
1070
+ """Loads an agent from a local folder.
1071
+
1072
+ Args:
1073
+ folder (`str` or `Path`): The folder where the agent is saved.
1074
+ **kwargs: Additional keyword arguments that will be passed to the agent's init.
1075
+ """
1076
+ # Load agent.json
1077
+ folder = Path(folder)
1078
+ agent_dict = json.loads((folder / "agent.json").read_text())
1079
+
1080
+ # Load managed agents from their respective folders, recursively
1081
+ managed_agents = []
1082
+ for managed_agent_name, managed_agent_class_name in agent_dict["managed_agents"].items():
1083
+ agent_cls = getattr(importlib.import_module("smolagents.agents"), managed_agent_class_name)
1084
+ managed_agents.append(agent_cls.from_folder(folder / "managed_agents" / managed_agent_name))
1085
+ agent_dict["managed_agents"] = {}
1086
+
1087
+ # Load tools
1088
+ tools = []
1089
+ for tool_name in agent_dict["tools"]:
1090
+ tool_code = (folder / "tools" / f"{tool_name}.py").read_text()
1091
+ tools.append({"name": tool_name, "code": tool_code})
1092
+ agent_dict["tools"] = tools
1093
+
1094
+ # Add managed agents to kwargs to override the empty list in from_dict
1095
+ if managed_agents:
1096
+ kwargs["managed_agents"] = managed_agents
1097
+
1098
+ return cls.from_dict(agent_dict, **kwargs)
1099
+
1100
+ def push_to_hub(
1101
+ self,
1102
+ repo_id: str,
1103
+ commit_message: str = "Upload agent",
1104
+ private: bool | None = None,
1105
+ token: bool | str | None = None,
1106
+ create_pr: bool = False,
1107
+ ) -> str:
1108
+ """
1109
+ Upload the agent to the Hub.
1110
+
1111
+ Parameters:
1112
+ repo_id (`str`):
1113
+ The name of the repository you want to push to. It should contain your organization name when
1114
+ pushing to a given organization.
1115
+ commit_message (`str`, *optional*, defaults to `"Upload agent"`):
1116
+ Message to commit while pushing.
1117
+ private (`bool`, *optional*, defaults to `None`):
1118
+ Whether to make the repo private. If `None`, the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
1119
+ token (`bool` or `str`, *optional*):
1120
+ The token to use as HTTP bearer authorization for remote files. If unset, will use the token generated
1121
+ when running `huggingface-cli login` (stored in `~/.huggingface`).
1122
+ create_pr (`bool`, *optional*, defaults to `False`):
1123
+ Whether to create a PR with the uploaded files or directly commit.
1124
+ """
1125
+ repo_url = create_repo(
1126
+ repo_id=repo_id,
1127
+ token=token,
1128
+ private=private,
1129
+ exist_ok=True,
1130
+ repo_type="space",
1131
+ space_sdk="gradio",
1132
+ )
1133
+ repo_id = repo_url.repo_id
1134
+ metadata_update(
1135
+ repo_id,
1136
+ {"tags": ["smolagents", "agent"]},
1137
+ repo_type="space",
1138
+ token=token,
1139
+ overwrite=True,
1140
+ )
1141
+
1142
+ with tempfile.TemporaryDirectory() as work_dir:
1143
+ self.save(work_dir)
1144
+ logger.info(f"Uploading the following files to {repo_id}: {','.join(os.listdir(work_dir))}")
1145
+ return upload_folder(
1146
+ repo_id=repo_id,
1147
+ commit_message=commit_message,
1148
+ folder_path=work_dir,
1149
+ token=token,
1150
+ create_pr=create_pr,
1151
+ repo_type="space",
1152
+ )
1153
+
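+
+ # Illustrative usage sketch (hypothetical repo id): saves the agent to a
+ # temporary folder and uploads it to a Gradio Space:
+ #
+ #     agent.push_to_hub("username/my-agent", commit_message="First version")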
1154
+
1155
+ class ToolCallingAgent(MultiStepAgent):
1156
+ """
1157
+ This agent uses JSON-like tool calls, passing the available tools to the model via `tools_to_call_from` to leverage the LLM engine's tool-calling capabilities.
1158
+
1159
+ Args:
1160
+ tools (`list[Tool]`): [`Tool`]s that the agent can use.
1161
+ model (`Model`): Model that will generate the agent's actions.
1162
+ prompt_templates ([`~agents.PromptTemplates`], *optional*): Prompt templates.
1163
+ planning_interval (`int`, *optional*): Interval at which the agent will run a planning step.
1164
+ stream_outputs (`bool`, *optional*, default `False`): Whether to stream outputs during execution.
1165
+ max_tool_threads (`int`, *optional*): Maximum number of threads for parallel tool calls.
1166
+ Higher values increase concurrency, but also resource usage.
1167
+ Defaults to `ThreadPoolExecutor`'s default.
1168
+ **kwargs: Additional keyword arguments.
1169
+ """
1170
+
1171
+ def __init__(
1172
+ self,
1173
+ tools: list[Tool],
1174
+ model: Model,
1175
+ prompt_templates: PromptTemplates | None = None,
1176
+ planning_interval: int | None = None,
1177
+ stream_outputs: bool = False,
1178
+ max_tool_threads: int | None = None,
1179
+ **kwargs,
1180
+ ):
1181
+ prompt_templates = prompt_templates or yaml.safe_load(
1182
+ importlib.resources.files("smolagents.prompts").joinpath("toolcalling_agent.yaml").read_text()
1183
+ )
1184
+ super().__init__(
1185
+ tools=tools,
1186
+ model=model,
1187
+ prompt_templates=prompt_templates,
1188
+ planning_interval=planning_interval,
1189
+ **kwargs,
1190
+ )
1191
+ # Streaming setup
1192
+ self.stream_outputs = stream_outputs
1193
+ if self.stream_outputs and not hasattr(self.model, "generate_stream"):
1194
+ raise ValueError(
1195
+ "`stream_outputs` is set to True, but the model class implements no `generate_stream` method."
1196
+ )
1197
+ # Tool calling setup
1198
+ self.max_tool_threads = max_tool_threads
1199
+
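+
+ # Illustrative instantiation sketch (assumes a model supporting tool calling):
+ #
+ #     agent = ToolCallingAgent(tools=[DuckDuckGoSearchTool()],
+ #                              model=InferenceClientModel(),
+ #                              max_tool_threads=4)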
1200
+ @property
1201
+ def tools_and_managed_agents(self):
1202
+ """Returns a combined list of tools and managed agents."""
1203
+ return list(self.tools.values()) + list(self.managed_agents.values())
1204
+
1205
+ def initialize_system_prompt(self) -> str:
1206
+ system_prompt = populate_template(
1207
+ self.prompt_templates["system_prompt"],
1208
+ variables={
1209
+ "tools": self.tools,
1210
+ "managed_agents": self.managed_agents,
1211
+ "custom_instructions": self.instructions,
1212
+ },
1213
+ )
1214
+ return system_prompt
1215
+
1216
+ def _step_stream(
1217
+ self, memory_step: ActionStep
1218
+ ) -> Generator[ChatMessageStreamDelta | ToolCall | ToolOutput | ActionOutput]:
1219
+ """
1220
+ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
1221
+ Yields ChatMessageStreamDelta during the run if streaming is enabled.
1222
+ At the end, yields either None if the step is not final, or the final answer.
1223
+ """
1224
+ memory_messages = self.write_memory_to_messages()
1225
+
1226
+ input_messages = memory_messages.copy()
1227
+
1228
+ # Add new step in logs
1229
+ memory_step.model_input_messages = input_messages
1230
+
1231
+ try:
1232
+ if self.stream_outputs and hasattr(self.model, "generate_stream"):
1233
+ output_stream = self.model.generate_stream(
1234
+ input_messages,
1235
+ stop_sequences=["Observation:", "Calling tools:"],
1236
+ tools_to_call_from=self.tools_and_managed_agents,
1237
+ )
1238
+
1239
+ chat_message_stream_deltas: list[ChatMessageStreamDelta] = []
1240
+ with Live("", console=self.logger.console, vertical_overflow="visible") as live:
1241
+ for event in output_stream:
1242
+ chat_message_stream_deltas.append(event)
1243
+ live.update(
1244
+ Markdown(agglomerate_stream_deltas(chat_message_stream_deltas).render_as_markdown())
1245
+ )
1246
+ yield event
1247
+ chat_message = agglomerate_stream_deltas(chat_message_stream_deltas)
1248
+ else:
1249
+ chat_message: ChatMessage = self.model.generate(
1250
+ input_messages,
1251
+ stop_sequences=["Observation:", "Calling tools:"],
1252
+ tools_to_call_from=self.tools_and_managed_agents,
1253
+ )
1254
+ if chat_message.content is None and chat_message.raw is not None:
1255
+ log_content = str(chat_message.raw)
1256
+ else:
1257
+ log_content = str(chat_message.content) or ""
1258
+
1259
+ self.logger.log_markdown(
1260
+ content=log_content,
1261
+ title="Output message of the LLM:",
1262
+ level=LogLevel.DEBUG,
1263
+ )
1264
+
1265
+ # Record model output
1266
+ memory_step.model_output_message = chat_message
1267
+ memory_step.model_output = chat_message.content
1268
+ memory_step.token_usage = chat_message.token_usage
1269
+ except Exception as e:
1270
+ raise AgentGenerationError(f"Error while generating output:\n{e}", self.logger) from e
1271
+
1272
+ if chat_message.tool_calls is None or len(chat_message.tool_calls) == 0:
1273
+ try:
1274
+ chat_message = self.model.parse_tool_calls(chat_message)
1275
+ except Exception as e:
1276
+ raise AgentParsingError(f"Error while parsing tool call from model output: {e}", self.logger)
1277
+ else:
1278
+ for tool_call in chat_message.tool_calls:
1279
+ tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments)
1280
+ final_answer, got_final_answer = None, False
1281
+ for output in self.process_tool_calls(chat_message, memory_step):
1282
+ yield output
1283
+ if isinstance(output, ToolOutput):
1284
+ if output.is_final_answer:
1285
+ if got_final_answer:
1286
+ raise AgentToolExecutionError(
1287
+ "You returned multiple final answers. Please return only one single final answer!",
1288
+ self.logger,
1289
+ )
1290
+ final_answer = output.output
1291
+ got_final_answer = True
1292
+
1293
+ # Manage state variables
1294
+ if isinstance(final_answer, str) and final_answer in self.state.keys():
1295
+ final_answer = self.state[final_answer]
1296
+ yield ActionOutput(
1297
+ output=final_answer,
1298
+ is_final_answer=got_final_answer,
1299
+ )
1300
+
1301
+ def process_tool_calls(
1302
+ self, chat_message: ChatMessage, memory_step: ActionStep
1303
+ ) -> Generator[ToolCall | ToolOutput]:
1304
+ """Process tool calls from the model output and update agent memory.
1305
+
1306
+ Args:
1307
+ chat_message (`ChatMessage`): Chat message containing tool calls from the model.
1308
+ memory_step (`ActionStep`): Memory ActionStep to update with results.
1309
+
1310
+ Yields:
1311
+ `ToolCall | ToolOutput`: The tool call or tool output.
1312
+ """
1313
+ parallel_calls: dict[str, ToolCall] = {}
1314
+ assert chat_message.tool_calls is not None
1315
+ for chat_tool_call in chat_message.tool_calls:
1316
+ tool_call = ToolCall(
1317
+ name=chat_tool_call.function.name, arguments=chat_tool_call.function.arguments, id=chat_tool_call.id
1318
+ )
1319
+ yield tool_call
1320
+ parallel_calls[tool_call.id] = tool_call
1321
+
1322
+ # Helper function to process a single tool call
1323
+ def process_single_tool_call(tool_call: ToolCall) -> ToolOutput:
1324
+ tool_name = tool_call.name
1325
+ tool_arguments = tool_call.arguments or {}
1326
+ self.logger.log(
1327
+ Panel(Text(f"Calling tool: '{tool_name}' with arguments: {tool_arguments}")),
1328
+ level=LogLevel.INFO,
1329
+ )
1330
+ tool_call_result = self.execute_tool_call(tool_name, tool_arguments)
1331
+ tool_call_result_type = type(tool_call_result)
1332
+ if tool_call_result_type in [AgentImage, AgentAudio]:
1333
+ if tool_call_result_type == AgentImage:
1334
+ observation_name = "image.png"
1335
+ elif tool_call_result_type == AgentAudio:
1336
+ observation_name = "audio.mp3"
1337
+ # TODO: tool_call_result naming could allow for different names of same type
1338
+ self.state[observation_name] = tool_call_result
1339
+ observation = f"Stored '{observation_name}' in memory."
1340
+ else:
1341
+ observation = str(tool_call_result).strip()
1342
+ self.logger.log(
1343
+ f"Observations: {observation.replace('[', '|')}", # escape potential rich-tag-like components
1344
+ level=LogLevel.INFO,
1345
+ )
1346
+ is_final_answer = tool_name == "final_answer"
1347
+
1348
+ return ToolOutput(
1349
+ id=tool_call.id,
1350
+ output=tool_call_result,
1351
+ is_final_answer=is_final_answer,
1352
+ observation=observation,
1353
+ tool_call=tool_call,
1354
+ )
1355
+
1356
+ # Process tool calls in parallel
1357
+ outputs = {}
1358
+ if len(parallel_calls) == 1:
1359
+ # If there's only one call, process it directly
1360
+ tool_call = list(parallel_calls.values())[0]
1361
+ tool_output = process_single_tool_call(tool_call)
1362
+ outputs[tool_output.id] = tool_output
1363
+ yield tool_output
1364
+ else:
1365
+ # If multiple tool calls, process them in parallel
1366
+ with ThreadPoolExecutor(self.max_tool_threads) as executor:
1367
+ futures = [
1368
+ executor.submit(process_single_tool_call, tool_call) for tool_call in parallel_calls.values()
1369
+ ]
1370
+ for future in as_completed(futures):
1371
+ tool_output = future.result()
1372
+ outputs[tool_output.id] = tool_output
1373
+ yield tool_output
1374
+
1375
+ memory_step.tool_calls = [parallel_calls[k] for k in sorted(parallel_calls.keys())]
1376
+ memory_step.model_output = memory_step.model_output or ""
1377
+ memory_step.observations = memory_step.observations or ""
1378
+ for tool_output in [outputs[k] for k in sorted(outputs.keys())]:
1379
+ message = f"Tool call {tool_output.id}: calling '{tool_output.tool_call.name}' with arguments: {tool_output.tool_call.arguments}\n"
1380
+ memory_step.model_output += message
1381
+ memory_step.observations += tool_output.observation + "\n"
1382
+ memory_step.model_output = memory_step.model_output.rstrip("\n")
1383
+ memory_step.observations = (
1384
+ memory_step.observations.rstrip("\n") if memory_step.observations else memory_step.observations
1385
+ )
1386
+
1387
+ def _substitute_state_variables(self, arguments: dict[str, str] | str) -> dict[str, Any] | str:
1388
+ """Replace string values in arguments with their corresponding state values if they exist."""
1389
+ if isinstance(arguments, dict):
1390
+ return {
1391
+ key: self.state.get(value, value) if isinstance(value, str) else value
1392
+ for key, value in arguments.items()
1393
+ }
1394
+ return arguments
1395
+
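+
+ # Illustrative sketch of state substitution (hypothetical values): string
+ # arguments that match a state key are swapped for the stored object, e.g. an
+ # image stored under "image.png" by an earlier tool call:
+ #
+ #     agent.state["image.png"] = pil_image
+ #     agent._substitute_state_variables({"image": "image.png"})
+ #     # -> {"image": pil_image}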
1396
+ def execute_tool_call(self, tool_name: str, arguments: dict[str, str] | str) -> Any:
1397
+ """
1398
+ Execute a tool or managed agent with the provided arguments.
1399
+
1400
+ The arguments are replaced with the actual values from the state if they refer to state variables.
1401
+
1402
+ Args:
1403
+ tool_name (`str`): Name of the tool or managed agent to execute.
1404
+ arguments (dict[str, str] | str): Arguments passed to the tool call.
1405
+ """
1406
+ # Check if the tool exists
1407
+ available_tools = {**self.tools, **self.managed_agents}
1408
+ if tool_name not in available_tools:
1409
+ raise AgentToolExecutionError(
1410
+ f"Unknown tool {tool_name}, should be one of: {', '.join(available_tools)}.", self.logger
1411
+ )
1412
+
1413
+ # Get the tool and substitute state variables in arguments
1414
+ tool = available_tools[tool_name]
1415
+ arguments = self._substitute_state_variables(arguments)
1416
+ is_managed_agent = tool_name in self.managed_agents
1417
+
1418
+ error_msg = validate_tool_arguments(tool, arguments)
1419
+ if error_msg:
1420
+ raise AgentToolCallError(error_msg, self.logger)
1421
+
1422
+ try:
1423
+ # Call tool with appropriate arguments
1424
+ if isinstance(arguments, dict):
1425
+ return tool(**arguments) if is_managed_agent else tool(**arguments, sanitize_inputs_outputs=True)
1426
+ else:
1427
+ return tool(arguments) if is_managed_agent else tool(arguments, sanitize_inputs_outputs=True)
1428
+
1429
+ except Exception as e:
1430
+ # Handle execution errors
1431
+ if is_managed_agent:
1432
+ error_msg = (
1433
+ f"Error executing request to team member '{tool_name}' with arguments {str(arguments)}: {e}\n"
1434
+ "Please try again or request to another team member"
1435
+ )
1436
+ else:
1437
+ error_msg = (
1438
+ f"Error executing tool '{tool_name}' with arguments {str(arguments)}: {type(e).__name__}: {e}\n"
1439
+ "Please try again or use another tool"
1440
+ )
1441
+ raise AgentToolExecutionError(error_msg, self.logger) from e
1442
+
1443
+
1444
+ class CodeAgent(MultiStepAgent):
1445
+ """
1446
+ In this agent, the tool calls will be formulated by the LLM in code format, then parsed and executed.
1447
+
1448
+ Args:
1449
+ tools (`list[Tool]`): [`Tool`]s that the agent can use.
1450
+ model (`Model`): Model that will generate the agent's actions.
1451
+ prompt_templates ([`~agents.PromptTemplates`], *optional*): Prompt templates.
1452
+ additional_authorized_imports (`list[str]`, *optional*): Additional authorized imports for the agent.
1453
+ planning_interval (`int`, *optional*): Interval at which the agent will run a planning step.
1454
+ executor_type (`str`, default `"local"`): Which executor type to use between `"local"`, `"e2b"`, or `"docker"`.
1455
+ executor_kwargs (`dict`, *optional*): Additional arguments to pass to initialize the executor.
1456
+ max_print_outputs_length (`int`, *optional*): Maximum length of the print outputs.
1457
+ stream_outputs (`bool`, *optional*, default `False`): Whether to stream outputs during execution.
1458
+ use_structured_outputs_internally (`bool`, default `False`): Whether to use structured generation at each action step: improves performance for many models.
1459
+
1460
+ <Added version="1.17.0"/>
1461
+ grammar (`dict[str, str]`, *optional*): Grammar used to parse the LLM output.
1462
+ <Deprecated version="1.17.0">
1463
+ Parameter `grammar` is deprecated and will be removed in version 1.20.
1464
+ </Deprecated>
1465
+ **kwargs: Additional keyword arguments.
1466
+ """
1467
+
1468
+ def __init__(
1469
+ self,
1470
+ tools: list[Tool],
1471
+ model: Model,
1472
+ prompt_templates: PromptTemplates | None = None,
1473
+ additional_authorized_imports: list[str] | None = None,
1474
+ planning_interval: int | None = None,
1475
+ executor_type: str | None = "local",
1476
+ executor_kwargs: dict[str, Any] | None = None,
1477
+ max_print_outputs_length: int | None = None,
1478
+ stream_outputs: bool = False,
1479
+ use_structured_outputs_internally: bool = False,
1480
+ grammar: dict[str, str] | None = None,
1481
+ **kwargs,
1482
+ ):
1483
+ self.additional_authorized_imports = additional_authorized_imports if additional_authorized_imports else []
1484
+ self.authorized_imports = sorted(set(BASE_BUILTIN_MODULES) | set(self.additional_authorized_imports))
1485
+ self.max_print_outputs_length = max_print_outputs_length
1486
+ self._use_structured_outputs_internally = use_structured_outputs_internally
1487
+ if use_structured_outputs_internally:
1488
+ prompt_templates = prompt_templates or yaml.safe_load(
1489
+ importlib.resources.files("smolagents.prompts").joinpath("structured_code_agent.yaml").read_text()
1490
+ )
1491
+ else:
1492
+ prompt_templates = prompt_templates or yaml.safe_load(
1493
+ importlib.resources.files("smolagents.prompts").joinpath("code_agent.yaml").read_text()
1494
+ )
1495
+ if grammar and use_structured_outputs_internally:
1496
+ raise ValueError("You cannot use 'grammar' and 'use_structured_outputs_internally' at the same time.")
1497
+ super().__init__(
1498
+ tools=tools,
1499
+ model=model,
1500
+ prompt_templates=prompt_templates,
1501
+ grammar=grammar,
1502
+ planning_interval=planning_interval,
1503
+ **kwargs,
1504
+ )
1505
+ self.stream_outputs = stream_outputs
1506
+ if self.stream_outputs and not hasattr(self.model, "generate_stream"):
1507
+ raise ValueError(
1508
+ "`stream_outputs` is set to True, but the model class implements no `generate_stream` method."
1509
+ )
1510
+ if "*" in self.additional_authorized_imports:
1511
+ self.logger.log(
1512
+ "Caution: you set an authorization for all imports, meaning your agent can decide to import any package it deems necessary. This might raise issues if the package is not installed in your environment.",
1513
+ level=LogLevel.INFO,
1514
+ )
1515
+ self.executor_type = executor_type or "local"
1516
+ self.executor_kwargs = executor_kwargs or {}
1517
+ self.python_executor = self.create_python_executor()
1518
+
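+
+ # Illustrative instantiation sketch (local executor with extra authorized imports):
+ #
+ #     agent = CodeAgent(tools=[], model=InferenceClientModel(),
+ #                       additional_authorized_imports=["numpy", "pandas"],
+ #                       executor_type="local")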
1519
+ def __enter__(self):
1520
+ return self
1521
+
1522
+ def __exit__(self, exc_type, exc_value, traceback):
1523
+ self.cleanup()
1524
+
1525
+ def cleanup(self):
1526
+ """Clean up resources used by the agent, such as the remote Python executor."""
1527
+ if hasattr(self.python_executor, "cleanup"):
1528
+ self.python_executor.cleanup()
1529
+
1530
+ def create_python_executor(self) -> PythonExecutor:
1531
+ match self.executor_type:
1532
+ case "e2b" | "docker":
1533
+ if self.managed_agents:
1534
+ raise Exception("Managed agents are not yet supported with remote code execution.")
1535
+ if self.executor_type == "e2b":
1536
+ return E2BExecutor(self.additional_authorized_imports, self.logger, **self.executor_kwargs)
1537
+ else:
1538
+ return DockerExecutor(self.additional_authorized_imports, self.logger, **self.executor_kwargs)
1539
+ case "local":
1540
+ return LocalPythonExecutor(
1541
+ self.additional_authorized_imports,
1542
+ **{"max_print_outputs_length": self.max_print_outputs_length} | self.executor_kwargs,
1543
+ )
1544
+ case _:
1545
+ raise ValueError(f"Unsupported executor type: {self.executor_type}")
1546
+
1547
+ def initialize_system_prompt(self) -> str:
1548
+ system_prompt = populate_template(
1549
+ self.prompt_templates["system_prompt"],
1550
+ variables={
1551
+ "tools": self.tools,
1552
+ "managed_agents": self.managed_agents,
1553
+ "authorized_imports": (
1554
+ "You can import from any package you want."
1555
+ if "*" in self.authorized_imports
1556
+ else str(self.authorized_imports)
1557
+ ),
1558
+ "custom_instructions": self.instructions,
1559
+ },
1560
+ )
1561
+ return system_prompt
1562
+
1563
+ def _step_stream(
1564
+ self, memory_step: ActionStep
1565
+ ) -> Generator[ChatMessageStreamDelta | ToolCall | ToolOutput | ActionOutput]:
1566
+ """
1567
+ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
1568
+ Yields ChatMessageStreamDelta during the run if streaming is enabled.
1569
+ At the end, yields either None if the step is not final, or the final answer.
1570
+ """
1571
+ memory_messages = self.write_memory_to_messages()
1572
+
1573
+ input_messages = memory_messages.copy()
1574
+ ### Generate model output ###
1575
+ memory_step.model_input_messages = input_messages
1576
+ try:
1577
+ additional_args: dict[str, Any] = {}
1578
+ if self.grammar:
1579
+ additional_args["grammar"] = self.grammar
1580
+ if self._use_structured_outputs_internally:
1581
+ additional_args["response_format"] = CODEAGENT_RESPONSE_FORMAT
1582
+ if self.stream_outputs:
1583
+ output_stream = self.model.generate_stream(
1584
+ input_messages,
1585
+ stop_sequences=["<end_code>", "Observation:", "Calling tools:"],
1586
+ **additional_args,
1587
+ )
1588
+ chat_message_stream_deltas: list[ChatMessageStreamDelta] = []
1589
+ with Live("", console=self.logger.console, vertical_overflow="visible") as live:
1590
+ for event in output_stream:
1591
+ chat_message_stream_deltas.append(event)
1592
+ live.update(
1593
+ Markdown(agglomerate_stream_deltas(chat_message_stream_deltas).render_as_markdown())
1594
+ )
1595
+ yield event
1596
+ chat_message = agglomerate_stream_deltas(chat_message_stream_deltas)
1597
+ memory_step.model_output_message = chat_message
1598
+ output_text = chat_message.content
1599
+ else:
1600
+ chat_message: ChatMessage = self.model.generate(
1601
+ input_messages,
1602
+ stop_sequences=["<end_code>", "Observation:", "Calling tools:"],
1603
+ **additional_args,
1604
+ )
1605
+ memory_step.model_output_message = chat_message
1606
+ output_text = chat_message.content
1607
+ self.logger.log_markdown(
1608
+ content=output_text,
1609
+ title="Output message of the LLM:",
1610
+ level=LogLevel.DEBUG,
1611
+ )
1612
+
1613
+ # This adds <end_code> sequence to the history.
1614
+ # This will nudge subsequent LLM calls to finish with <end_code>, thus efficiently stopping generation.
1615
+ if output_text and output_text.strip().endswith("```"):
1616
+ output_text += "<end_code>"
1617
+ memory_step.model_output_message.content = output_text
1618
+
1619
+ memory_step.token_usage = chat_message.token_usage
1620
+ memory_step.model_output = output_text
1621
+ except Exception as e:
1622
+ raise AgentGenerationError(f"Error in generating model output:\n{e}", self.logger) from e
1623
+
1624
+ ### Parse output ###
1625
+ try:
1626
+ if self._use_structured_outputs_internally:
1627
+ code_action = json.loads(output_text)["code"]
1628
+ code_action = extract_code_from_text(code_action) or code_action
1629
+ else:
1630
+ code_action = parse_code_blobs(output_text)
1631
+ code_action = fix_final_answer_code(code_action)
1632
+ memory_step.code_action = code_action
1633
+ except Exception as e:
1634
+ error_msg = f"Error in code parsing:\n{e}\nMake sure to provide correct code blobs."
1635
+ raise AgentParsingError(error_msg, self.logger)
1636
+
1637
+ tool_call = ToolCall(
1638
+ name="python_interpreter",
1639
+ arguments=code_action,
1640
+ id=f"call_{len(self.memory.steps)}",
1641
+ )
1642
+ yield tool_call
1643
+ memory_step.tool_calls = [tool_call]
1644
+
1645
+ ### Execute action ###
1646
+ self.logger.log_code(title="Executing parsed code:", content=code_action, level=LogLevel.INFO)
1647
+ is_final_answer = False
1648
+ try:
1649
+ output, execution_logs, is_final_answer = self.python_executor(code_action)
1650
+ execution_outputs_console = []
1651
+ if len(execution_logs) > 0:
1652
+ execution_outputs_console += [
1653
+ Text("Execution logs:", style="bold"),
1654
+ Text(execution_logs),
1655
+ ]
1656
+ observation = "Execution logs:\n" + execution_logs
1657
+ except Exception as e:
1658
+ if hasattr(self.python_executor, "state") and "_print_outputs" in self.python_executor.state:
1659
+ execution_logs = str(self.python_executor.state["_print_outputs"])
1660
+ if len(execution_logs) > 0:
1661
+ execution_outputs_console = [
1662
+ Text("Execution logs:", style="bold"),
1663
+ Text(execution_logs),
1664
+ ]
1665
+ memory_step.observations = "Execution logs:\n" + execution_logs
1666
+ self.logger.log(Group(*execution_outputs_console), level=LogLevel.INFO)
1667
+ error_msg = str(e)
1668
+ if "Import of " in error_msg and " is not allowed" in error_msg:
1669
+ self.logger.log(
1670
+ "[bold red]Warning to user: Code execution failed due to an unauthorized import - Consider passing said import under `additional_authorized_imports` when initializing your CodeAgent.",
1671
+ level=LogLevel.INFO,
1672
+ )
1673
+ raise AgentExecutionError(error_msg, self.logger)
1674
+
1675
+ truncated_output = truncate_content(str(output))
1676
+ observation += "Last output from code snippet:\n" + truncated_output
1677
+ memory_step.observations = observation
1678
+
1679
+ if not is_final_answer:
1680
+ execution_outputs_console += [
1681
+ Text(
1682
+ f"Out: {truncated_output}",
1683
+ ),
1684
+ ]
1685
+ self.logger.log(Group(*execution_outputs_console), level=LogLevel.INFO)
1686
+ memory_step.action_output = output
1687
+ yield ActionOutput(output=output, is_final_answer=is_final_answer)
1688
+
1689
+ def to_dict(self) -> dict[str, Any]:
1690
+ """Convert the agent to a dictionary representation.
1691
+
1692
+ Returns:
1693
+ `dict`: Dictionary representation of the agent.
1694
+ """
1695
+ agent_dict = super().to_dict()
1696
+ agent_dict["authorized_imports"] = self.authorized_imports
1697
+ agent_dict["executor_type"] = self.executor_type
1698
+ agent_dict["executor_kwargs"] = self.executor_kwargs
1699
+ agent_dict["max_print_outputs_length"] = self.max_print_outputs_length
1700
+ return agent_dict
1701
+
1702
+ @classmethod
1703
+ def from_dict(cls, agent_dict: dict[str, Any], **kwargs) -> "CodeAgent":
1704
+ """Create CodeAgent from a dictionary representation.
1705
+
1706
+ Args:
1707
+ agent_dict (`dict[str, Any]`): Dictionary representation of the agent.
1708
+ **kwargs: Additional keyword arguments that will override agent_dict values.
1709
+
1710
+ Returns:
1711
+ `CodeAgent`: Instance of the CodeAgent class.
1712
+ """
1713
+ # Add CodeAgent-specific parameters to kwargs
1714
+ code_agent_kwargs = {
1715
+ "additional_authorized_imports": agent_dict.get("authorized_imports"),
1716
+ "executor_type": agent_dict.get("executor_type"),
1717
+ "executor_kwargs": agent_dict.get("executor_kwargs"),
1718
+ "max_print_outputs_length": agent_dict.get("max_print_outputs_length"),
1719
+ }
1720
+ # Filter out None values
1721
+ code_agent_kwargs = {k: v for k, v in code_agent_kwargs.items() if v is not None}
1722
+ # Update with any additional kwargs
1723
+ code_agent_kwargs.update(kwargs)
1724
+ # Call the parent class's from_dict method
1725
+ return super().from_dict(agent_dict, **code_agent_kwargs)
src/smolagents/cli.py ADDED
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+
4
+ # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ import argparse
18
+ import os
19
+
20
+ from dotenv import load_dotenv
21
+
22
+ from smolagents import CodeAgent, InferenceClientModel, LiteLLMModel, Model, OpenAIServerModel, Tool, TransformersModel
23
+ from smolagents.default_tools import TOOL_MAPPING
24
+
25
+
26
+ leopard_prompt = "How many seconds would it take for a leopard at full speed to run through Pont des Arts?"
27
+
28
+
29
+ def parse_arguments():
30
+ parser = argparse.ArgumentParser(description="Run a CodeAgent with all specified parameters")
31
+ parser.add_argument(
32
+ "prompt",
33
+ type=str,
34
+ nargs="?", # Makes it optional
35
+ default=leopard_prompt,
36
+ help="The prompt to run with the agent",
37
+ )
38
+ parser.add_argument(
39
+ "--model-type",
40
+ type=str,
41
+ default="InferenceClientModel",
42
+ help="The model type to use (e.g., InferenceClientModel, OpenAIServerModel, LiteLLMModel, TransformersModel)",
43
+ )
44
+ parser.add_argument(
45
+ "--model-id",
46
+ type=str,
47
+ default="Qwen/Qwen2.5-Coder-32B-Instruct",
48
+ help="The model ID to use for the specified model type",
49
+ )
50
+ parser.add_argument(
51
+ "--imports",
52
+ nargs="*", # accepts zero or more arguments
53
+ default=[],
54
+ help="Space-separated list of imports to authorize (e.g., 'numpy pandas')",
55
+ )
56
+ parser.add_argument(
57
+ "--tools",
58
+ nargs="*",
59
+ default=["web_search"],
60
+ help="Space-separated list of tools that the agent can use (e.g., 'tool1 tool2 tool3')",
61
+ )
62
+ parser.add_argument(
63
+ "--verbosity-level",
64
+ type=int,
65
+ default=1,
66
+ help="The verbosity level, as an int in [0, 1, 2].",
67
+ )
68
+ group = parser.add_argument_group("api options", "Options for API-based model types")
69
+ group.add_argument(
70
+ "--provider",
71
+ type=str,
72
+ default=None,
73
+ help="The inference provider to use for the model",
74
+ )
75
+ group.add_argument(
76
+ "--api-base",
77
+ type=str,
78
+ help="The base URL for the model",
79
+ )
80
+ group.add_argument(
81
+ "--api-key",
82
+ type=str,
83
+ help="The API key for the model",
84
+ )
85
+ return parser.parse_args()
86
+
87
+
88
+ def load_model(
89
+ model_type: str,
90
+ model_id: str,
91
+ api_base: str | None = None,
92
+ api_key: str | None = None,
93
+ provider: str | None = None,
94
+ ) -> Model:
95
+ if model_type == "OpenAIServerModel":
96
+ return OpenAIServerModel(
97
+ api_key=api_key or os.getenv("FIREWORKS_API_KEY"),
98
+ api_base=api_base or "https://api.fireworks.ai/inference/v1",
99
+ model_id=model_id,
100
+ )
101
+ elif model_type == "LiteLLMModel":
102
+ return LiteLLMModel(
103
+ model_id=model_id,
104
+ api_key=api_key,
105
+ api_base=api_base,
106
+ )
107
+ elif model_type == "TransformersModel":
108
+ return TransformersModel(model_id=model_id, device_map="auto")
109
+ elif model_type == "InferenceClientModel":
110
+ return InferenceClientModel(
111
+ model_id=model_id,
112
+ token=api_key or os.getenv("HF_API_KEY"),
113
+ provider=provider,
114
+ )
115
+ else:
116
+ raise ValueError(f"Unsupported model type: {model_type}")
117
+
118
+
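+
+ # Illustrative sketch (assumes the relevant API key is available in the
+ # environment, e.g. HF_API_KEY for InferenceClientModel):
+ #
+ #     model = load_model("InferenceClientModel", "Qwen/Qwen2.5-Coder-32B-Instruct")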
119
+ def run_smolagent(
120
+ prompt: str,
121
+ tools: list[str],
122
+ model_type: str,
123
+ model_id: str,
124
+ api_base: str | None = None,
125
+ api_key: str | None = None,
126
+ imports: list[str] | None = None,
127
+ provider: str | None = None,
128
+ ) -> None:
129
+ load_dotenv()
130
+
131
+ model = load_model(model_type, model_id, api_base=api_base, api_key=api_key, provider=provider)
132
+
133
+ available_tools = []
134
+ for tool_name in tools:
135
+ if "/" in tool_name:
136
+ available_tools.append(Tool.from_space(tool_name))
137
+ else:
138
+ if tool_name in TOOL_MAPPING:
139
+ available_tools.append(TOOL_MAPPING[tool_name]())
140
+ else:
141
+ raise ValueError(f"Tool {tool_name} is not recognized either as a default tool or a Space.")
142
+
143
+ print(f"Running agent with these tools: {tools}")
144
+ agent = CodeAgent(tools=available_tools, model=model, additional_authorized_imports=imports)
145
+
146
+ agent.run(prompt)
147
+
148
+
149
+ def main() -> None:
150
+ args = parse_arguments()
151
+ run_smolagent(
152
+ args.prompt,
153
+ args.tools,
154
+ args.model_type,
155
+ args.model_id,
156
+ provider=args.provider,
157
+ api_base=args.api_base,
158
+ api_key=args.api_key,
159
+ imports=args.imports,
160
+ )
161
+
162
+
163
+ if __name__ == "__main__":
164
+ main()
src/smolagents/default_tools.py ADDED
@@ -0,0 +1,577 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+
4
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ from dataclasses import dataclass
18
+ from typing import Any
19
+
20
+ from .local_python_executor import (
21
+ BASE_BUILTIN_MODULES,
22
+ BASE_PYTHON_TOOLS,
23
+ evaluate_python_code,
24
+ )
25
+ from .tools import PipelineTool, Tool
26
+
27
+
28
+ @dataclass
29
+ class PreTool:
30
+ name: str
31
+ inputs: dict[str, str]
32
+ output_type: type
33
+ task: str
34
+ description: str
35
+ repo_id: str
36
+
37
+
38
+ class PythonInterpreterTool(Tool):
39
+ name = "python_interpreter"
40
+ description = "This is a tool that evaluates python code. It can be used to perform calculations."
41
+ inputs = {
42
+ "code": {
43
+ "type": "string",
44
+ "description": "The python code to run in interpreter",
45
+ }
46
+ }
47
+ output_type = "string"
48
+
49
+ def __init__(self, *args, authorized_imports=None, **kwargs):
50
+ if authorized_imports is None:
51
+ self.authorized_imports = list(set(BASE_BUILTIN_MODULES))
52
+ else:
53
+ self.authorized_imports = list(set(BASE_BUILTIN_MODULES) | set(authorized_imports))
54
+ self.inputs = {
55
+ "code": {
56
+ "type": "string",
57
+ "description": (
58
+ "The code snippet to evaluate. All variables used in this snippet must be defined in this same snippet, "
59
+ f"else you will get an error. This code can only import the following python libraries: {self.authorized_imports}."
60
+ ),
61
+ }
62
+ }
63
+ self.base_python_tools = BASE_PYTHON_TOOLS
64
+ self.python_evaluator = evaluate_python_code
65
+ super().__init__(*args, **kwargs)
66
+
67
+ def forward(self, code: str) -> str:
68
+ state = {}
69
+ output = str(
70
+ self.python_evaluator(
71
+ code,
72
+ state=state,
73
+ static_tools=self.base_python_tools,
74
+ authorized_imports=self.authorized_imports,
75
+ )[0]  # The second element is the boolean is_final_answer
76
+ )
77
+ return f"Stdout:\n{str(state['_print_outputs'])}\nOutput: {output}"
78
+
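+
+ # Illustrative usage sketch:
+ #
+ #     tool = PythonInterpreterTool(authorized_imports=["math"])
+ #     print(tool("import math\nprint(math.sqrt(2))"))  # prints captured stdout + final output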
79
+
80
+ class FinalAnswerTool(Tool):
81
+ name = "final_answer"
82
+ description = "Provides a final answer to the given problem."
83
+ inputs = {"answer": {"type": "any", "description": "The final answer to the problem"}}
84
+ output_type = "any"
85
+
86
+ def forward(self, answer: Any) -> Any:
87
+ return answer
88
+
89
+
90
+ class UserInputTool(Tool):
91
+ name = "user_input"
92
+ description = "Asks for user's input on a specific question"
93
+ inputs = {"question": {"type": "string", "description": "The question to ask the user"}}
94
+ output_type = "string"
95
+
96
+ def forward(self, question):
97
+ user_input = input(f"{question} => Type your answer here:")
98
+ return user_input
99
+
100
+
101
+ class DuckDuckGoSearchTool(Tool):
102
+ name = "web_search"
103
+ description = """Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."""
104
+ inputs = {"query": {"type": "string", "description": "The search query to perform."}}
105
+ output_type = "string"
106
+
107
+ def __init__(self, max_results=10, **kwargs):
108
+ super().__init__()
109
+ self.max_results = max_results
110
+ try:
111
+ from duckduckgo_search import DDGS
112
+ except ImportError as e:
113
+ raise ImportError(
114
+ "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
115
+ ) from e
116
+ self.ddgs = DDGS(**kwargs)
117
+
118
+ def forward(self, query: str) -> str:
119
+ results = self.ddgs.text(query, max_results=self.max_results)
120
+ if len(results) == 0:
121
+ raise Exception("No results found! Try a less restrictive/shorter query.")
122
+ postprocessed_results = [f"[{result['title']}]({result['href']})\n{result['body']}" for result in results]
123
+ return "## Search Results\n\n" + "\n\n".join(postprocessed_results)
124
+
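+
+ # Illustrative usage sketch (requires `pip install duckduckgo-search`):
+ #
+ #     search = DuckDuckGoSearchTool(max_results=5)
+ #     print(search("pont des arts length"))  # markdown-formatted top results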
125
+
126
+ class GoogleSearchTool(Tool):
127
+ name = "web_search"
128
+ description = """Performs a google web search for your query then returns a string of the top search results."""
129
+ inputs = {
130
+ "query": {"type": "string", "description": "The search query to perform."},
131
+ "filter_year": {
132
+ "type": "integer",
133
+ "description": "Optionally restrict results to a certain year",
134
+ "nullable": True,
135
+ },
136
+ }
137
+ output_type = "string"
138
+
139
+ def __init__(self, provider: str = "serpapi"):
140
+ super().__init__()
141
+ import os
142
+
143
+ self.provider = provider
144
+ if provider == "serpapi":
145
+ self.organic_key = "organic_results"
146
+ api_key_env_name = "SERPAPI_API_KEY"
147
+ else:
148
+ self.organic_key = "organic"
149
+ api_key_env_name = "SERPER_API_KEY"
150
+ self.api_key = os.getenv(api_key_env_name)
151
+ if self.api_key is None:
152
+ raise ValueError(f"Missing API key. Make sure you have '{api_key_env_name}' in your env variables.")
153
+
154
+ def forward(self, query: str, filter_year: int | None = None) -> str:
155
+ import requests
156
+
157
+ if self.provider == "serpapi":
158
+ params = {
159
+ "q": query,
160
+ "api_key": self.api_key,
161
+ "engine": "google",
162
+ "google_domain": "google.com",
163
+ }
164
+ base_url = "https://serpapi.com/search.json"
165
+ else:
166
+ params = {
167
+ "q": query,
168
+ "api_key": self.api_key,
169
+ }
170
+ base_url = "https://google.serper.dev/search"
171
+ if filter_year is not None:
172
+ params["tbs"] = f"cdr:1,cd_min:01/01/{filter_year},cd_max:12/31/{filter_year}"
173
+
174
+ response = requests.get(base_url, params=params)
175
+
176
+ if response.status_code == 200:
177
+ results = response.json()
178
+ else:
179
+ raise ValueError(response.json())
180
+
181
+ if self.organic_key not in results.keys():
182
+ if filter_year is not None:
183
+ raise Exception(
184
+ f"No results found for query: '{query}' with filtering on year={filter_year}. Use a less restrictive query or do not filter on year."
185
+ )
186
+ else:
187
+ raise Exception(f"No results found for query: '{query}'. Use a less restrictive query.")
188
+ if len(results[self.organic_key]) == 0:
189
+ year_filter_message = f" with filter year={filter_year}" if filter_year is not None else ""
190
+ return f"No results found for '{query}'{year_filter_message}. Try with a more general query, or remove the year filter."
191
+
192
+ web_snippets = []
193
+ if self.organic_key in results:
194
+ for idx, page in enumerate(results[self.organic_key]):
195
+ date_published = ""
196
+ if "date" in page:
197
+ date_published = "\nDate published: " + page["date"]
198
+
199
+ source = ""
200
+ if "source" in page:
201
+ source = "\nSource: " + page["source"]
202
+
203
+ snippet = ""
204
+ if "snippet" in page:
205
+ snippet = "\n" + page["snippet"]
206
+
207
+ redacted_version = f"{idx}. [{page['title']}]({page['link']}){date_published}{source}\n{snippet}"
208
+ web_snippets.append(redacted_version)
209
+
210
+ return "## Search Results\n" + "\n\n".join(web_snippets)
211
+
212
+
213
+ class ApiWebSearchTool(Tool):
214
+ name = "web_search"
215
+ description = "Performs a web search for a query and returns a string of the top search results formatted as markdown with titles, URLs, and descriptions."
216
+ inputs = {"query": {"type": "string", "description": "The search query to perform."}}
217
+ output_type = "string"
218
+
219
+ def __init__(
220
+ self, endpoint: str = "", api_key: str = "", api_key_name: str = "", headers: dict = None, params: dict = None
221
+ ):
222
+ import os
223
+
224
+ super().__init__()
225
+ self.endpoint = endpoint or "https://api.search.brave.com/res/v1/web/search"
226
+ self.api_key = api_key or os.getenv(api_key_name)
227
+ self.headers = headers or {"X-Subscription-Token": self.api_key}
228
+ self.params = params or {"count": 10}
229
+
230
+ def forward(self, query: str) -> str:
231
+ import requests
232
+
233
+ params = {**self.params, "q": query}
234
+ response = requests.get(self.endpoint, headers=self.headers, params=params)
235
+ response.raise_for_status()
236
+ data = response.json()
237
+ results = self.extract_results(data)
238
+ return self.format_markdown(results)
239
+
240
+ def extract_results(self, data: dict) -> list:
241
+ results = []
242
+ for result in data.get("web", {}).get("results", []):
243
+ results.append(
244
+ {"title": result["title"], "url": result["url"], "description": result.get("description", "")}
245
+ )
246
+ return results
247
+
248
+ def format_markdown(self, results: list) -> str:
249
+ if not results:
250
+ return "No results found."
251
+ return "## Search Results\n\n" + "\n\n".join(
252
+ [
253
+ f"{idx}. [{result['title']}]({result['url']})\n{result['description']}"
254
+ for idx, result in enumerate(results, start=1)
255
+ ]
256
+ )
257
+
258
+
259
+ class WebSearchTool(Tool):
260
+ name = "web_search"
261
+ description = "Performs a web search for a query and returns a string of the top search results formatted as markdown with titles, links, and descriptions."
262
+ inputs = {"query": {"type": "string", "description": "The search query to perform."}}
263
+ output_type = "string"
264
+
265
+ def __init__(self, max_results: int = 10, engine: str = "duckduckgo"):
266
+ super().__init__()
267
+ self.max_results = max_results
268
+ self.engine = engine
269
+
270
+ def forward(self, query: str) -> str:
271
+ results = self.search(query)
272
+ if len(results) == 0:
273
+ raise Exception("No results found! Try a less restrictive/shorter query.")
274
+ return self.parse_results(results)
275
+
276
+ def search(self, query: str) -> list:
277
+ if self.engine == "duckduckgo":
278
+ return self.search_duckduckgo(query)
279
+ elif self.engine == "bing":
280
+ return self.search_bing(query)
281
+ else:
282
+ raise ValueError(f"Unsupported engine: {self.engine}")
283
+
284
+ def parse_results(self, results: list) -> str:
285
+ return "## Search Results\n\n" + "\n\n".join(
286
+ [f"[{result['title']}]({result['link']})\n{result['description']}" for result in results]
287
+ )
288
+
289
+ def search_duckduckgo(self, query: str) -> list:
290
+ import requests
291
+
292
+ response = requests.get(
293
+ "https://lite.duckduckgo.com/lite/",
294
+ params={"q": query},
295
+ headers={"User-Agent": "Mozilla/5.0"},
296
+ )
297
+ response.raise_for_status()
298
+ parser = self._create_duckduckgo_parser()
299
+ parser.feed(response.text)
300
+ return parser.results
301
+
302
+ def _create_duckduckgo_parser(self):
303
+ from html.parser import HTMLParser
304
+
305
+ class SimpleResultParser(HTMLParser):
306
+ def __init__(self):
307
+ super().__init__()
308
+ self.results = []
309
+ self.current = {}
310
+ self.capture_title = False
311
+ self.capture_description = False
312
+ self.capture_link = False
313
+
314
+ def handle_starttag(self, tag, attrs):
315
+ attrs = dict(attrs)
316
+ if tag == "a" and attrs.get("class") == "result-link":
317
+ self.capture_title = True
318
+ elif tag == "td" and attrs.get("class") == "result-snippet":
319
+ self.capture_description = True
320
+ elif tag == "span" and attrs.get("class") == "link-text":
321
+ self.capture_link = True
322
+
323
+ def handle_endtag(self, tag):
324
+ if tag == "a" and self.capture_title:
325
+ self.capture_title = False
326
+ elif tag == "td" and self.capture_description:
327
+ self.capture_description = False
328
+ elif tag == "span" and self.capture_link:
329
+ self.capture_link = False
330
+ elif tag == "tr":
331
+ # Store current result if all parts are present
332
+ if {"title", "description", "link"} <= self.current.keys():
333
+ self.current["description"] = " ".join(self.current["description"])
334
+ self.results.append(self.current)
335
+ self.current = {}
336
+
337
+ def handle_data(self, data):
338
+ if self.capture_title:
339
+ self.current["title"] = data.strip()
340
+ elif self.capture_description:
341
+ self.current.setdefault("description", [])
342
+ self.current["description"].append(data.strip())
343
+ elif self.capture_link:
344
+ self.current["link"] = "https://" + data.strip()
345
+
346
+ return SimpleResultParser()
347
+
348
+ def search_bing(self, query: str) -> list:
349
+ import xml.etree.ElementTree as ET
350
+
351
+ import requests
352
+
353
+ response = requests.get(
354
+ "https://www.bing.com/search",
355
+ params={"q": query, "format": "rss"},
356
+ )
357
+ response.raise_for_status()
358
+ root = ET.fromstring(response.text)
359
+ items = root.findall(".//item")
360
+ results = [
361
+ {
362
+ "title": item.findtext("title"),
363
+ "link": item.findtext("link"),
364
+ "description": item.findtext("description"),
365
+ }
366
+ for item in items[: self.max_results]
367
+ ]
368
+ return results
369
+
370
+
371
+ class VisitWebpageTool(Tool):
372
+ name = "visit_webpage"
373
+ description = (
374
+ "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
375
+ )
376
+ inputs = {
377
+ "url": {
378
+ "type": "string",
379
+ "description": "The url of the webpage to visit.",
380
+ }
381
+ }
382
+ output_type = "string"
383
+
384
+ def __init__(self, max_output_length: int = 40000):
385
+ super().__init__()
386
+ self.max_output_length = max_output_length
387
+
388
+ def _truncate_content(self, content: str, max_length: int) -> str:
389
+ if len(content) <= max_length:
390
+ return content
391
+ return (
392
+ content[: max_length // 2]
393
+ + f"\n..._This content has been truncated to stay below {max_length} characters_...\n"
394
+ + content[-max_length // 2 :]
395
+ )
396
+
397
+ def forward(self, url: str) -> str:
398
+ try:
399
+ import re
400
+
401
+ import requests
402
+ from markdownify import markdownify
403
+ from requests.exceptions import RequestException
404
+ except ImportError as e:
405
+ raise ImportError(
406
+ "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
407
+ ) from e
408
+ try:
409
+ # Send a GET request to the URL with a 20-second timeout
410
+ response = requests.get(url, timeout=20)
411
+ response.raise_for_status() # Raise an exception for bad status codes
412
+
413
+ # Convert the HTML content to Markdown
414
+ markdown_content = markdownify(response.text).strip()
415
+
416
+ # Remove multiple line breaks
417
+ markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
418
+
419
+ return self._truncate_content(markdown_content, self.max_output_length)
420
+
421
+ except requests.exceptions.Timeout:
422
+ return "The request timed out. Please try again later or check the URL."
423
+ except RequestException as e:
424
+ return f"Error fetching the webpage: {str(e)}"
425
+ except Exception as e:
426
+ return f"An unexpected error occurred: {str(e)}"
427
+
428
+
429
+ class WikipediaSearchTool(Tool):
430
+ """
431
+ WikipediaSearchTool searches Wikipedia and returns a summary or full text of the given topic, along with the page URL.
432
+
433
+ Attributes:
434
+ user_agent (str): A custom user-agent string to identify the project. This is required as per Wikipedia API policies, read more here: http://github.com/martin-majlis/Wikipedia-API/blob/master/README.rst
435
+ language (str): The language in which to retrieve Wikipedia articles.
436
+ http://meta.wikimedia.org/wiki/List_of_Wikipedias
437
+ content_type (str): Defines the content to fetch. Can be "summary" for a short summary or "text" for the full article.
438
+ extract_format (str): Defines the output format. Can be `"WIKI"` or `"HTML"`.
439
+
440
+ Example:
441
+ >>> from smolagents import CodeAgent, InferenceClientModel, WikipediaSearchTool
442
+ >>> agent = CodeAgent(
443
+ >>> tools=[
444
+ >>> WikipediaSearchTool(
445
+ >>> user_agent="MyResearchBot ([email protected])",
446
+ >>> language="en",
447
+ >>> content_type="summary", # or "text"
448
+ >>> extract_format="WIKI",
449
+ >>> )
450
+ >>> ],
451
+ >>> model=InferenceClientModel(),
452
+ >>> )
453
+ >>> agent.run("Python_(programming_language)")
454
+ """
455
+
456
+ name = "wikipedia_search"
457
+ description = "Searches Wikipedia and returns a summary or full text of the given topic, along with the page URL."
458
+ inputs = {
459
+ "query": {
460
+ "type": "string",
461
+ "description": "The topic to search on Wikipedia.",
462
+ }
463
+ }
464
+ output_type = "string"
465
+
466
+ def __init__(
467
+ self,
468
+ user_agent: str = "Smolagents ([email protected])",
469
+ language: str = "en",
470
+ content_type: str = "text",
471
+ extract_format: str = "WIKI",
472
+ ):
473
+ super().__init__()
474
+ try:
475
+ import wikipediaapi
476
+ except ImportError as e:
477
+ raise ImportError(
478
+ "You must install `wikipedia-api` to run this tool: for instance run `pip install wikipedia-api`"
479
+ ) from e
480
+ if not user_agent:
481
+ raise ValueError("User-agent is required. Provide a meaningful identifier for your project.")
482
+
483
+ self.user_agent = user_agent
484
+ self.language = language
485
+ self.content_type = content_type
486
+
487
+ # Map string format to wikipediaapi.ExtractFormat
488
+ extract_format_map = {
489
+ "WIKI": wikipediaapi.ExtractFormat.WIKI,
490
+ "HTML": wikipediaapi.ExtractFormat.HTML,
491
+ }
492
+
493
+ if extract_format not in extract_format_map:
494
+ raise ValueError("Invalid extract_format. Choose between 'WIKI' or 'HTML'.")
495
+
496
+ self.extract_format = extract_format_map[extract_format]
497
+
498
+ self.wiki = wikipediaapi.Wikipedia(
499
+ user_agent=self.user_agent, language=self.language, extract_format=self.extract_format
500
+ )
501
+
502
+ def forward(self, query: str) -> str:
503
+ try:
504
+ page = self.wiki.page(query)
505
+
506
+ if not page.exists():
507
+ return f"No Wikipedia page found for '{query}'. Try a different query."
508
+
509
+ title = page.title
510
+ url = page.fullurl
511
+
512
+ if self.content_type == "summary":
513
+ text = page.summary
514
+ elif self.content_type == "text":
515
+ text = page.text
516
+ else:
517
+ return "⚠️ Invalid `content_type`. Use either 'summary' or 'text'."
518
+
519
+ return f"✅ **Wikipedia Page:** {title}\n\n**Content:** {text}\n\n🔗 **Read more:** {url}"
520
+
521
+ except Exception as e:
522
+ return f"Error fetching Wikipedia summary: {str(e)}"
523
+
524
+
525
+ class SpeechToTextTool(PipelineTool):
526
+ default_checkpoint = "openai/whisper-large-v3-turbo"
527
+ description = "This is a tool that transcribes an audio into text. It returns the transcribed text."
528
+ name = "transcriber"
529
+ inputs = {
530
+ "audio": {
531
+ "type": "audio",
532
+ "description": "The audio to transcribe. Can be a local path, an url, or a tensor.",
533
+ }
534
+ }
535
+ output_type = "string"
536
+
537
+ def __new__(cls, *args, **kwargs):
538
+ from transformers.models.whisper import WhisperForConditionalGeneration, WhisperProcessor
539
+
540
+ cls.pre_processor_class = WhisperProcessor
541
+ cls.model_class = WhisperForConditionalGeneration
542
+ return super().__new__(cls)
543
+
544
+ def encode(self, audio):
545
+ from .agent_types import AgentAudio
546
+
547
+ audio = AgentAudio(audio).to_raw()
548
+ return self.pre_processor(audio, return_tensors="pt")
549
+
550
+ def forward(self, inputs):
551
+ return self.model.generate(inputs["input_features"])
552
+
553
+ def decode(self, outputs):
554
+ return self.pre_processor.batch_decode(outputs, skip_special_tokens=True)[0]
555
+
556
+
557
+ TOOL_MAPPING = {
558
+ tool_class.name: tool_class
559
+ for tool_class in [
560
+ PythonInterpreterTool,
561
+ DuckDuckGoSearchTool,
562
+ VisitWebpageTool,
563
+ ]
564
+ }
565
+
566
+ __all__ = [
567
+ "ApiWebSearchTool",
568
+ "PythonInterpreterTool",
569
+ "FinalAnswerTool",
570
+ "UserInputTool",
571
+ "WebSearchTool",
572
+ "DuckDuckGoSearchTool",
573
+ "GoogleSearchTool",
574
+ "VisitWebpageTool",
575
+ "WikipediaSearchTool",
576
+ "SpeechToTextTool",
577
+ ]
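
For orientation, here is a minimal sketch of exercising these tools directly, outside an agent loop. The query and URL are illustrative, network access is assumed, and `VisitWebpageTool` additionally needs `markdownify` and `requests`:

```python
# Minimal sketch: calling the default tools directly (outside an agent loop).
from smolagents.default_tools import FinalAnswerTool, VisitWebpageTool, WebSearchTool

search = WebSearchTool(max_results=5, engine="duckduckgo")  # no extra dependency required
print(search.forward("smolagents"))  # markdown "## Search Results" block

page = VisitWebpageTool(max_output_length=10_000)
print(page.forward("https://github.com/huggingface/smolagents"))  # page as truncated markdown

assert FinalAnswerTool().forward("done") == "done"  # passes the answer through unchanged
```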
src/smolagents/gradio_ui.py ADDED
@@ -0,0 +1,508 @@
+ #!/usr/bin/env python
+ # coding=utf-8
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import os
+ import re
+ import shutil
+ from pathlib import Path
+ from typing import Generator
+
+ from smolagents.agent_types import AgentAudio, AgentImage, AgentText
+ from smolagents.agents import MultiStepAgent, PlanningStep
+ from smolagents.memory import ActionStep, FinalAnswerStep
+ from smolagents.models import ChatMessageStreamDelta, MessageRole, agglomerate_stream_deltas
+ from smolagents.utils import _is_package_available
+
+
+ def get_step_footnote_content(step_log: ActionStep | PlanningStep, step_name: str) -> str:
+     """Get a footnote string for a step log, with duration and token information."""
+     step_footnote = f"**{step_name}**"
+     if step_log.token_usage is not None:
+         step_footnote += f" | Input tokens: {step_log.token_usage.input_tokens:,} | Output tokens: {step_log.token_usage.output_tokens:,}"
+     step_footnote += f" | Duration: {round(float(step_log.timing.duration), 2)}s" if step_log.timing.duration else ""
+     step_footnote_content = f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
+     return step_footnote_content
+
+
+ def _clean_model_output(model_output: str) -> str:
+     """
+     Clean up model output by removing trailing tags and extra backticks.
+
+     Args:
+         model_output (`str`): Raw model output.
+
+     Returns:
+         `str`: Cleaned model output.
+     """
+     if not model_output:
+         return ""
+     model_output = model_output.strip()
+     # Remove any trailing <end_code> and extra backticks, handling multiple possible formats
+     model_output = re.sub(r"```\s*<end_code>", "```", model_output)  # handles ```<end_code>
+     model_output = re.sub(r"<end_code>\s*```", "```", model_output)  # handles <end_code>```
+     model_output = re.sub(r"```\s*\n\s*<end_code>", "```", model_output)  # handles ```\n<end_code>
+     return model_output.strip()
+
+
+ def _format_code_content(content: str) -> str:
+     """
+     Format code content as a Python code block if it's not already formatted.
+
+     Args:
+         content (`str`): Code content to format.
+
+     Returns:
+         `str`: Code content formatted as a Python code block.
+     """
+     content = content.strip()
+     # Remove existing code blocks and end_code tags
+     content = re.sub(r"```.*?\n", "", content)
+     content = re.sub(r"\s*<end_code>\s*", "", content)
+     content = content.strip()
+     # Add Python code block formatting if not already present
+     if not content.startswith("```python"):
+         content = f"```python\n{content}\n```"
+     return content
+
+
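
As a quick sanity check of the two helpers above, a sketch of their effect on typical model output (the `fence` indirection just avoids writing literal triple backticks inside this example):

```python
# Sketch: effect of the cleanup helpers on typical model output.
fence = "`" * 3  # build the code fence programmatically to keep this example readable

raw = f"Thought: add the numbers.\n{fence}py\nprint(1 + 1)\n{fence}<end_code>"
cleaned = _clean_model_output(raw)
assert cleaned.endswith(fence)  # trailing <end_code> stripped, closing fence preserved

formatted = _format_code_content("print(1 + 1)\n<end_code>")
assert formatted == f"{fence}python\nprint(1 + 1)\n{fence}"  # wrapped as a Python block
```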
+ def _process_action_step(step_log: ActionStep, skip_model_outputs: bool = False) -> Generator:
+     """
+     Process an [`ActionStep`] and yield appropriate Gradio ChatMessage objects.
+
+     Args:
+         step_log ([`ActionStep`]): ActionStep to process.
+         skip_model_outputs (`bool`): Whether to skip model outputs.
+
+     Yields:
+         `gradio.ChatMessage`: Gradio ChatMessages representing the action step.
+     """
+     import gradio as gr
+
+     # Output the step number
+     step_number = f"Step {step_log.step_number}"
+     if not skip_model_outputs:
+         yield gr.ChatMessage(role=MessageRole.ASSISTANT, content=f"**{step_number}**", metadata={"status": "done"})
+
+     # First yield the thought/reasoning from the LLM
+     if not skip_model_outputs and getattr(step_log, "model_output", ""):
+         model_output = _clean_model_output(step_log.model_output)
+         yield gr.ChatMessage(role=MessageRole.ASSISTANT, content=model_output, metadata={"status": "done"})
+
+     # For tool calls, create a parent message
+     if getattr(step_log, "tool_calls", []):
+         first_tool_call = step_log.tool_calls[0]
+         used_code = first_tool_call.name == "python_interpreter"
+
+         # Process arguments based on type
+         args = first_tool_call.arguments
+         if isinstance(args, dict):
+             content = str(args.get("answer", str(args)))
+         else:
+             content = str(args).strip()
+
+         # Format code content if needed
+         if used_code:
+             content = _format_code_content(content)
+
+         # Create the tool call message
+         parent_message_tool = gr.ChatMessage(
+             role=MessageRole.ASSISTANT,
+             content=content,
+             metadata={
+                 "title": f"🛠️ Used tool {first_tool_call.name}",
+                 "status": "done",
+             },
+         )
+         yield parent_message_tool
+
+     # Display execution logs if they exist
+     if getattr(step_log, "observations", "") and step_log.observations.strip():
+         log_content = step_log.observations.strip()
+         if log_content:
+             log_content = re.sub(r"^Execution logs:\s*", "", log_content)
+             yield gr.ChatMessage(
+                 role=MessageRole.ASSISTANT,
+                 content=f"```bash\n{log_content}\n",
+                 metadata={"title": "📝 Execution Logs", "status": "done"},
+             )
+
+     # Display any images in observations
+     if getattr(step_log, "observations_images", []):
+         for image in step_log.observations_images:
+             path_image = AgentImage(image).to_string()
+             yield gr.ChatMessage(
+                 role=MessageRole.ASSISTANT,
+                 content={"path": path_image, "mime_type": f"image/{path_image.split('.')[-1]}"},
+                 metadata={"title": "🖼️ Output Image", "status": "done"},
+             )
+
+     # Handle errors
+     if getattr(step_log, "error", None):
+         yield gr.ChatMessage(
+             role=MessageRole.ASSISTANT, content=str(step_log.error), metadata={"title": "💥 Error", "status": "done"}
+         )
+
+     # Add step footnote and separator
+     yield gr.ChatMessage(
+         role=MessageRole.ASSISTANT,
+         content=get_step_footnote_content(step_log, step_number),
+         metadata={"status": "done"},
+     )
+     yield gr.ChatMessage(role=MessageRole.ASSISTANT, content="-----", metadata={"status": "done"})
+
+
+ def _process_planning_step(step_log: PlanningStep, skip_model_outputs: bool = False) -> Generator:
+     """
+     Process a [`PlanningStep`] and yield appropriate gradio.ChatMessage objects.
+
+     Args:
+         step_log ([`PlanningStep`]): PlanningStep to process.
+
+     Yields:
+         `gradio.ChatMessage`: Gradio ChatMessages representing the planning step.
+     """
+     import gradio as gr
+
+     if not skip_model_outputs:
+         yield gr.ChatMessage(role=MessageRole.ASSISTANT, content="**Planning step**", metadata={"status": "done"})
+         yield gr.ChatMessage(role=MessageRole.ASSISTANT, content=step_log.plan, metadata={"status": "done"})
+     yield gr.ChatMessage(
+         role=MessageRole.ASSISTANT,
+         content=get_step_footnote_content(step_log, "Planning step"),
+         metadata={"status": "done"},
+     )
+     yield gr.ChatMessage(role=MessageRole.ASSISTANT, content="-----", metadata={"status": "done"})
+
+
+ def _process_final_answer_step(step_log: FinalAnswerStep) -> Generator:
+     """
+     Process a [`FinalAnswerStep`] and yield appropriate gradio.ChatMessage objects.
+
+     Args:
+         step_log ([`FinalAnswerStep`]): FinalAnswerStep to process.
+
+     Yields:
+         `gradio.ChatMessage`: Gradio ChatMessages representing the final answer.
+     """
+     import gradio as gr
+
+     final_answer = step_log.output
+     if isinstance(final_answer, AgentText):
+         yield gr.ChatMessage(
+             role=MessageRole.ASSISTANT,
+             content=f"**Final answer:**\n{final_answer.to_string()}\n",
+             metadata={"status": "done"},
+         )
+     elif isinstance(final_answer, AgentImage):
+         yield gr.ChatMessage(
+             role=MessageRole.ASSISTANT,
+             content={"path": final_answer.to_string(), "mime_type": "image/png"},
+             metadata={"status": "done"},
+         )
+     elif isinstance(final_answer, AgentAudio):
+         yield gr.ChatMessage(
+             role=MessageRole.ASSISTANT,
+             content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
+             metadata={"status": "done"},
+         )
+     else:
+         yield gr.ChatMessage(
+             role=MessageRole.ASSISTANT, content=f"**Final answer:** {str(final_answer)}", metadata={"status": "done"}
+         )
+
+
+ def pull_messages_from_step(step_log: ActionStep | PlanningStep | FinalAnswerStep, skip_model_outputs: bool = False):
+     """Extract Gradio ChatMessage objects from agent steps with proper nesting.
+
+     Args:
+         step_log: The step log to display as gr.ChatMessage objects.
+         skip_model_outputs: If True, skip the model outputs when creating the gr.ChatMessage objects.
+             This is used for instance when streaming model outputs have already been displayed.
+     """
+     if not _is_package_available("gradio"):
+         raise ModuleNotFoundError(
+             "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
+         )
+     if isinstance(step_log, ActionStep):
+         yield from _process_action_step(step_log, skip_model_outputs)
+     elif isinstance(step_log, PlanningStep):
+         yield from _process_planning_step(step_log, skip_model_outputs)
+     elif isinstance(step_log, FinalAnswerStep):
+         yield from _process_final_answer_step(step_log)
+     else:
+         raise ValueError(f"Unsupported step type: {type(step_log)}")
+
+
+ def stream_to_gradio(
+     agent,
+     task: str,
+     task_images: list | None = None,
+     reset_agent_memory: bool = False,
+     additional_args: dict | None = None,
+ ) -> Generator:
+     """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
+
+     if not _is_package_available("gradio"):
+         raise ModuleNotFoundError(
+             "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
+         )
+     accumulated_events: list[ChatMessageStreamDelta] = []
+     for event in agent.run(
+         task, images=task_images, stream=True, reset=reset_agent_memory, additional_args=additional_args
+     ):
+         if isinstance(event, ActionStep | PlanningStep | FinalAnswerStep):
+             for message in pull_messages_from_step(
+                 event,
+                 # If we're streaming model outputs, no need to display them twice
+                 skip_model_outputs=getattr(agent, "stream_outputs", False),
+             ):
+                 yield message
+             accumulated_events = []
+         elif isinstance(event, ChatMessageStreamDelta):
+             accumulated_events.append(event)
+             text = agglomerate_stream_deltas(accumulated_events).render_as_markdown()
+             yield text
+
+
+ class GradioUI:
+     """
+     Gradio interface for interacting with a [`MultiStepAgent`].
+
+     This class provides a web interface to interact with the agent in real-time, allowing users to submit prompts, upload files, and receive responses in a chat-like format.
+     It can reset the agent's memory at the start of each interaction if desired.
+     It supports file uploads, which are saved to a specified folder.
+     It uses the [`gradio.Chatbot`] component to display the conversation history.
+     This class requires the `gradio` extra to be installed: `smolagents[gradio]`.
+
+     Args:
+         agent ([`MultiStepAgent`]): The agent to interact with.
+         file_upload_folder (`str`, *optional*): The folder where uploaded files will be saved.
+             If not provided, file uploads are disabled.
+         reset_agent_memory (`bool`, *optional*, defaults to `False`): Whether to reset the agent's memory at the start of each interaction.
+             If `True`, the agent will not remember previous interactions.
+
+     Raises:
+         ModuleNotFoundError: If the `gradio` extra is not installed.
+
+     Example:
+         ```python
+         from smolagents import CodeAgent, GradioUI, InferenceClientModel
+
+         model = InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct")
+         agent = CodeAgent(tools=[], model=model)
+         gradio_ui = GradioUI(agent, file_upload_folder="uploads", reset_agent_memory=True)
+         gradio_ui.launch()
+         ```
+     """
+
+     def __init__(self, agent: MultiStepAgent, file_upload_folder: str | None = None, reset_agent_memory: bool = False):
+         if not _is_package_available("gradio"):
+             raise ModuleNotFoundError(
+                 "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
+             )
+         self.agent = agent
+         self.file_upload_folder = Path(file_upload_folder) if file_upload_folder is not None else None
+         self.reset_agent_memory = reset_agent_memory
+         self.name = getattr(agent, "name") or "Agent interface"
+         self.description = getattr(agent, "description", None)
+         if self.file_upload_folder is not None:
+             if not self.file_upload_folder.exists():
+                 self.file_upload_folder.mkdir(parents=True, exist_ok=True)
+
+     def interact_with_agent(self, prompt, messages, session_state):
+         import gradio as gr
+
+         # Get the agent type from the template agent
+         if "agent" not in session_state:
+             session_state["agent"] = self.agent
+
+         try:
+             messages.append(gr.ChatMessage(role="user", content=prompt, metadata={"status": "done"}))
+             yield messages
+
+             for msg in stream_to_gradio(
+                 session_state["agent"], task=prompt, reset_agent_memory=self.reset_agent_memory
+             ):
+                 if isinstance(msg, gr.ChatMessage):
+                     messages[-1].metadata["status"] = "done"
+                     messages.append(msg)
+                 elif isinstance(msg, str):  # Then it's only a completion delta
+                     msg = msg.replace("<", r"\<").replace(">", r"\>")  # HTML tags seem to break Gradio Chatbot
+                     if messages[-1].metadata["status"] == "pending":
+                         messages[-1].content = msg
+                     else:
+                         messages.append(
+                             gr.ChatMessage(role=MessageRole.ASSISTANT, content=msg, metadata={"status": "pending"})
+                         )
+                 yield messages
+
+             yield messages
+         except Exception as e:
+             yield messages
+             raise gr.Error(f"Error in interaction: {str(e)}")
+
+     def upload_file(self, file, file_uploads_log, allowed_file_types=None):
+         """
+         Upload a file and add it to the list of uploaded files in the session state.
+
+         The file is saved to the `self.file_upload_folder` folder.
+         If the file type is not allowed, it returns a message indicating the disallowed file type.
+
+         Args:
+             file (`gradio.File`): The uploaded file.
+             file_uploads_log (`list`): A list to log uploaded files.
+             allowed_file_types (`list`, *optional*): List of allowed file extensions. Defaults to [".pdf", ".docx", ".txt"].
+         """
+         import gradio as gr
+
+         if file is None:
+             return gr.Textbox(value="No file uploaded", visible=True), file_uploads_log
+
+         if allowed_file_types is None:
+             allowed_file_types = [".pdf", ".docx", ".txt"]
+
+         file_ext = os.path.splitext(file.name)[1].lower()
+         if file_ext not in allowed_file_types:
+             return gr.Textbox("File type disallowed", visible=True), file_uploads_log
+
+         # Sanitize file name
+         original_name = os.path.basename(file.name)
+         sanitized_name = re.sub(
+             r"[^\w\-.]", "_", original_name
+         )  # Replace any non-alphanumeric, non-dash, or non-dot characters with underscores
+
+         # Save the uploaded file to the specified folder
+         file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name))
+         shutil.copy(file.name, file_path)
+
+         return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]
+
+     def log_user_message(self, text_input, file_uploads_log):
+         import gradio as gr
+
+         return (
+             text_input
+             + (
+                 f"\nYou have been provided with these files, which might be helpful or not: {file_uploads_log}"
+                 if len(file_uploads_log) > 0
+                 else ""
+             ),
+             "",
+             gr.Button(interactive=False),
+         )
+
+     def launch(self, share: bool = True, **kwargs):
+         """
+         Launch the Gradio app with the agent interface.
+
+         Args:
+             share (`bool`, defaults to `True`): Whether to share the app publicly.
+             **kwargs: Additional keyword arguments to pass to the Gradio launch method.
+         """
+         self.create_app().launch(debug=True, share=share, **kwargs)
+
+     def create_app(self):
+         import gradio as gr
+
+         with gr.Blocks(theme="ocean", fill_height=True) as demo:
+             # Add session state to store session-specific data
+             session_state = gr.State({})
+             stored_messages = gr.State([])
+             file_uploads_log = gr.State([])
+
+             with gr.Sidebar():
+                 gr.Markdown(
+                     f"# {self.name.replace('_', ' ').capitalize()}"
+                     "\n> This web ui allows you to interact with a `smolagents` agent that can use tools and execute steps to complete tasks."
+                     + (f"\n\n**Agent description:**\n{self.description}" if self.description else "")
+                 )
+
+                 with gr.Group():
+                     gr.Markdown("**Your request**", container=True)
+                     text_input = gr.Textbox(
+                         lines=3,
+                         label="Chat Message",
+                         container=False,
+                         placeholder="Enter your prompt here and press Shift+Enter or press the button",
+                     )
+                     submit_btn = gr.Button("Submit", variant="primary")
+
+                 # If an upload folder is provided, enable the upload feature
+                 if self.file_upload_folder is not None:
+                     upload_file = gr.File(label="Upload a file")
+                     upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
+                     upload_file.change(
+                         self.upload_file,
+                         [upload_file, file_uploads_log],
+                         [upload_status, file_uploads_log],
+                     )
+
+                 gr.HTML(
+                     "<br><br><h4><center>Powered by <a target='_blank' href='https://github.com/huggingface/smolagents'><b>smolagents</b></a></center></h4>"
+                 )
+
+             # Main chat interface
+             chatbot = gr.Chatbot(
+                 label="Agent",
+                 type="messages",
+                 avatar_images=(
+                     None,
+                     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
+                 ),
+                 resizeable=True,
+                 scale=1,
+                 latex_delimiters=[
+                     {"left": r"$$", "right": r"$$", "display": True},
+                     {"left": r"$", "right": r"$", "display": False},
+                     {"left": r"\[", "right": r"\]", "display": True},
+                     {"left": r"\(", "right": r"\)", "display": False},
+                 ],
+             )
+
+             # Set up event handlers
+             text_input.submit(
+                 self.log_user_message,
+                 [text_input, file_uploads_log],
+                 [stored_messages, text_input, submit_btn],
+             ).then(self.interact_with_agent, [stored_messages, chatbot, session_state], [chatbot]).then(
+                 lambda: (
+                     gr.Textbox(
+                         interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button"
+                     ),
+                     gr.Button(interactive=True),
+                 ),
+                 None,
+                 [text_input, submit_btn],
+             )
+
+             submit_btn.click(
+                 self.log_user_message,
+                 [text_input, file_uploads_log],
+                 [stored_messages, text_input, submit_btn],
+             ).then(self.interact_with_agent, [stored_messages, chatbot, session_state], [chatbot]).then(
+                 lambda: (
+                     gr.Textbox(
+                         interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button"
+                     ),
+                     gr.Button(interactive=True),
+                 ),
+                 None,
+                 [text_input, submit_btn],
+             )
+
+         return demo
+
+
+ __all__ = ["stream_to_gradio", "GradioUI"]
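
A sketch of consuming `stream_to_gradio` directly, without the `GradioUI` wrapper; the agent setup mirrors the docstring example above and is only illustrative:

```python
# Sketch: driving stream_to_gradio outside the GradioUI class.
from smolagents import CodeAgent, InferenceClientModel
from smolagents.gradio_ui import stream_to_gradio

agent = CodeAgent(tools=[], model=InferenceClientModel())
for message in stream_to_gradio(agent, task="What is 2 + 2?", reset_agent_memory=True):
    # Each item is either a gr.ChatMessage (a finished step) or a str (a streaming delta)
    print(message)
```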
src/smolagents/local_python_executor.py ADDED
@@ -0,0 +1,1611 @@
+ #!/usr/bin/env python
+ # coding=utf-8
+
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import ast
+ import builtins
+ import difflib
+ import inspect
+ import logging
+ import math
+ import re
+ from collections.abc import Callable, Mapping
+ from functools import wraps
+ from importlib import import_module
+ from types import BuiltinFunctionType, FunctionType, ModuleType
+ from typing import Any
+
+ from .tools import Tool
+ from .utils import BASE_BUILTIN_MODULES, truncate_content
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class InterpreterError(ValueError):
+     """
+     An error raised when the interpreter cannot evaluate a Python expression, due to syntax error or unsupported
+     operations.
+     """
+
+     pass
+
+
+ ERRORS = {
+     name: getattr(builtins, name)
+     for name in dir(builtins)
+     if isinstance(getattr(builtins, name), type) and issubclass(getattr(builtins, name), BaseException)
+ }
+
+ DEFAULT_MAX_LEN_OUTPUT = 50000
+ MAX_OPERATIONS = 10000000
+ MAX_WHILE_ITERATIONS = 1000000
+
+
+ def custom_print(*args):
+     return None
+
+
+ def nodunder_getattr(obj, name, default=None):
+     if name.startswith("__") and name.endswith("__"):
+         raise InterpreterError(f"Forbidden access to dunder attribute: {name}")
+     return getattr(obj, name, default)
+
+
+ BASE_PYTHON_TOOLS = {
+     "print": custom_print,
+     "isinstance": isinstance,
+     "range": range,
+     "float": float,
+     "int": int,
+     "bool": bool,
+     "str": str,
+     "set": set,
+     "list": list,
+     "dict": dict,
+     "tuple": tuple,
+     "round": round,
+     "ceil": math.ceil,
+     "floor": math.floor,
+     "log": math.log,
+     "exp": math.exp,
+     "sin": math.sin,
+     "cos": math.cos,
+     "tan": math.tan,
+     "asin": math.asin,
+     "acos": math.acos,
+     "atan": math.atan,
+     "atan2": math.atan2,
+     "degrees": math.degrees,
+     "radians": math.radians,
+     "pow": pow,
+     "sqrt": math.sqrt,
+     "len": len,
+     "sum": sum,
+     "max": max,
+     "min": min,
+     "abs": abs,
+     "enumerate": enumerate,
+     "zip": zip,
+     "reversed": reversed,
+     "sorted": sorted,
+     "all": all,
+     "any": any,
+     "map": map,
+     "filter": filter,
+     "ord": ord,
+     "chr": chr,
+     "next": next,
+     "iter": iter,
+     "divmod": divmod,
+     "callable": callable,
+     "getattr": nodunder_getattr,
+     "hasattr": hasattr,
+     "setattr": setattr,
+     "issubclass": issubclass,
+     "type": type,
+     "complex": complex,
+ }
+
+ # Non-exhaustive list of dangerous modules that should not be imported
+ DANGEROUS_MODULES = [
+     "builtins",
+     "io",
+     "multiprocessing",
+     "os",
+     "pathlib",
+     "pty",
+     "shutil",
+     "socket",
+     "subprocess",
+     "sys",
+ ]
+
+ DANGEROUS_FUNCTIONS = [
+     "builtins.compile",
+     "builtins.eval",
+     "builtins.exec",
+     "builtins.globals",
+     "builtins.locals",
+     "builtins.__import__",
+     "os.popen",
+     "os.system",
+     "posix.system",
+ ]
+
+
+ def check_safer_result(result: Any, static_tools: dict[str, Callable] = None, authorized_imports: list[str] = None):
+     """
+     Check whether a result is safe to return, according to the authorized imports and static tools.
+
+     Args:
+         result (Any): The result to check.
+         static_tools (dict[str, Callable]): Dictionary of static tools.
+         authorized_imports (list[str]): List of authorized imports.
+
+     Raises:
+         InterpreterError: If the result is not safe.
+     """
+     if isinstance(result, ModuleType):
+         if not check_import_authorized(result.__name__, authorized_imports):
+             raise InterpreterError(f"Forbidden access to module: {result.__name__}")
+     elif isinstance(result, dict) and result.get("__spec__"):
+         if not check_import_authorized(result["__name__"], authorized_imports):
+             raise InterpreterError(f"Forbidden access to module: {result['__name__']}")
+     elif isinstance(result, (FunctionType, BuiltinFunctionType)):
+         for qualified_function_name in DANGEROUS_FUNCTIONS:
+             module_name, function_name = qualified_function_name.rsplit(".", 1)
+             if (
+                 (static_tools is None or function_name not in static_tools)
+                 and result.__name__ == function_name
+                 and result.__module__ == module_name
+             ):
+                 raise InterpreterError(f"Forbidden access to function: {function_name}")
+
+
+ def safer_eval(func: Callable):
+     """
+     Decorator to enhance the security of an evaluation function by checking its return value.
+
+     Args:
+         func (Callable): Evaluation function to be made safer.
+
+     Returns:
+         Callable: Safer evaluation function with return value check.
+     """
+
+     @wraps(func)
+     def _check_return(
+         expression,
+         state,
+         static_tools,
+         custom_tools,
+         authorized_imports=BASE_BUILTIN_MODULES,
+     ):
+         result = func(expression, state, static_tools, custom_tools, authorized_imports=authorized_imports)
+         check_safer_result(result, static_tools, authorized_imports)
+         return result
+
+     return _check_return
+
+
+ def safer_func(
+     func: Callable,
+     static_tools: dict[str, Callable] = BASE_PYTHON_TOOLS,
+     authorized_imports: list[str] = BASE_BUILTIN_MODULES,
+ ):
+     """
+     Decorator to enhance the security of a function call by checking its return value.
+
+     Args:
+         func (Callable): Function to be made safer.
+         static_tools (dict[str, Callable]): Dictionary of static tools.
+         authorized_imports (list[str]): List of authorized imports.
+
+     Returns:
+         Callable: Safer function with return value check.
+     """
+     # If the function is a type, return it directly without wrapping
+     if isinstance(func, type):
+         return func
+
+     @wraps(func)
+     def _check_return(*args, **kwargs):
+         result = func(*args, **kwargs)
+         check_safer_result(result, static_tools, authorized_imports)
+         return result
+
+     return _check_return
+
+
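
To make the guard concrete, here is a sketch of `safer_func` rejecting a call whose return value is a blacklisted builtin; `getattr` is a handy example because it can hand one back:

```python
# Sketch: safer_func blocks calls whose *return value* is a dangerous function.
guarded_getattr = safer_func(getattr)

guarded_getattr("abc", "upper")  # fine: returns a harmless bound method
try:
    guarded_getattr(builtins, "eval")  # would return builtins.eval, which is blacklisted
except InterpreterError as e:
    print(e)  # Forbidden access to function: eval
```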
+ class PrintContainer:
+     def __init__(self):
+         self.value = ""
+
+     def append(self, text):
+         self.value += text
+         return self
+
+     def __iadd__(self, other):
+         """Implements the += operator"""
+         self.value += str(other)
+         return self
+
+     def __str__(self):
+         """String representation"""
+         return self.value
+
+     def __repr__(self):
+         """Representation for debugging"""
+         return f"PrintContainer({self.value})"
+
+     def __len__(self):
+         """Implements len() function support"""
+         return len(self.value)
+
+
+ class BreakException(Exception):
+     pass
+
+
+ class ContinueException(Exception):
+     pass
+
+
+ class ReturnException(Exception):
+     def __init__(self, value):
+         self.value = value
+
+
+ def get_iterable(obj):
+     if isinstance(obj, list):
+         return obj
+     elif hasattr(obj, "__iter__"):
+         return list(obj)
+     else:
+         raise InterpreterError("Object is not iterable")
+
+
+ def fix_final_answer_code(code: str) -> str:
+     """
+     Sometimes an LLM can try to assign a variable to final_answer, which would break the final_answer() tool.
+     This function fixes this behaviour by replacing variable assignments to final_answer with final_answer_variable,
+     while preserving function calls to final_answer().
+     """
+     # First, find if there's a direct assignment to final_answer
+     # Use word boundary and negative lookbehind to ensure it's not an object attribute
+     assignment_pattern = r"(?<!\.)(?<!\w)\bfinal_answer\s*="
+     if "final_answer(" not in code or not re.search(assignment_pattern, code):
+         # If the final_answer tool is not called in this blob, doing the replacement is hazardous because it could
+         # corrupt the model's memory for next steps.
+         # Let's not modify the code and let the subsequent assignment error happen.
+         return code
+
+     # Pattern for replacing variable assignments
+     # Looks for 'final_answer' followed by '=' with optional whitespace
+     # Negative lookbehind ensures we don't match object attributes
+     assignment_regex = r"(?<!\.)(?<!\w)(\bfinal_answer)(\s*=)"
+     code = re.sub(assignment_regex, r"final_answer_variable\2", code)
+
+     # Pattern for replacing variable usage but not function calls
+     # Negative lookahead (?!\s*\() ensures we don't match function calls
+     # Negative lookbehind (?<!\.|\w) ensures we don't match object methods or other variables
+     variable_regex = r"(?<!\.)(?<!\w)(\bfinal_answer\b)(?!\s*\()"
+     code = re.sub(variable_regex, "final_answer_variable", code)
+     return code
+
+
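
The rewrite above only fires when a snippet both assigns to `final_answer` and calls the tool; a sketch of its effect (`compute` is a hypothetical helper):

```python
# Sketch: the rewrite fires only when final_answer is both assigned and called.
snippet = "final_answer = compute()\nfinal_answer(final_answer)"
assert fix_final_answer_code(snippet) == (
    "final_answer_variable = compute()\nfinal_answer(final_answer_variable)"
)

assignment_only = "final_answer = compute()"  # the tool is never called: left untouched
assert fix_final_answer_code(assignment_only) == assignment_only
```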
+ def build_import_tree(authorized_imports: list[str]) -> dict[str, Any]:
+     tree = {}
+     for import_path in authorized_imports:
+         parts = import_path.split(".")
+         current = tree
+         for part in parts:
+             if part not in current:
+                 current[part] = {}
+             current = current[part]
+     return tree
+
+
+ def check_import_authorized(import_to_check: str, authorized_imports: list[str]) -> bool:
+     current_node = build_import_tree(authorized_imports)
+     for part in import_to_check.split("."):
+         if "*" in current_node:
+             return True
+         if part not in current_node:
+             return False
+         current_node = current_node[part]
+     return True
+
+
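
Authorization walks the dotted import path through the tree, with `"*"` acting as a wildcard for any subpath; a sketch:

```python
# Sketch: dotted-path matching with the "*" wildcard.
authorized = ["math", "numpy.*", "os.path"]

assert check_import_authorized("math", authorized)
assert check_import_authorized("numpy.linalg", authorized)  # covered by "numpy.*"
assert check_import_authorized("os.path", authorized)
assert not check_import_authorized("subprocess", authorized)  # never authorized
assert not check_import_authorized("os.environ", authorized)  # sibling of os.path
```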
332
+ def evaluate_attribute(
333
+ expression: ast.Attribute,
334
+ state: dict[str, Any],
335
+ static_tools: dict[str, Callable],
336
+ custom_tools: dict[str, Callable],
337
+ authorized_imports: list[str],
338
+ ) -> Any:
339
+ if expression.attr.startswith("__") and expression.attr.endswith("__"):
340
+ raise InterpreterError(f"Forbidden access to dunder attribute: {expression.attr}")
341
+ value = evaluate_ast(expression.value, state, static_tools, custom_tools, authorized_imports)
342
+ return getattr(value, expression.attr)
343
+
344
+
345
+ def evaluate_unaryop(
346
+ expression: ast.UnaryOp,
347
+ state: dict[str, Any],
348
+ static_tools: dict[str, Callable],
349
+ custom_tools: dict[str, Callable],
350
+ authorized_imports: list[str],
351
+ ) -> Any:
352
+ operand = evaluate_ast(expression.operand, state, static_tools, custom_tools, authorized_imports)
353
+ if isinstance(expression.op, ast.USub):
354
+ return -operand
355
+ elif isinstance(expression.op, ast.UAdd):
356
+ return operand
357
+ elif isinstance(expression.op, ast.Not):
358
+ return not operand
359
+ elif isinstance(expression.op, ast.Invert):
360
+ return ~operand
361
+ else:
362
+ raise InterpreterError(f"Unary operation {expression.op.__class__.__name__} is not supported.")
363
+
364
+
365
+ def evaluate_lambda(
366
+ lambda_expression: ast.Lambda,
367
+ state: dict[str, Any],
368
+ static_tools: dict[str, Callable],
369
+ custom_tools: dict[str, Callable],
370
+ authorized_imports: list[str],
371
+ ) -> Callable:
372
+ args = [arg.arg for arg in lambda_expression.args.args]
373
+
374
+ def lambda_func(*values: Any) -> Any:
375
+ new_state = state.copy()
376
+ for arg, value in zip(args, values):
377
+ new_state[arg] = value
378
+ return evaluate_ast(
379
+ lambda_expression.body,
380
+ new_state,
381
+ static_tools,
382
+ custom_tools,
383
+ authorized_imports,
384
+ )
385
+
386
+ return lambda_func
387
+
388
+
389
+ def evaluate_while(
390
+ while_loop: ast.While,
391
+ state: dict[str, Any],
392
+ static_tools: dict[str, Callable],
393
+ custom_tools: dict[str, Callable],
394
+ authorized_imports: list[str],
395
+ ) -> None:
396
+ iterations = 0
397
+ while evaluate_ast(while_loop.test, state, static_tools, custom_tools, authorized_imports):
398
+ for node in while_loop.body:
399
+ try:
400
+ evaluate_ast(node, state, static_tools, custom_tools, authorized_imports)
401
+ except BreakException:
402
+ return None
403
+ except ContinueException:
404
+ break
405
+ iterations += 1
406
+ if iterations > MAX_WHILE_ITERATIONS:
407
+ raise InterpreterError(f"Maximum number of {MAX_WHILE_ITERATIONS} iterations in While loop exceeded")
408
+ return None
409
+
410
+
411
+ def create_function(
412
+ func_def: ast.FunctionDef,
413
+ state: dict[str, Any],
414
+ static_tools: dict[str, Callable],
415
+ custom_tools: dict[str, Callable],
416
+ authorized_imports: list[str],
417
+ ) -> Callable:
418
+ source_code = ast.unparse(func_def)
419
+
420
+ def new_func(*args: Any, **kwargs: Any) -> Any:
421
+ func_state = state.copy()
422
+ arg_names = [arg.arg for arg in func_def.args.args]
423
+ default_values = [
424
+ evaluate_ast(d, state, static_tools, custom_tools, authorized_imports) for d in func_def.args.defaults
425
+ ]
426
+
427
+ # Apply default values
428
+ defaults = dict(zip(arg_names[-len(default_values) :], default_values))
429
+
430
+ # Set positional arguments
431
+ for name, value in zip(arg_names, args):
432
+ func_state[name] = value
433
+
434
+ # Set keyword arguments
435
+ for name, value in kwargs.items():
436
+ func_state[name] = value
437
+
438
+ # Handle variable arguments
439
+ if func_def.args.vararg:
440
+ vararg_name = func_def.args.vararg.arg
441
+ func_state[vararg_name] = args
442
+
443
+ if func_def.args.kwarg:
444
+ kwarg_name = func_def.args.kwarg.arg
445
+ func_state[kwarg_name] = kwargs
446
+
447
+ # Set default values for arguments that were not provided
448
+ for name, value in defaults.items():
449
+ if name not in func_state:
450
+ func_state[name] = value
451
+
452
+ # Update function state with self and __class__
453
+ if func_def.args.args and func_def.args.args[0].arg == "self":
454
+ if args:
455
+ func_state["self"] = args[0]
456
+ func_state["__class__"] = args[0].__class__
457
+
458
+ result = None
459
+ try:
460
+ for stmt in func_def.body:
461
+ result = evaluate_ast(stmt, func_state, static_tools, custom_tools, authorized_imports)
462
+ except ReturnException as e:
463
+ result = e.value
464
+
465
+ if func_def.name == "__init__":
466
+ return None
467
+
468
+ return result
469
+
470
+ # Store original AST, source code, and name
471
+ new_func.__ast__ = func_def
472
+ new_func.__source__ = source_code
473
+ new_func.__name__ = func_def.name
474
+
475
+ return new_func
476
+
477
+
478
+ def evaluate_function_def(
479
+ func_def: ast.FunctionDef,
480
+ state: dict[str, Any],
481
+ static_tools: dict[str, Callable],
482
+ custom_tools: dict[str, Callable],
483
+ authorized_imports: list[str],
484
+ ) -> Callable:
485
+ custom_tools[func_def.name] = create_function(func_def, state, static_tools, custom_tools, authorized_imports)
486
+ return custom_tools[func_def.name]
487
+
488
+
489
+ def evaluate_class_def(
490
+ class_def: ast.ClassDef,
491
+ state: dict[str, Any],
492
+ static_tools: dict[str, Callable],
493
+ custom_tools: dict[str, Callable],
494
+ authorized_imports: list[str],
495
+ ) -> type:
496
+ class_name = class_def.name
497
+ bases = [evaluate_ast(base, state, static_tools, custom_tools, authorized_imports) for base in class_def.bases]
498
+ class_dict = {}
499
+
500
+ for stmt in class_def.body:
501
+ if isinstance(stmt, ast.FunctionDef):
502
+ class_dict[stmt.name] = evaluate_ast(stmt, state, static_tools, custom_tools, authorized_imports)
503
+ elif isinstance(stmt, ast.AnnAssign):
504
+ if stmt.value:
505
+ value = evaluate_ast(stmt.value, state, static_tools, custom_tools, authorized_imports)
506
+ target = stmt.target
507
+ # Handle target types for annotation
508
+ if isinstance(target, ast.Name):
509
+ # Simple variable annotation like "x: int"
510
+ annotation = evaluate_ast(stmt.annotation, state, static_tools, custom_tools, authorized_imports)
511
+ class_dict.setdefault("__annotations__", {})[target.id] = annotation
512
+ # Assign value if provided
513
+ if stmt.value:
514
+ class_dict[target.id] = value
515
+ elif isinstance(target, ast.Attribute):
516
+ # Attribute annotation like "obj.attr: int"
517
+ obj = evaluate_ast(target.value, class_dict, static_tools, custom_tools, authorized_imports)
518
+ # If there's a value assignment, set the attribute
519
+ if stmt.value:
520
+ setattr(obj, target.attr, value)
521
+ elif isinstance(target, ast.Subscript):
522
+ # Subscript annotation like "dict[key]: int"
523
+ container = evaluate_ast(target.value, class_dict, static_tools, custom_tools, authorized_imports)
524
+ index = evaluate_ast(target.slice, state, static_tools, custom_tools, authorized_imports)
525
+ # If there's a value assignment, set the item
526
+ if stmt.value:
527
+ container[index] = value
528
+ else:
529
+ raise InterpreterError(f"Unsupported AnnAssign target in class body: {type(target).__name__}")
530
+ elif isinstance(stmt, ast.Assign):
531
+ value = evaluate_ast(stmt.value, state, static_tools, custom_tools, authorized_imports)
532
+ for target in stmt.targets:
533
+ if isinstance(target, ast.Name):
534
+ class_dict[target.id] = value
535
+ elif isinstance(target, ast.Attribute):
536
+ obj = evaluate_ast(target.value, class_dict, static_tools, custom_tools, authorized_imports)
537
+ setattr(obj, target.attr, value)
538
+ elif isinstance(stmt, ast.Pass):
539
+ pass
540
+ elif (
541
+ isinstance(stmt, ast.Expr)
542
+ and stmt == class_def.body[0]
543
+ and isinstance(stmt.value, ast.Constant)
544
+ and isinstance(stmt.value.value, str)
545
+ ):
546
+ # Check if it is a docstring: first statement in class body which is a string literal expression
547
+ class_dict["__doc__"] = stmt.value.value
548
+ else:
549
+ raise InterpreterError(f"Unsupported statement in class body: {stmt.__class__.__name__}")
550
+
551
+ new_class = type(class_name, tuple(bases), class_dict)
552
+ state[class_name] = new_class
553
+ return new_class
554
+
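A hedged sketch of what `evaluate_class_def` enables (assuming `smolagents` is installed): the class body is evaluated into a plain dict and assembled with `type(name, bases, dict)`, so docstrings, methods, and attribute assignments all round-trip, and `__init__` deliberately returns `None`.

```python
# Sketch, assuming smolagents is installed: class bodies are interpreted into
# a dict and assembled via type(); methods are create_function closures.
from smolagents.local_python_executor import evaluate_python_code

code = """
class Counter:
    '''A tiny counter.'''

    def __init__(self, start=0):
        self.value = start

    def bump(self):
        self.value += 1
        return self.value

c = Counter(41)
c.bump()
"""
result, _ = evaluate_python_code(code)
print(result)  # 42
```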
555
+
556
+ def evaluate_annassign(
557
+ annassign: ast.AnnAssign,
558
+ state: dict[str, Any],
559
+ static_tools: dict[str, Callable],
560
+ custom_tools: dict[str, Callable],
561
+ authorized_imports: list[str],
562
+ ) -> Any:
563
+ # If there's a value to assign, evaluate it
564
+ if annassign.value:
565
+ value = evaluate_ast(annassign.value, state, static_tools, custom_tools, authorized_imports)
566
+ # Set the value for the target
567
+ set_value(annassign.target, value, state, static_tools, custom_tools, authorized_imports)
568
+ return value
569
+ # For declarations without values (x: int), just return None
570
+ return None
571
+
572
+
573
+ def evaluate_augassign(
574
+ expression: ast.AugAssign,
575
+ state: dict[str, Any],
576
+ static_tools: dict[str, Callable],
577
+ custom_tools: dict[str, Callable],
578
+ authorized_imports: list[str],
579
+ ) -> Any:
580
+ def get_current_value(target: ast.AST) -> Any:
581
+ if isinstance(target, ast.Name):
582
+ return state.get(target.id, 0)
583
+ elif isinstance(target, ast.Subscript):
584
+ obj = evaluate_ast(target.value, state, static_tools, custom_tools, authorized_imports)
585
+ key = evaluate_ast(target.slice, state, static_tools, custom_tools, authorized_imports)
586
+ return obj[key]
587
+ elif isinstance(target, ast.Attribute):
588
+ obj = evaluate_ast(target.value, state, static_tools, custom_tools, authorized_imports)
589
+ return getattr(obj, target.attr)
590
+ elif isinstance(target, ast.Tuple):
591
+ return tuple(get_current_value(elt) for elt in target.elts)
592
+ elif isinstance(target, ast.List):
593
+ return [get_current_value(elt) for elt in target.elts]
594
+ else:
595
+ raise InterpreterError(f"AugAssign not supported for {type(target)} targets.")
596
+
597
+ current_value = get_current_value(expression.target)
598
+ value_to_add = evaluate_ast(expression.value, state, static_tools, custom_tools, authorized_imports)
599
+
600
+ if isinstance(expression.op, ast.Add):
601
+ if isinstance(current_value, list):
602
+ if not isinstance(value_to_add, list):
603
+ raise InterpreterError(f"Cannot add non-list value {value_to_add} to a list.")
604
+ current_value += value_to_add
605
+ else:
606
+ current_value += value_to_add
607
+ elif isinstance(expression.op, ast.Sub):
608
+ current_value -= value_to_add
609
+ elif isinstance(expression.op, ast.Mult):
610
+ current_value *= value_to_add
611
+ elif isinstance(expression.op, ast.Div):
612
+ current_value /= value_to_add
613
+ elif isinstance(expression.op, ast.Mod):
614
+ current_value %= value_to_add
615
+ elif isinstance(expression.op, ast.Pow):
616
+ current_value **= value_to_add
617
+ elif isinstance(expression.op, ast.FloorDiv):
618
+ current_value //= value_to_add
619
+ elif isinstance(expression.op, ast.BitAnd):
620
+ current_value &= value_to_add
621
+ elif isinstance(expression.op, ast.BitOr):
622
+ current_value |= value_to_add
623
+ elif isinstance(expression.op, ast.BitXor):
624
+ current_value ^= value_to_add
625
+ elif isinstance(expression.op, ast.LShift):
626
+ current_value <<= value_to_add
627
+ elif isinstance(expression.op, ast.RShift):
628
+ current_value >>= value_to_add
629
+ else:
630
+ raise InterpreterError(f"Operation {type(expression.op).__name__} is not supported.")
631
+
632
+ # Write the result back to the target (lists were already updated in-place above)
633
+ set_value(
634
+ expression.target,
635
+ current_value,
636
+ state,
637
+ static_tools,
638
+ custom_tools,
639
+ authorized_imports,
640
+ )
641
+
642
+ return current_value
643
+
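A short sketch of the augmented-assignment path (assuming `smolagents` is installed): `+=` reads the current value, applies the operator, then writes the result back through `set_value`.

```python
# Sketch, assuming smolagents is installed: `+=` works on both mutable and
# immutable values; for lists the update happens in place.
from smolagents.local_python_executor import evaluate_python_code

state = {}
evaluate_python_code("xs = [1, 2]\nxs += [3]\nn = 0\nn += 5", state=state)
print(state["xs"], state["n"])  # [1, 2, 3] 5
```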
644
+
645
+ def evaluate_boolop(
646
+ node: ast.BoolOp,
647
+ state: dict[str, Any],
648
+ static_tools: dict[str, Callable],
649
+ custom_tools: dict[str, Callable],
650
+ authorized_imports: list[str],
651
+ ) -> Any:
652
+ # Determine which value should trigger short-circuit based on operation type:
653
+ # - 'and' returns the first falsy value encountered (or the last value if all are truthy)
654
+ # - 'or' returns the first truthy value encountered (or the last value if all are falsy)
655
+ is_short_circuit_value = (lambda x: not x) if isinstance(node.op, ast.And) else (lambda x: bool(x))
656
+ for value in node.values:
657
+ result = evaluate_ast(value, state, static_tools, custom_tools, authorized_imports)
658
+ # Short-circuit: return immediately if the condition is met
659
+ if is_short_circuit_value(result):
660
+ return result
661
+ # If no short-circuit occurred, return the last evaluated value
662
+ return result
663
+
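The short-circuit rule above reproduces CPython semantics: `and`/`or` return the operand that decided the outcome, not a coerced bool. A minimal sketch (assuming `smolagents` is installed):

```python
# Sketch, assuming smolagents is installed: boolean operators return the
# deciding operand, exactly like CPython.
from smolagents.local_python_executor import evaluate_python_code

print(evaluate_python_code("0 or '' or 'fallback'")[0])  # fallback
print(evaluate_python_code("1 and [] and 'never'")[0])   # []
```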
664
+
665
+ def evaluate_binop(
666
+ binop: ast.BinOp,
667
+ state: dict[str, Any],
668
+ static_tools: dict[str, Callable],
669
+ custom_tools: dict[str, Callable],
670
+ authorized_imports: list[str],
671
+ ) -> Any:
672
+ # Recursively evaluate the left and right operands
673
+ left_val = evaluate_ast(binop.left, state, static_tools, custom_tools, authorized_imports)
674
+ right_val = evaluate_ast(binop.right, state, static_tools, custom_tools, authorized_imports)
675
+
676
+ # Determine the operation based on the type of the operator in the BinOp
677
+ if isinstance(binop.op, ast.Add):
678
+ return left_val + right_val
679
+ elif isinstance(binop.op, ast.Sub):
680
+ return left_val - right_val
681
+ elif isinstance(binop.op, ast.Mult):
682
+ return left_val * right_val
683
+ elif isinstance(binop.op, ast.Div):
684
+ return left_val / right_val
685
+ elif isinstance(binop.op, ast.Mod):
686
+ return left_val % right_val
687
+ elif isinstance(binop.op, ast.Pow):
688
+ return left_val**right_val
689
+ elif isinstance(binop.op, ast.FloorDiv):
690
+ return left_val // right_val
691
+ elif isinstance(binop.op, ast.BitAnd):
692
+ return left_val & right_val
693
+ elif isinstance(binop.op, ast.BitOr):
694
+ return left_val | right_val
695
+ elif isinstance(binop.op, ast.BitXor):
696
+ return left_val ^ right_val
697
+ elif isinstance(binop.op, ast.LShift):
698
+ return left_val << right_val
699
+ elif isinstance(binop.op, ast.RShift):
700
+ return left_val >> right_val
701
+ else:
702
+ raise NotImplementedError(f"Binary operation {type(binop.op).__name__} is not implemented.")
703
+
704
+
705
+ def evaluate_assign(
706
+ assign: ast.Assign,
707
+ state: dict[str, Any],
708
+ static_tools: dict[str, Callable],
709
+ custom_tools: dict[str, Callable],
710
+ authorized_imports: list[str],
711
+ ) -> Any:
712
+ result = evaluate_ast(assign.value, state, static_tools, custom_tools, authorized_imports)
713
+ if len(assign.targets) == 1:
714
+ target = assign.targets[0]
715
+ set_value(target, result, state, static_tools, custom_tools, authorized_imports)
716
+ else:
717
+ expanded_values = []
718
+ for tgt in assign.targets:
719
+ if isinstance(tgt, ast.Starred):
720
+ expanded_values.extend(result)
721
+ else:
722
+ expanded_values.append(result)
723
+
724
+ for tgt, val in zip(assign.targets, expanded_values):
725
+ set_value(tgt, val, state, static_tools, custom_tools, authorized_imports)
726
+ return result
727
+
728
+
729
+ def set_value(
730
+ target: ast.AST,
731
+ value: Any,
732
+ state: dict[str, Any],
733
+ static_tools: dict[str, Callable],
734
+ custom_tools: dict[str, Callable],
735
+ authorized_imports: list[str],
736
+ ) -> None:
737
+ if isinstance(target, ast.Name):
738
+ if target.id in static_tools:
739
+ raise InterpreterError(f"Cannot assign to name '{target.id}': doing this would erase the existing tool!")
740
+ state[target.id] = value
741
+ elif isinstance(target, ast.Tuple):
742
+ if not isinstance(value, tuple):
743
+ if hasattr(value, "__iter__") and not isinstance(value, (str, bytes)):
744
+ value = tuple(value)
745
+ else:
746
+ raise InterpreterError("Cannot unpack non-tuple value")
747
+ if len(target.elts) != len(value):
748
+ raise InterpreterError("Cannot unpack tuple of wrong size")
749
+ for i, elem in enumerate(target.elts):
750
+ set_value(elem, value[i], state, static_tools, custom_tools, authorized_imports)
751
+ elif isinstance(target, ast.Subscript):
752
+ obj = evaluate_ast(target.value, state, static_tools, custom_tools, authorized_imports)
753
+ key = evaluate_ast(target.slice, state, static_tools, custom_tools, authorized_imports)
754
+ obj[key] = value
755
+ elif isinstance(target, ast.Attribute):
756
+ obj = evaluate_ast(target.value, state, static_tools, custom_tools, authorized_imports)
757
+ setattr(obj, target.attr, value)
758
+
759
+
760
+ def evaluate_call(
761
+ call: ast.Call,
762
+ state: dict[str, Any],
763
+ static_tools: dict[str, Callable],
764
+ custom_tools: dict[str, Callable],
765
+ authorized_imports: list[str],
766
+ ) -> Any:
767
+ if not isinstance(call.func, (ast.Call, ast.Lambda, ast.Attribute, ast.Name, ast.Subscript)):
768
+ raise InterpreterError(f"This is not a correct function: {call.func}.")
769
+
770
+ func, func_name = None, None
771
+
772
+ if isinstance(call.func, ast.Call):
773
+ func = evaluate_ast(call.func, state, static_tools, custom_tools, authorized_imports)
774
+ elif isinstance(call.func, ast.Lambda):
775
+ func = evaluate_ast(call.func, state, static_tools, custom_tools, authorized_imports)
776
+ elif isinstance(call.func, ast.Attribute):
777
+ obj = evaluate_ast(call.func.value, state, static_tools, custom_tools, authorized_imports)
778
+ func_name = call.func.attr
779
+ if not hasattr(obj, func_name):
780
+ raise InterpreterError(f"Object {obj} has no attribute {func_name}")
781
+ func = getattr(obj, func_name)
782
+ elif isinstance(call.func, ast.Name):
783
+ func_name = call.func.id
784
+ if func_name in state:
785
+ func = state[func_name]
786
+ elif func_name in static_tools:
787
+ func = static_tools[func_name]
788
+ elif func_name in custom_tools:
789
+ func = custom_tools[func_name]
790
+ elif func_name in ERRORS:
791
+ func = ERRORS[func_name]
792
+ else:
793
+ raise InterpreterError(
794
+ f"Forbidden function evaluation: '{call.func.id}' is not among the explicitly allowed tools or defined/imported in the preceding code"
795
+ )
796
+ elif isinstance(call.func, ast.Subscript):
797
+ func = evaluate_ast(call.func, state, static_tools, custom_tools, authorized_imports)
798
+ if not callable(func):
799
+ raise InterpreterError(f"This is not a correct function: {call.func}.")
800
+ func_name = None
801
+
802
+ args = []
803
+ for arg in call.args:
804
+ if isinstance(arg, ast.Starred):
805
+ args.extend(evaluate_ast(arg.value, state, static_tools, custom_tools, authorized_imports))
806
+ else:
807
+ args.append(evaluate_ast(arg, state, static_tools, custom_tools, authorized_imports))
808
+
809
+ kwargs = {
810
+ keyword.arg: evaluate_ast(keyword.value, state, static_tools, custom_tools, authorized_imports)
811
+ for keyword in call.keywords
812
+ }
813
+
814
+ if func_name == "super":
815
+ if not args:
816
+ if "__class__" in state and "self" in state:
817
+ return super(state["__class__"], state["self"])
818
+ else:
819
+ raise InterpreterError("super() needs at least one argument")
820
+ cls = args[0]
821
+ if not isinstance(cls, type):
822
+ raise InterpreterError("super() argument 1 must be type")
823
+ if len(args) == 1:
824
+ return super(cls)
825
+ elif len(args) == 2:
826
+ instance = args[1]
827
+ return super(cls, instance)
828
+ else:
829
+ raise InterpreterError("super() takes at most 2 arguments")
830
+ elif func_name == "print":
831
+ state["_print_outputs"] += " ".join(map(str, args)) + "\n"
832
+ return None
833
+ else: # Assume it's a callable object
834
+ if (inspect.getmodule(func) == builtins) and inspect.isbuiltin(func) and (func not in static_tools.values()):
835
+ raise InterpreterError(
836
+ f"Invoking a builtin function that has not been explicitly added as a tool is not allowed ({func_name})."
837
+ )
838
+ return func(*args, **kwargs)
839
+
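The builtin guard at the end of `evaluate_call` is the core sandboxing rule: builtins that were not explicitly handed in as static tools are refused. A hedged sketch (assuming `smolagents` is installed; the exact error text is wrapped by the top-level `evaluate_python_code` handler):

```python
# Sketch, assuming smolagents is installed: calling a builtin that was not
# provided as a tool raises InterpreterError instead of executing.
from smolagents.local_python_executor import InterpreterError, evaluate_python_code

try:
    evaluate_python_code("open('/etc/passwd')")
except InterpreterError as e:
    print(e)  # ... Forbidden function evaluation: 'open' is not among the explicitly allowed tools ...
```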
840
+
841
+ def evaluate_subscript(
842
+ subscript: ast.Subscript,
843
+ state: dict[str, Any],
844
+ static_tools: dict[str, Callable],
845
+ custom_tools: dict[str, Callable],
846
+ authorized_imports: list[str],
847
+ ) -> Any:
848
+ index = evaluate_ast(subscript.slice, state, static_tools, custom_tools, authorized_imports)
849
+ value = evaluate_ast(subscript.value, state, static_tools, custom_tools, authorized_imports)
850
+ try:
851
+ return value[index]
852
+ except (KeyError, IndexError, TypeError) as e:
853
+ error_message = f"Could not index {value} with '{index}': {type(e).__name__}: {e}"
854
+ if isinstance(index, str) and isinstance(value, Mapping):
855
+ close_matches = difflib.get_close_matches(index, list(value.keys()))
856
+ if len(close_matches) > 0:
857
+ error_message += f". Maybe you meant one of these indexes instead: {str(close_matches)}"
858
+ raise InterpreterError(error_message) from e
859
+
860
+
861
+ def evaluate_name(
862
+ name: ast.Name,
863
+ state: dict[str, Any],
864
+ static_tools: dict[str, Callable],
865
+ custom_tools: dict[str, Callable],
866
+ authorized_imports: list[str],
867
+ ) -> Any:
868
+ if name.id in state:
869
+ return state[name.id]
870
+ elif name.id in static_tools:
871
+ return safer_func(static_tools[name.id], static_tools=static_tools, authorized_imports=authorized_imports)
872
+ elif name.id in custom_tools:
873
+ return custom_tools[name.id]
874
+ elif name.id in ERRORS:
875
+ return ERRORS[name.id]
876
+ close_matches = difflib.get_close_matches(name.id, list(state.keys()))
877
+ if len(close_matches) > 0:
878
+ return state[close_matches[0]]
879
+ raise InterpreterError(f"The variable `{name.id}` is not defined.")
880
+
881
+
882
+ def evaluate_condition(
883
+ condition: ast.Compare,
884
+ state: dict[str, Any],
885
+ static_tools: dict[str, Callable],
886
+ custom_tools: dict[str, Callable],
887
+ authorized_imports: list[str],
888
+ ) -> bool | object:
889
+ result = True
890
+ left = evaluate_ast(condition.left, state, static_tools, custom_tools, authorized_imports)
891
+ for i, (op, comparator) in enumerate(zip(condition.ops, condition.comparators)):
892
+ op = type(op)
893
+ right = evaluate_ast(comparator, state, static_tools, custom_tools, authorized_imports)
894
+ if op == ast.Eq:
895
+ current_result = left == right
896
+ elif op == ast.NotEq:
897
+ current_result = left != right
898
+ elif op == ast.Lt:
899
+ current_result = left < right
900
+ elif op == ast.LtE:
901
+ current_result = left <= right
902
+ elif op == ast.Gt:
903
+ current_result = left > right
904
+ elif op == ast.GtE:
905
+ current_result = left >= right
906
+ elif op == ast.Is:
907
+ current_result = left is right
908
+ elif op == ast.IsNot:
909
+ current_result = left is not right
910
+ elif op == ast.In:
911
+ current_result = left in right
912
+ elif op == ast.NotIn:
913
+ current_result = left not in right
914
+ else:
915
+ raise InterpreterError(f"Unsupported comparison operator: {op}")
916
+
917
+ if current_result is False:
918
+ return False
919
+ result = current_result if i == 0 else (result and current_result)
920
+ left = right
921
+ return result
922
+
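Chained comparisons are evaluated left to right with short-circuiting, as in CPython. A minimal sketch (assuming `smolagents` is installed):

```python
# Sketch, assuming smolagents is installed: chained comparisons short-circuit
# on the first False link.
from smolagents.local_python_executor import evaluate_python_code

print(evaluate_python_code("x = 5\n1 < x < 10")[0])   # True
print(evaluate_python_code("x = 20\n1 < x < 10")[0])  # False
```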
923
+
924
+ def evaluate_if(
925
+ if_statement: ast.If,
926
+ state: dict[str, Any],
927
+ static_tools: dict[str, Callable],
928
+ custom_tools: dict[str, Callable],
929
+ authorized_imports: list[str],
930
+ ) -> Any:
931
+ result = None
932
+ test_result = evaluate_ast(if_statement.test, state, static_tools, custom_tools, authorized_imports)
933
+ if test_result:
934
+ for line in if_statement.body:
935
+ line_result = evaluate_ast(line, state, static_tools, custom_tools, authorized_imports)
936
+ if line_result is not None:
937
+ result = line_result
938
+ else:
939
+ for line in if_statement.orelse:
940
+ line_result = evaluate_ast(line, state, static_tools, custom_tools, authorized_imports)
941
+ if line_result is not None:
942
+ result = line_result
943
+ return result
944
+
945
+
946
+ def evaluate_for(
947
+ for_loop: ast.For,
948
+ state: dict[str, Any],
949
+ static_tools: dict[str, Callable],
950
+ custom_tools: dict[str, Callable],
951
+ authorized_imports: list[str],
952
+ ) -> Any:
953
+ result = None
954
+ iterator = evaluate_ast(for_loop.iter, state, static_tools, custom_tools, authorized_imports)
955
+ for counter in iterator:
956
+ set_value(
957
+ for_loop.target,
958
+ counter,
959
+ state,
960
+ static_tools,
961
+ custom_tools,
962
+ authorized_imports,
963
+ )
964
+ for node in for_loop.body:
965
+ try:
966
+ line_result = evaluate_ast(node, state, static_tools, custom_tools, authorized_imports)
967
+ if line_result is not None:
968
+ result = line_result
969
+ except BreakException:
970
+ break
971
+ except ContinueException:
972
+ continue
973
+ else:
974
+ continue
975
+ break
976
+ return result
977
+
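`break` and `continue` are implemented as `BreakException`/`ContinueException`, and the `for ... else ... break` dance above turns a caught `BreakException` into an exit from the iteration. A sketch (assuming `smolagents` is installed):

```python
# Sketch, assuming smolagents is installed: break exits the interpreted loop
# via BreakException caught inside evaluate_for.
from smolagents.local_python_executor import evaluate_python_code

code = """
found = None
for n in [1, 3, 8, 5]:
    if n % 2 == 0:
        found = n
        break
found
"""
print(evaluate_python_code(code)[0])  # 8
```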
978
+
979
+ def evaluate_listcomp(
980
+ listcomp: ast.ListComp,
981
+ state: dict[str, Any],
982
+ static_tools: dict[str, Callable],
983
+ custom_tools: dict[str, Callable],
984
+ authorized_imports: list[str],
985
+ ) -> list[Any]:
986
+ def inner_evaluate(generators: list[ast.comprehension], index: int, current_state: dict[str, Any]) -> list[Any]:
987
+ if index >= len(generators):
988
+ return [
989
+ evaluate_ast(
990
+ listcomp.elt,
991
+ current_state,
992
+ static_tools,
993
+ custom_tools,
994
+ authorized_imports,
995
+ )
996
+ ]
997
+ generator = generators[index]
998
+ iter_value = evaluate_ast(
999
+ generator.iter,
1000
+ current_state,
1001
+ static_tools,
1002
+ custom_tools,
1003
+ authorized_imports,
1004
+ )
1005
+ result = []
1006
+ for value in iter_value:
1007
+ new_state = current_state.copy()
1008
+ if isinstance(generator.target, ast.Tuple):
1009
+ for idx, elem in enumerate(generator.target.elts):
1010
+ new_state[elem.id] = value[idx]
1011
+ else:
1012
+ new_state[generator.target.id] = value
1013
+ if all(
1014
+ evaluate_ast(if_clause, new_state, static_tools, custom_tools, authorized_imports)
1015
+ for if_clause in generator.ifs
1016
+ ):
1017
+ result.extend(inner_evaluate(generators, index + 1, new_state))
1018
+ return result
1019
+
1020
+ return inner_evaluate(listcomp.generators, 0, state)
1021
+
1022
+
1023
+ def evaluate_setcomp(
1024
+ setcomp: ast.SetComp,
1025
+ state: dict[str, Any],
1026
+ static_tools: dict[str, Callable],
1027
+ custom_tools: dict[str, Callable],
1028
+ authorized_imports: list[str],
1029
+ ) -> set[Any]:
1030
+ result = set()
1031
+ for gen in setcomp.generators:
1032
+ iter_value = evaluate_ast(gen.iter, state, static_tools, custom_tools, authorized_imports)
1033
+ for value in iter_value:
1034
+ new_state = state.copy()
1035
+ set_value(
1036
+ gen.target,
1037
+ value,
1038
+ new_state,
1039
+ static_tools,
1040
+ custom_tools,
1041
+ authorized_imports,
1042
+ )
1043
+ if all(
1044
+ evaluate_ast(if_clause, new_state, static_tools, custom_tools, authorized_imports)
1045
+ for if_clause in gen.ifs
1046
+ ):
1047
+ element = evaluate_ast(
1048
+ setcomp.elt,
1049
+ new_state,
1050
+ static_tools,
1051
+ custom_tools,
1052
+ authorized_imports,
1053
+ )
1054
+ result.add(element)
1055
+ return result
1056
+
1057
+
1058
+ def evaluate_try(
1059
+ try_node: ast.Try,
1060
+ state: dict[str, Any],
1061
+ static_tools: dict[str, Callable],
1062
+ custom_tools: dict[str, Callable],
1063
+ authorized_imports: list[str],
1064
+ ) -> None:
1065
+ try:
1066
+ for stmt in try_node.body:
1067
+ evaluate_ast(stmt, state, static_tools, custom_tools, authorized_imports)
1068
+ except Exception as e:
1069
+ matched = False
1070
+ for handler in try_node.handlers:
1071
+ if handler.type is None or isinstance(
1072
+ e,
1073
+ evaluate_ast(handler.type, state, static_tools, custom_tools, authorized_imports),
1074
+ ):
1075
+ matched = True
1076
+ if handler.name:
1077
+ state[handler.name] = e
1078
+ for stmt in handler.body:
1079
+ evaluate_ast(stmt, state, static_tools, custom_tools, authorized_imports)
1080
+ break
1081
+ if not matched:
1082
+ raise e
1083
+ else:
1084
+ if try_node.orelse:
1085
+ for stmt in try_node.orelse:
1086
+ evaluate_ast(stmt, state, static_tools, custom_tools, authorized_imports)
1087
+ finally:
1088
+ if try_node.finalbody:
1089
+ for stmt in try_node.finalbody:
1090
+ evaluate_ast(stmt, state, static_tools, custom_tools, authorized_imports)
1091
+
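A sketch of the try/except/finally handling (assuming `smolagents` is installed): exceptions raised in the interpreted body are matched against evaluated handler types, and `finally` always runs.

```python
# Sketch, assuming smolagents is installed: handler types resolve through
# evaluate_name (builtin exceptions live in the ERRORS mapping).
from smolagents.local_python_executor import evaluate_python_code

code = """
log = []
try:
    1 / 0
except ZeroDivisionError:
    log.append('caught')
finally:
    log.append('cleanup')
log
"""
print(evaluate_python_code(code)[0])  # ['caught', 'cleanup']
```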
1092
+
1093
+ def evaluate_raise(
1094
+ raise_node: ast.Raise,
1095
+ state: dict[str, Any],
1096
+ static_tools: dict[str, Callable],
1097
+ custom_tools: dict[str, Callable],
1098
+ authorized_imports: list[str],
1099
+ ) -> None:
1100
+ if raise_node.exc is not None:
1101
+ exc = evaluate_ast(raise_node.exc, state, static_tools, custom_tools, authorized_imports)
1102
+ else:
1103
+ exc = None
1104
+ if raise_node.cause is not None:
1105
+ cause = evaluate_ast(raise_node.cause, state, static_tools, custom_tools, authorized_imports)
1106
+ else:
1107
+ cause = None
1108
+ if exc is not None:
1109
+ if cause is not None:
1110
+ raise exc from cause
1111
+ else:
1112
+ raise exc
1113
+ else:
1114
+ raise InterpreterError("Re-raise is not supported without an active exception")
1115
+
1116
+
1117
+ def evaluate_assert(
1118
+ assert_node: ast.Assert,
1119
+ state: dict[str, Any],
1120
+ static_tools: dict[str, Callable],
1121
+ custom_tools: dict[str, Callable],
1122
+ authorized_imports: list[str],
1123
+ ) -> None:
1124
+ test_result = evaluate_ast(assert_node.test, state, static_tools, custom_tools, authorized_imports)
1125
+ if not test_result:
1126
+ if assert_node.msg:
1127
+ msg = evaluate_ast(assert_node.msg, state, static_tools, custom_tools, authorized_imports)
1128
+ raise AssertionError(msg)
1129
+ else:
1130
+ # Include the failing condition in the assertion message
1131
+ test_code = ast.unparse(assert_node.test)
1132
+ raise AssertionError(f"Assertion failed: {test_code}")
1133
+
1134
+
1135
+ def evaluate_with(
1136
+ with_node: ast.With,
1137
+ state: dict[str, Any],
1138
+ static_tools: dict[str, Callable],
1139
+ custom_tools: dict[str, Callable],
1140
+ authorized_imports: list[str],
1141
+ ) -> None:
1142
+ contexts = []
1143
+ for item in with_node.items:
1144
+ context_expr = evaluate_ast(item.context_expr, state, static_tools, custom_tools, authorized_imports)
1145
+ if item.optional_vars:
1146
+ state[item.optional_vars.id] = context_expr.__enter__()
1147
+ contexts.append(state[item.optional_vars.id])
1148
+ else:
1149
+ context_var = context_expr.__enter__()
1150
+ contexts.append(context_var)
1151
+
1152
+ try:
1153
+ for stmt in with_node.body:
1154
+ evaluate_ast(stmt, state, static_tools, custom_tools, authorized_imports)
1155
+ except Exception as e:
1156
+ for context in reversed(contexts):
1157
+ context.__exit__(type(e), e, e.__traceback__)
1158
+ raise
1159
+ else:
1160
+ for context in reversed(contexts):
1161
+ context.__exit__(None, None, None)
1162
+
1163
+
1164
+ def get_safe_module(raw_module, authorized_imports, visited=None):
1165
+ """Create a safe copy of a module, or return the input unchanged if it is not a module."""
1166
+ # If it's a function or non-module object, return it directly
1167
+ if not isinstance(raw_module, ModuleType):
1168
+ return raw_module
1169
+
1170
+ # Handle circular references: Initialize visited set for the first call
1171
+ if visited is None:
1172
+ visited = set()
1173
+
1174
+ module_id = id(raw_module)
1175
+ if module_id in visited:
1176
+ return raw_module # Return original for circular refs
1177
+
1178
+ visited.add(module_id)
1179
+
1180
+ # Create new module for actual modules
1181
+ safe_module = ModuleType(raw_module.__name__)
1182
+
1183
+ # Copy all attributes by reference, recursively checking modules
1184
+ for attr_name in dir(raw_module):
1185
+ try:
1186
+ attr_value = getattr(raw_module, attr_name)
1187
+ except (ImportError, AttributeError) as e:
1188
+ # lazy / dynamic loading module -> INFO log and skip
1189
+ logger.info(
1190
+ f"Skipping import error while copying {raw_module.__name__}.{attr_name}: {type(e).__name__} - {e}"
1191
+ )
1192
+ continue
1193
+ # Recursively process nested modules, passing visited set
1194
+ if isinstance(attr_value, ModuleType):
1195
+ attr_value = get_safe_module(attr_value, authorized_imports, visited=visited)
1196
+
1197
+ setattr(safe_module, attr_name, attr_value)
1198
+
1199
+ return safe_module
1200
+
1201
+
1202
+ def evaluate_import(expression, state, authorized_imports):
1203
+ if isinstance(expression, ast.Import):
1204
+ for alias in expression.names:
1205
+ if check_import_authorized(alias.name, authorized_imports):
1206
+ raw_module = import_module(alias.name)
1207
+ state[alias.asname or alias.name] = get_safe_module(raw_module, authorized_imports)
1208
+ else:
1209
+ raise InterpreterError(
1210
+ f"Import of {alias.name} is not allowed. Authorized imports are: {str(authorized_imports)}"
1211
+ )
1212
+ return None
1213
+ elif isinstance(expression, ast.ImportFrom):
1214
+ if check_import_authorized(expression.module, authorized_imports):
1215
+ raw_module = __import__(expression.module, fromlist=[alias.name for alias in expression.names])
1216
+ module = get_safe_module(raw_module, authorized_imports)
1217
+ if expression.names[0].name == "*": # Handle "from module import *"
1218
+ if hasattr(module, "__all__"): # If module has __all__, import only those names
1219
+ for name in module.__all__:
1220
+ state[name] = getattr(module, name)
1221
+ else: # If no __all__, import all public names (those not starting with '_')
1222
+ for name in dir(module):
1223
+ if not name.startswith("_"):
1224
+ state[name] = getattr(module, name)
1225
+ else: # regular from imports
1226
+ for alias in expression.names:
1227
+ if hasattr(module, alias.name):
1228
+ state[alias.asname or alias.name] = getattr(module, alias.name)
1229
+ else:
1230
+ raise InterpreterError(f"Module {expression.module} has no attribute {alias.name}")
1231
+ else:
1232
+ raise InterpreterError(
1233
+ f"Import from {expression.module} is not allowed. Authorized imports are: {str(authorized_imports)}"
1234
+ )
1235
+ return None
1236
+
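Imports are gated by `authorized_imports`, and authorized modules are wrapped by `get_safe_module` before landing in the state. A hedged sketch (assuming `smolagents` is installed):

```python
# Sketch, assuming smolagents is installed: only whitelisted modules import;
# anything else raises InterpreterError.
from smolagents.local_python_executor import InterpreterError, evaluate_python_code

print(evaluate_python_code("import math\nmath.sqrt(16)", authorized_imports=["math"])[0])  # 4.0

try:
    evaluate_python_code("import os", authorized_imports=["math"])
except InterpreterError as e:
    print(e)  # ... Import of os is not allowed ...
```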
1237
+
1238
+ def evaluate_dictcomp(
1239
+ dictcomp: ast.DictComp,
1240
+ state: dict[str, Any],
1241
+ static_tools: dict[str, Callable],
1242
+ custom_tools: dict[str, Callable],
1243
+ authorized_imports: list[str],
1244
+ ) -> dict[Any, Any]:
1245
+ result = {}
1246
+ for gen in dictcomp.generators:
1247
+ iter_value = evaluate_ast(gen.iter, state, static_tools, custom_tools, authorized_imports)
1248
+ for value in iter_value:
1249
+ new_state = state.copy()
1250
+ set_value(
1251
+ gen.target,
1252
+ value,
1253
+ new_state,
1254
+ static_tools,
1255
+ custom_tools,
1256
+ authorized_imports,
1257
+ )
1258
+ if all(
1259
+ evaluate_ast(if_clause, new_state, static_tools, custom_tools, authorized_imports)
1260
+ for if_clause in gen.ifs
1261
+ ):
1262
+ key = evaluate_ast(
1263
+ dictcomp.key,
1264
+ new_state,
1265
+ static_tools,
1266
+ custom_tools,
1267
+ authorized_imports,
1268
+ )
1269
+ val = evaluate_ast(
1270
+ dictcomp.value,
1271
+ new_state,
1272
+ static_tools,
1273
+ custom_tools,
1274
+ authorized_imports,
1275
+ )
1276
+ result[key] = val
1277
+ return result
1278
+
1279
+
1280
+ def evaluate_delete(
1281
+ delete_node: ast.Delete,
1282
+ state: dict[str, Any],
1283
+ static_tools: dict[str, Callable],
1284
+ custom_tools: dict[str, Callable],
1285
+ authorized_imports: list[str],
1286
+ ) -> None:
1287
+ """
1288
+ Evaluate a delete statement (del x, del x[y]).
1289
+
1290
+ Args:
1291
+ delete_node: The AST Delete node to evaluate
1292
+ state: The current state dictionary
1293
+ static_tools: Dictionary of static tools
1294
+ custom_tools: Dictionary of custom tools
1295
+ authorized_imports: List of authorized imports
1296
+ """
1297
+ for target in delete_node.targets:
1298
+ if isinstance(target, ast.Name):
1299
+ # Handle simple variable deletion (del x)
1300
+ if target.id in state:
1301
+ del state[target.id]
1302
+ else:
1303
+ raise InterpreterError(f"Cannot delete name '{target.id}': name is not defined")
1304
+ elif isinstance(target, ast.Subscript):
1305
+ # Handle index/key deletion (del x[y])
1306
+ obj = evaluate_ast(target.value, state, static_tools, custom_tools, authorized_imports)
1307
+ index = evaluate_ast(target.slice, state, static_tools, custom_tools, authorized_imports)
1308
+ try:
1309
+ del obj[index]
1310
+ except (TypeError, KeyError, IndexError) as e:
1311
+ raise InterpreterError(f"Cannot delete index/key: {str(e)}")
1312
+ else:
1313
+ raise InterpreterError(f"Deletion of {type(target).__name__} targets is not supported")
1314
+
1315
+
1316
+ @safer_eval
1317
+ def evaluate_ast(
1318
+ expression: ast.AST,
1319
+ state: dict[str, Any],
1320
+ static_tools: dict[str, Callable],
1321
+ custom_tools: dict[str, Callable],
1322
+ authorized_imports: list[str] = BASE_BUILTIN_MODULES,
1323
+ ):
1324
+ """
1325
+ Evaluate an abstract syntax tree using the content of the variables stored in a state and only evaluating a given
1326
+ set of functions.
1327
+
1328
+ This function will recurse through the nodes of the tree provided.
1329
+
1330
+ Args:
1331
+ expression (`ast.AST`):
1332
+ The code to evaluate, as an abstract syntax tree.
1333
+ state (`Dict[str, Any]`):
1334
+ A dictionary mapping variable names to values. The `state` is updated if need be when the evaluation
1335
+ encounters assignments.
1336
+ static_tools (`Dict[str, Callable]`):
1337
+ Functions that may be called during the evaluation. Trying to change one of these static_tools will raise an error.
1338
+ custom_tools (`Dict[str, Callable]`):
1339
+ Functions that may be called during the evaluation. These custom_tools can be overwritten.
1340
+ authorized_imports (`List[str]`):
1341
+ The list of modules that can be imported by the code. By default, only a few safe modules are allowed.
1342
+ If it contains "*", it will authorize any import. Use this at your own risk!
1343
+ """
1344
+ if state.setdefault("_operations_count", {"counter": 0})["counter"] >= MAX_OPERATIONS:
1345
+ raise InterpreterError(
1346
+ f"Reached the max number of operations of {MAX_OPERATIONS}. Maybe there is an infinite loop somewhere in the code, or you're just asking for too many calculations."
1347
+ )
1348
+ state["_operations_count"]["counter"] += 1
1349
+ common_params = (state, static_tools, custom_tools, authorized_imports)
1350
+ if isinstance(expression, ast.Assign):
1351
+ # Assignment -> we evaluate the assignment which should update the state
1352
+ # We return the variable assigned as it may be used to determine the final result.
1353
+ return evaluate_assign(expression, *common_params)
1354
+ elif isinstance(expression, ast.AnnAssign):
1355
+ return evaluate_annassign(expression, *common_params)
1356
+ elif isinstance(expression, ast.AugAssign):
1357
+ return evaluate_augassign(expression, *common_params)
1358
+ elif isinstance(expression, ast.Call):
1359
+ # Function call -> we return the value of the function call
1360
+ return evaluate_call(expression, *common_params)
1361
+ elif isinstance(expression, ast.Constant):
1362
+ # Constant -> just return the value
1363
+ return expression.value
1364
+ elif isinstance(expression, ast.Tuple):
1365
+ return tuple((evaluate_ast(elt, *common_params) for elt in expression.elts))
1366
+ elif isinstance(expression, (ast.ListComp, ast.GeneratorExp)):
1367
+ return evaluate_listcomp(expression, *common_params)
1368
+ elif isinstance(expression, ast.DictComp):
1369
+ return evaluate_dictcomp(expression, *common_params)
1370
+ elif isinstance(expression, ast.SetComp):
1371
+ return evaluate_setcomp(expression, *common_params)
1372
+ elif isinstance(expression, ast.UnaryOp):
1373
+ return evaluate_unaryop(expression, *common_params)
1374
+ elif isinstance(expression, ast.Starred):
1375
+ return evaluate_ast(expression.value, *common_params)
1376
+ elif isinstance(expression, ast.BoolOp):
1377
+ # Boolean operation -> evaluate the operation
1378
+ return evaluate_boolop(expression, *common_params)
1379
+ elif isinstance(expression, ast.Break):
1380
+ raise BreakException()
1381
+ elif isinstance(expression, ast.Continue):
1382
+ raise ContinueException()
1383
+ elif isinstance(expression, ast.BinOp):
1384
+ # Binary operation -> execute operation
1385
+ return evaluate_binop(expression, *common_params)
1386
+ elif isinstance(expression, ast.Compare):
1387
+ # Comparison -> evaluate the comparison
1388
+ return evaluate_condition(expression, *common_params)
1389
+ elif isinstance(expression, ast.Lambda):
1390
+ return evaluate_lambda(expression, *common_params)
1391
+ elif isinstance(expression, ast.FunctionDef):
1392
+ return evaluate_function_def(expression, *common_params)
1393
+ elif isinstance(expression, ast.Dict):
1394
+ # Dict -> evaluate all keys and values
1395
+ keys = (evaluate_ast(k, *common_params) for k in expression.keys)
1396
+ values = (evaluate_ast(v, *common_params) for v in expression.values)
1397
+ return dict(zip(keys, values))
1398
+ elif isinstance(expression, ast.Expr):
1399
+ # Expression -> evaluate the content
1400
+ return evaluate_ast(expression.value, *common_params)
1401
+ elif isinstance(expression, ast.For):
1402
+ # For loop -> execute the loop
1403
+ return evaluate_for(expression, *common_params)
1404
+ elif isinstance(expression, ast.FormattedValue):
1405
+ # Formatted value (part of f-string) -> evaluate the content and format it
1406
+ value = evaluate_ast(expression.value, *common_params)
1407
+ # Early return if no format spec
1408
+ if not expression.format_spec:
1409
+ return value
1410
+ # Apply format specification
1411
+ format_spec = evaluate_ast(expression.format_spec, *common_params)
1412
+ return format(value, format_spec)
1413
+ elif isinstance(expression, ast.If):
1414
+ # If -> execute the right branch
1415
+ return evaluate_if(expression, *common_params)
1416
+ elif hasattr(ast, "Index") and isinstance(expression, ast.Index):
1417
+ return evaluate_ast(expression.value, *common_params)
1418
+ elif isinstance(expression, ast.JoinedStr):
1419
+ return "".join([str(evaluate_ast(v, *common_params)) for v in expression.values])
1420
+ elif isinstance(expression, ast.List):
1421
+ # List -> evaluate all elements
1422
+ return [evaluate_ast(elt, *common_params) for elt in expression.elts]
1423
+ elif isinstance(expression, ast.Name):
1424
+ # Name -> pick up the value in the state
1425
+ return evaluate_name(expression, *common_params)
1426
+ elif isinstance(expression, ast.Subscript):
1427
+ # Subscript -> return the value of the indexing
1428
+ return evaluate_subscript(expression, *common_params)
1429
+ elif isinstance(expression, ast.IfExp):
1430
+ test_val = evaluate_ast(expression.test, *common_params)
1431
+ if test_val:
1432
+ return evaluate_ast(expression.body, *common_params)
1433
+ else:
1434
+ return evaluate_ast(expression.orelse, *common_params)
1435
+ elif isinstance(expression, ast.Attribute):
1436
+ return evaluate_attribute(expression, *common_params)
1437
+ elif isinstance(expression, ast.Slice):
1438
+ return slice(
1439
+ evaluate_ast(expression.lower, *common_params) if expression.lower is not None else None,
1440
+ evaluate_ast(expression.upper, *common_params) if expression.upper is not None else None,
1441
+ evaluate_ast(expression.step, *common_params) if expression.step is not None else None,
1442
+ )
1443
+ elif isinstance(expression, ast.While):
1444
+ return evaluate_while(expression, *common_params)
1445
+ elif isinstance(expression, (ast.Import, ast.ImportFrom)):
1446
+ return evaluate_import(expression, state, authorized_imports)
1447
+ elif isinstance(expression, ast.ClassDef):
1448
+ return evaluate_class_def(expression, *common_params)
1449
+ elif isinstance(expression, ast.Try):
1450
+ return evaluate_try(expression, *common_params)
1451
+ elif isinstance(expression, ast.Raise):
1452
+ return evaluate_raise(expression, *common_params)
1453
+ elif isinstance(expression, ast.Assert):
1454
+ return evaluate_assert(expression, *common_params)
1455
+ elif isinstance(expression, ast.With):
1456
+ return evaluate_with(expression, *common_params)
1457
+ elif isinstance(expression, ast.Set):
1458
+ return set((evaluate_ast(elt, *common_params) for elt in expression.elts))
1459
+ elif isinstance(expression, ast.Return):
1460
+ raise ReturnException(evaluate_ast(expression.value, *common_params) if expression.value else None)
1461
+ elif isinstance(expression, ast.Pass):
1462
+ return None
1463
+ elif isinstance(expression, ast.Delete):
1464
+ return evaluate_delete(expression, *common_params)
1465
+ else:
1466
+ # For now we refuse anything else. Let's add things as we need them.
1467
+ raise InterpreterError(f"{expression.__class__.__name__} is not supported.")
1468
+
1469
+
1470
+ class FinalAnswerException(Exception):
1471
+ def __init__(self, value):
1472
+ self.value = value
1473
+
1474
+
1475
+ def evaluate_python_code(
1476
+ code: str,
1477
+ static_tools: dict[str, Callable] | None = None,
1478
+ custom_tools: dict[str, Callable] | None = None,
1479
+ state: dict[str, Any] | None = None,
1480
+ authorized_imports: list[str] = BASE_BUILTIN_MODULES,
1481
+ max_print_outputs_length: int = DEFAULT_MAX_LEN_OUTPUT,
1482
+ ):
1483
+ """
1484
+ Evaluate a python expression using the content of the variables stored in a state and only evaluating a given set
1485
+ of functions.
1486
+
1487
+ This function will recurse through the nodes of the tree provided.
1488
+
1489
+ Args:
1490
+ code (`str`):
1491
+ The code to evaluate.
1492
+ static_tools (`Dict[str, Callable]`):
1493
+ The functions that may be called during the evaluation. These can also be agents in a multiagent setting.
1494
+ These tools cannot be overwritten in the code: any assignment to their name will raise an error.
1495
+ custom_tools (`Dict[str, Callable]`):
1496
+ The functions that may be called during the evaluation.
1497
+ These tools can be overwritten in the code: any assignment to their name will overwrite them.
1498
+ state (`Dict[str, Any]`):
1499
+ A dictionary mapping variable names to values. The `state` should contain the initial inputs but will be
1500
+ updated by this function to contain all variables as they are evaluated.
1501
+ The print outputs will be stored in the state under the key "_print_outputs".
1502
+ authorized_imports (`List[str]`):
+ The list of modules that can be imported by the code. If it contains "*", any import is authorized. Use at your own risk!
+ max_print_outputs_length (`int`):
+ Maximum length of the captured print outputs before truncation.
+ """
1503
+ try:
1504
+ expression = ast.parse(code)
1505
+ except SyntaxError as e:
1506
+ raise InterpreterError(
1507
+ f"Code parsing failed on line {e.lineno} due to: {type(e).__name__}\n"
1508
+ f"{e.text}"
1509
+ f"{' ' * (e.offset or 0)}^\n"
1510
+ f"Error: {str(e)}"
1511
+ )
1512
+
1513
+ if state is None:
1514
+ state = {}
1515
+ static_tools = static_tools.copy() if static_tools is not None else {}
1516
+ custom_tools = custom_tools if custom_tools is not None else {}
1517
+ result = None
1518
+ state["_print_outputs"] = PrintContainer()
1519
+ state["_operations_count"] = {"counter": 0}
1520
+
1521
+ if "final_answer" in static_tools:
1522
+ previous_final_answer = static_tools["final_answer"]
1523
+
1524
+ def final_answer(*args, **kwargs): # Allow arbitrary arguments to be passed
1525
+ raise FinalAnswerException(previous_final_answer(*args, **kwargs))
1526
+
1527
+ static_tools["final_answer"] = final_answer
1528
+
1529
+ try:
1530
+ for node in expression.body:
1531
+ result = evaluate_ast(node, state, static_tools, custom_tools, authorized_imports)
1532
+ state["_print_outputs"].value = truncate_content(
1533
+ str(state["_print_outputs"]), max_length=max_print_outputs_length
1534
+ )
1535
+ is_final_answer = False
1536
+ return result, is_final_answer
1537
+ except FinalAnswerException as e:
1538
+ state["_print_outputs"].value = truncate_content(
1539
+ str(state["_print_outputs"]), max_length=max_print_outputs_length
1540
+ )
1541
+ is_final_answer = True
1542
+ return e.value, is_final_answer
1543
+ except Exception as e:
1544
+ state["_print_outputs"].value = truncate_content(
1545
+ str(state["_print_outputs"]), max_length=max_print_outputs_length
1546
+ )
1547
+ raise InterpreterError(
1548
+ f"Code execution failed at line '{ast.get_source_segment(code, node)}' due to: {type(e).__name__}: {e}"
1549
+ )
1550
+
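A sketch of the `final_answer` mechanism just defined (assuming `smolagents` is installed): the provided tool is wrapped so that calling it raises `FinalAnswerException`, which `evaluate_python_code` reports back as `is_final_answer=True`.

```python
# Sketch, assuming smolagents is installed: final_answer short-circuits
# execution and flags the result as final.
from smolagents.local_python_executor import evaluate_python_code

result, is_final_answer = evaluate_python_code(
    "final_answer(6 * 7)",
    static_tools={"final_answer": lambda x: x},
)
print(result, is_final_answer)  # 42 True
```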
1551
+
1552
+ class PythonExecutor:
1553
+ pass
1554
+
1555
+
1556
+ class LocalPythonExecutor(PythonExecutor):
1557
+ """
1558
+ Executor of Python code in a local environment.
1559
+
1560
+ This executor evaluates Python code with restricted access to imports and built-in functions,
1561
+ making it suitable for running untrusted code. It maintains state between executions,
1562
+ allows for custom tools and functions to be made available to the code, and captures
1563
+ print outputs separately from return values.
1564
+
1565
+ Args:
1566
+ additional_authorized_imports (`list[str]`):
1567
+ Additional authorized imports for the executor.
1568
+ max_print_outputs_length (`int`, defaults to `DEFAULT_MAX_LEN_OUTPUT=50_000`):
1569
+ Maximum length of the print outputs.
1570
+ additional_functions (`dict[str, Callable]`, *optional*):
1571
+ Additional Python functions to be added to the executor.
1572
+ """
1573
+
1574
+ def __init__(
1575
+ self,
1576
+ additional_authorized_imports: list[str],
1577
+ max_print_outputs_length: int | None = None,
1578
+ additional_functions: dict[str, Callable] | None = None,
1579
+ ):
1580
+ self.custom_tools = {}
1581
+ self.state = {"__name__": "__main__"}
1582
+ self.max_print_outputs_length = max_print_outputs_length
1583
+ if max_print_outputs_length is None:
1584
+ self.max_print_outputs_length = DEFAULT_MAX_LEN_OUTPUT
1585
+ self.additional_authorized_imports = additional_authorized_imports
1586
+ self.authorized_imports = list(set(BASE_BUILTIN_MODULES) | set(self.additional_authorized_imports))
1587
+ # TODO: assert self.authorized imports are all installed locally
1588
+ self.static_tools = None
1589
+ self.additional_functions = additional_functions or {}
1590
+
1591
+ def __call__(self, code_action: str) -> tuple[Any, str, bool]:
1592
+ output, is_final_answer = evaluate_python_code(
1593
+ code_action,
1594
+ static_tools=self.static_tools,
1595
+ custom_tools=self.custom_tools,
1596
+ state=self.state,
1597
+ authorized_imports=self.authorized_imports,
1598
+ max_print_outputs_length=self.max_print_outputs_length,
1599
+ )
1600
+ logs = str(self.state["_print_outputs"])
1601
+ return output, logs, is_final_answer
1602
+
1603
+ def send_variables(self, variables: dict):
1604
+ self.state.update(variables)
1605
+
1606
+ def send_tools(self, tools: dict[str, Tool]):
1607
+ # Combine agent tools, base Python tools, and additional Python functions
1608
+ self.static_tools = {**tools, **BASE_PYTHON_TOOLS.copy(), **self.additional_functions}
1609
+
1610
+
1611
+ __all__ = ["evaluate_python_code", "LocalPythonExecutor"]
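Typical executor usage, as a hedged sketch (assuming `smolagents` is installed): state persists across calls, print output comes back as logs, and `send_tools()` must be called once so the base Python tools (including `print`) are installed as static tools.

```python
# Sketch, assuming smolagents is installed: the executor wraps
# evaluate_python_code with persistent state and captured logs.
from smolagents.local_python_executor import LocalPythonExecutor

executor = LocalPythonExecutor(additional_authorized_imports=["math"])
executor.send_tools({})  # no agent tools; installs BASE_PYTHON_TOOLS
output, logs, is_final_answer = executor("import math\nx = math.pi\nprint(x)\nx")
print(output)  # 3.141592653589793
print(logs)    # 3.141592653589793
```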
src/smolagents/mcp_client.py ADDED
@@ -0,0 +1,154 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+
4
+ # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ from __future__ import annotations
19
+
20
+ import warnings
21
+ from types import TracebackType
22
+ from typing import TYPE_CHECKING, Any
23
+
24
+ from smolagents.tools import Tool
25
+
26
+
27
+ __all__ = ["MCPClient"]
28
+
29
+ if TYPE_CHECKING:
30
+ from mcpadapt.core import StdioServerParameters
31
+
32
+
33
+ class MCPClient:
34
+ """Manages the connection to an MCP server and makes its tools available to SmolAgents.
35
+
36
+ Note: tools can only be accessed after the connection has been started with the
37
+ `connect()` method, which is done during init. If you don't use the context manager,
38
+ we strongly encourage you to use "try ... finally" to ensure the connection is cleaned up.
39
+
40
+ Args:
41
+ server_parameters (StdioServerParameters | dict[str, Any] | list[StdioServerParameters | dict[str, Any]]):
42
+ Configuration parameters to connect to the MCP server. Can be a list if you want to connect multiple MCPs at once.
43
+
44
+ - An instance of `mcp.StdioServerParameters` for connecting a Stdio MCP server via standard input/output using a subprocess.
45
+
46
+ - A `dict` with at least:
47
+ - "url": URL of the server.
48
+ - "transport": Transport protocol to use, one of:
49
+ - "streamable-http": (recommended) Streamable HTTP transport.
50
+ - "sse": Legacy HTTP+SSE transport (deprecated).
51
+ If "transport" is omitted, the legacy "sse" transport is assumed (a deprecation warning will be issued).
52
+
53
+ <Deprecated version="1.17.0">
54
+ The HTTP+SSE transport is deprecated and future behavior will default to the Streamable HTTP transport.
55
+ Please pass explicitly the "transport" key.
56
+ </Deprecated>
57
+
58
+ Example:
59
+ ```python
60
+ # fully managed context manager + stdio
61
+ with MCPClient(...) as tools:
62
+ # tools are now available
63
+
64
+ # context manager + Streamable HTTP transport:
65
+ with MCPClient({"url": "http://localhost:8000/mcp", "transport": "streamable-http"}) as tools:
66
+ # tools are now available
67
+
68
+ # manually manage the connection via the mcp_client object:
69
+ try:
70
+ mcp_client = MCPClient(...)
71
+ tools = mcp_client.get_tools()
72
+
73
+ # use your tools here.
74
+ finally:
75
+ mcp_client.disconnect()
76
+ ```
77
+ """
78
+
79
+ def __init__(
80
+ self,
81
+ server_parameters: "StdioServerParameters" | dict[str, Any] | list["StdioServerParameters" | dict[str, Any]],
82
+ ):
83
+ try:
84
+ from mcpadapt.core import MCPAdapt
85
+ from mcpadapt.smolagents_adapter import SmolAgentsAdapter
86
+ except ModuleNotFoundError:
87
+ raise ModuleNotFoundError("Please install 'mcp' extra to use MCPClient: `pip install 'smolagents[mcp]'`")
88
+ if isinstance(server_parameters, dict):
89
+ transport = server_parameters.get("transport")
90
+ if transport is None:
91
+ warnings.warn(
92
+ "Passing a dict as server_parameters without specifying the 'transport' key is deprecated. "
93
+ "For now, it defaults to the legacy 'sse' (HTTP+SSE) transport, but this default will change "
94
+ "to 'streamable-http' in version 1.20. Please add the 'transport' key explicitly. ",
95
+ FutureWarning,
96
+ )
97
+ transport = "sse"
98
+ server_parameters["transport"] = transport
99
+ if transport not in {"sse", "streamable-http"}:
100
+ raise ValueError(
101
+ f"Unsupported transport: {transport}. Supported transports are 'streamable-http' and 'sse'."
102
+ )
103
+ self._adapter = MCPAdapt(server_parameters, SmolAgentsAdapter())
104
+ self._tools: list[Tool] | None = None
105
+ self.connect()
106
+
107
+ def connect(self):
108
+ """Connect to the MCP server and initialize the tools."""
109
+ self._tools: list[Tool] = self._adapter.__enter__()
110
+
111
+ def disconnect(
112
+ self,
113
+ exc_type: type[BaseException] | None = None,
114
+ exc_value: BaseException | None = None,
115
+ exc_traceback: TracebackType | None = None,
116
+ ):
117
+ """Disconnect from the MCP server"""
118
+ self._adapter.__exit__(exc_type, exc_value, exc_traceback)
119
+
120
+ def get_tools(self) -> list[Tool]:
121
+ """The SmolAgents tools available from the MCP server.
122
+
123
+ Note: for now, this always returns the tools available at the creation of the session,
124
+ but a future release will also return any new tools the MCP server exposes
125
+ at call time.
126
+
127
+ Raises:
128
+ ValueError: If the MCP server tools are None (usually meaning the server has not been started).
129
+
130
+ Returns:
131
+ list[Tool]: The SmolAgents tools available from the MCP server.
132
+ """
133
+ if self._tools is None:
134
+ raise ValueError(
135
+ "Couldn't retrieve tools from MCP server, run `mcp_client.connect()` first before accessing `tools`"
136
+ )
137
+ return self._tools
138
+
139
+ def __enter__(self) -> list[Tool]:
140
+ """Connect to the MCP server and return the tools directly.
141
+
142
+ Note that because of the `.connect` in the init, the mcp_client
143
+ is already connected at this point.
144
+ """
145
+ return self._tools
146
+
147
+ def __exit__(
148
+ self,
149
+ exc_type: type[BaseException] | None,
150
+ exc_value: BaseException | None,
151
+ exc_traceback: TracebackType | None,
152
+ ):
153
+ """Disconnect from the MCP server."""
154
+ self.disconnect(exc_type, exc_value, exc_traceback)
src/smolagents/memory.py ADDED
@@ -0,0 +1,257 @@
1
+ from dataclasses import asdict, dataclass
2
+ from logging import getLogger
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ from smolagents.models import ChatMessage, MessageRole
6
+ from smolagents.monitoring import AgentLogger, LogLevel, Timing, TokenUsage
7
+ from smolagents.utils import AgentError, make_json_serializable
8
+
9
+
10
+ if TYPE_CHECKING:
11
+ import PIL.Image
12
+
13
+ from smolagents.models import ChatMessage
14
+ from smolagents.monitoring import AgentLogger
15
+
16
+
17
+ logger = getLogger(__name__)
18
+
19
+
20
+ @dataclass
21
+ class ToolCall:
22
+ name: str
23
+ arguments: Any
24
+ id: str
25
+
26
+ def dict(self):
27
+ return {
28
+ "id": self.id,
29
+ "type": "function",
30
+ "function": {
31
+ "name": self.name,
32
+ "arguments": make_json_serializable(self.arguments),
33
+ },
34
+ }
35
+
36
+
37
+ @dataclass
38
+ class MemoryStep:
39
+ def dict(self):
40
+ return asdict(self)
41
+
42
+ def to_messages(self, summary_mode: bool = False) -> list[ChatMessage]:
43
+ raise NotImplementedError
44
+
45
+
46
+ @dataclass
47
+ class ActionStep(MemoryStep):
48
+ step_number: int
49
+ timing: Timing
50
+ model_input_messages: list[ChatMessage] | None = None
51
+ tool_calls: list[ToolCall] | None = None
52
+ error: AgentError | None = None
53
+ model_output_message: ChatMessage | None = None
54
+ model_output: str | list[dict[str, Any]] | None = None
55
+ code_action: str | None = None
56
+ observations: str | None = None
57
+ observations_images: list["PIL.Image.Image"] | None = None
58
+ action_output: Any = None
59
+ token_usage: TokenUsage | None = None
60
+ is_final_answer: bool = False
61
+
62
+ def dict(self):
63
+ # We overwrite the method to parse the tool_calls and action_output manually
64
+ return {
65
+ "step_number": self.step_number,
66
+ "timing": self.timing.dict(),
67
+ "model_input_messages": self.model_input_messages,
68
+ "tool_calls": [tc.dict() for tc in self.tool_calls] if self.tool_calls else [],
69
+ "error": self.error.dict() if self.error else None,
70
+ "model_output_message": self.model_output_message.dict() if self.model_output_message else None,
71
+ "model_output": self.model_output,
72
+ "code_action": self.code_action,
73
+ "observations": self.observations,
74
+ "observations_images": [image.tobytes() for image in self.observations_images]
75
+ if self.observations_images
76
+ else None,
77
+ "action_output": make_json_serializable(self.action_output),
78
+ "token_usage": asdict(self.token_usage) if self.token_usage else None,
79
+ "is_final_answer": self.is_final_answer,
80
+ }
81
+
82
+ def to_messages(self, summary_mode: bool = False) -> list[ChatMessage]:
83
+ messages = []
84
+ if self.model_output is not None and not summary_mode:
85
+ messages.append(
86
+ ChatMessage(role=MessageRole.ASSISTANT, content=[{"type": "text", "text": self.model_output.strip()}])
87
+ )
88
+
89
+ if self.tool_calls is not None:
90
+ messages.append(
91
+ ChatMessage(
92
+ role=MessageRole.TOOL_CALL,
93
+ content=[
94
+ {
95
+ "type": "text",
96
+ "text": "Calling tools:\n" + str([tc.dict() for tc in self.tool_calls]),
97
+ }
98
+ ],
99
+ )
100
+ )
101
+
102
+ if self.observations_images:
103
+ messages.append(
104
+ ChatMessage(
105
+ role=MessageRole.USER,
106
+ content=[
107
+ {
108
+ "type": "image",
109
+ "image": image,
110
+ }
111
+ for image in self.observations_images
112
+ ],
113
+ )
114
+ )
115
+
116
+ if self.observations is not None:
117
+ messages.append(
118
+ ChatMessage(
119
+ role=MessageRole.TOOL_RESPONSE,
120
+ content=[
121
+ {
122
+ "type": "text",
123
+ "text": f"Observation:\n{self.observations}",
124
+ }
125
+ ],
126
+ )
127
+ )
128
+ if self.error is not None:
129
+ error_message = (
130
+ "Error:\n"
131
+ + str(self.error)
132
+ + "\nNow let's retry: take care not to repeat previous errors! If you have retried several times, try a completely different approach.\n"
133
+ )
134
+ message_content = f"Call id: {self.tool_calls[0].id}\n" if self.tool_calls else ""
135
+ message_content += error_message
136
+ messages.append(
137
+ ChatMessage(role=MessageRole.TOOL_RESPONSE, content=[{"type": "text", "text": message_content}])
138
+ )
139
+
140
+ return messages
141
+
142
+
143
+ @dataclass
144
+ class PlanningStep(MemoryStep):
145
+ model_input_messages: list[ChatMessage]
146
+ model_output_message: ChatMessage
147
+ plan: str
148
+ timing: Timing
149
+ token_usage: TokenUsage | None = None
150
+
151
+ def to_messages(self, summary_mode: bool = False) -> list[ChatMessage]:
152
+ if summary_mode:
153
+ return []
154
+ return [
155
+ ChatMessage(role=MessageRole.ASSISTANT, content=[{"type": "text", "text": self.plan.strip()}]),
156
+ ChatMessage(
157
+ role=MessageRole.USER, content=[{"type": "text", "text": "Now proceed and carry out this plan."}]
158
+ ),
159
+ # This second message creates a role change to prevent models from simply continuing the plan message
160
+ ]
161
+
162
+
163
+ @dataclass
164
+ class TaskStep(MemoryStep):
165
+ task: str
166
+ task_images: list["PIL.Image.Image"] | None = None
167
+
168
+ def to_messages(self, summary_mode: bool = False) -> list[ChatMessage]:
169
+ content = [{"type": "text", "text": f"New task:\n{self.task}"}]
170
+ if self.task_images:
171
+ content.extend([{"type": "image", "image": image} for image in self.task_images])
172
+
173
+ return [ChatMessage(role=MessageRole.USER, content=content)]
174
+
175
+
176
+ @dataclass
177
+ class SystemPromptStep(MemoryStep):
178
+ system_prompt: str
179
+
180
+ def to_messages(self, summary_mode: bool = False) -> list[ChatMessage]:
181
+ if summary_mode:
182
+ return []
183
+ return [ChatMessage(role=MessageRole.SYSTEM, content=[{"type": "text", "text": self.system_prompt}])]
184
+
185
+
186
+ @dataclass
187
+ class FinalAnswerStep(MemoryStep):
188
+ output: Any
189
+
190
+
191
+ class AgentMemory:
192
+ """Memory for the agent, containing the system prompt and all steps taken by the agent.
193
+
194
+ This class is used to store the agent's steps, including tasks, actions, and planning steps.
195
+ It allows for resetting the memory, retrieving succinct or full step information, and replaying the agent's steps.
196
+
197
+ Args:
198
+ system_prompt (`str`): System prompt for the agent, which sets the context and instructions for the agent's behavior.
199
+
200
+ **Attributes**:
201
+ - **system_prompt** (`SystemPromptStep`) -- System prompt step for the agent.
202
+ - **steps** (`list[TaskStep | ActionStep | PlanningStep]`) -- List of steps taken by the agent, which can include tasks, actions, and planning steps.
203
+ """
204
+
205
+ def __init__(self, system_prompt: str):
206
+ self.system_prompt: SystemPromptStep = SystemPromptStep(system_prompt=system_prompt)
207
+ self.steps: list[TaskStep | ActionStep | PlanningStep] = []
208
+
209
+ def reset(self):
210
+ """Reset the agent's memory, clearing all steps and keeping the system prompt."""
211
+ self.steps = []
212
+
213
+ def get_succinct_steps(self) -> list[dict]:
214
+ """Return a succinct representation of the agent's steps, excluding model input messages."""
215
+ return [
216
+ {key: value for key, value in step.dict().items() if key != "model_input_messages"} for step in self.steps
217
+ ]
218
+
219
+ def get_full_steps(self) -> list[dict]:
220
+ """Return a full representation of the agent's steps, including model input messages."""
221
+ if len(self.steps) == 0:
222
+ return []
223
+ return [step.dict() for step in self.steps]
224
+
225
+ def replay(self, logger: AgentLogger, detailed: bool = False):
226
+ """Prints a pretty replay of the agent's steps.
227
+
228
+ Args:
229
+ logger (`AgentLogger`): The logger to print replay logs to.
230
+ detailed (`bool`, default `False`): If True, also displays the memory at each step.
231
+ Careful: this will increase the log length considerably. Use only for debugging.
232
+ """
233
+ logger.console.log("Replaying the agent's steps:")
234
+ logger.log_markdown(title="System prompt", content=self.system_prompt.system_prompt, level=LogLevel.ERROR)
235
+ for step in self.steps:
236
+ if isinstance(step, TaskStep):
237
+ logger.log_task(step.task, "", level=LogLevel.ERROR)
238
+ elif isinstance(step, ActionStep):
239
+ logger.log_rule(f"Step {step.step_number}", level=LogLevel.ERROR)
240
+ if detailed and step.model_input_messages is not None:
241
+ logger.log_messages(step.model_input_messages, level=LogLevel.ERROR)
242
+ if step.model_output is not None:
243
+ logger.log_markdown(title="Agent output:", content=step.model_output, level=LogLevel.ERROR)
244
+ elif isinstance(step, PlanningStep):
245
+ logger.log_rule("Planning step", level=LogLevel.ERROR)
246
+ if detailed and step.model_input_messages is not None:
247
+ logger.log_messages(step.model_input_messages, level=LogLevel.ERROR)
248
+ logger.log_markdown(title="Agent output:", content=step.plan, level=LogLevel.ERROR)
249
+
250
+ def return_full_code(self) -> str:
251
+ """Returns all code actions from the agent's steps, concatenated as a single script."""
252
+ return "\n\n".join(
253
+ [step.code_action for step in self.steps if isinstance(step, ActionStep) and step.code_action is not None]
254
+ )
255
+
256
+
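# Minimal usage sketch for AgentMemory, assuming the MemoryStep base class defined
# earlier in this file provides the `dict()` serialization used by get_full_steps().
memory = AgentMemory(system_prompt="You are a helpful agent.")
memory.steps.append(TaskStep(task="Say hello"))
len(memory.get_full_steps())  # 1
memory.reset()
memory.get_succinct_steps()   # []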
257
+ __all__ = ["AgentMemory"]
src/smolagents/models.py ADDED
@@ -0,0 +1,1882 @@
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import json
15
+ import logging
16
+ import os
17
+ import re
18
+ import uuid
19
+ import warnings
20
+ from collections.abc import Generator
21
+ from copy import deepcopy
22
+ from dataclasses import asdict, dataclass
23
+ from enum import Enum
24
+ from threading import Thread
25
+ from typing import TYPE_CHECKING, Any
26
+
27
+ from .monitoring import TokenUsage
28
+ from .tools import Tool
29
+ from .utils import _is_package_available, encode_image_base64, make_image_url, parse_json_blob
30
+
31
+
32
+ if TYPE_CHECKING:
33
+ from transformers import StoppingCriteriaList
34
+
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+ STRUCTURED_GENERATION_PROVIDERS = ["cerebras", "fireworks-ai"]
39
+ CODEAGENT_RESPONSE_FORMAT = {
40
+ "type": "json_schema",
41
+ "json_schema": {
42
+ "schema": {
43
+ "additionalProperties": False,
44
+ "properties": {
45
+ "thought": {
46
+ "description": "A free form text description of the thought process.",
47
+ "title": "Thought",
48
+ "type": "string",
49
+ },
50
+ "code": {
51
+ "description": "Valid Python code snippet implementing the thought.",
52
+ "title": "Code",
53
+ "type": "string",
54
+ },
55
+ },
56
+ "required": ["thought", "code"],
57
+ "title": "ThoughtAndCodeAnswer",
58
+ "type": "object",
59
+ },
60
+ "name": "ThoughtAndCodeAnswer",
61
+ "strict": True,
62
+ },
63
+ }
64
+
65
+
66
+ def get_dict_from_nested_dataclasses(obj, ignore_key=None):
67
+ def convert(obj):
68
+ if hasattr(obj, "__dataclass_fields__"):
69
+ return {k: convert(v) for k, v in asdict(obj).items() if k != ignore_key}
70
+ return obj
71
+
72
+ return convert(obj)
73
+
74
+
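# Sketch with a hypothetical dataclass (using the `dataclass` decorator imported
# above): `ignore_key` drops a top-level field, while nested dataclasses are
# flattened to plain dicts by `asdict`.
@dataclass
class _Reply:
    text: str
    raw: object | None = None

get_dict_from_nested_dataclasses(_Reply(text="hi", raw=object()), ignore_key="raw")
# -> {"text": "hi"}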
75
+ @dataclass
76
+ class ChatMessageToolCallFunction:
77
+ arguments: Any
78
+ name: str
79
+ description: str | None = None
80
+
81
+
82
+ @dataclass
83
+ class ChatMessageToolCall:
84
+ function: ChatMessageToolCallFunction
85
+ id: str
86
+ type: str
87
+
88
+ def __str__(self) -> str:
89
+ return f"Call: {self.id}: Calling {str(self.function.name)} with arguments: {str(self.function.arguments)}"
90
+
91
+
92
+ class MessageRole(str, Enum):
93
+ USER = "user"
94
+ ASSISTANT = "assistant"
95
+ SYSTEM = "system"
96
+ TOOL_CALL = "tool-call"
97
+ TOOL_RESPONSE = "tool-response"
98
+
99
+ @classmethod
100
+ def roles(cls):
101
+ return [r.value for r in cls]
102
+
103
+
104
+ @dataclass
105
+ class ChatMessage:
106
+ role: MessageRole
107
+ content: str | list[dict[str, Any]] | None = None
108
+ tool_calls: list[ChatMessageToolCall] | None = None
109
+ raw: Any | None = None # Stores the raw output from the API
110
+ token_usage: TokenUsage | None = None
111
+
112
+ def model_dump_json(self):
113
+ return json.dumps(get_dict_from_nested_dataclasses(self, ignore_key="raw"))
114
+
115
+ @classmethod
116
+ def from_dict(cls, data: dict, raw: Any | None = None, token_usage: TokenUsage | None = None) -> "ChatMessage":
117
+ if data.get("tool_calls"):
118
+ tool_calls = [
119
+ ChatMessageToolCall(
120
+ function=ChatMessageToolCallFunction(**tc["function"]), id=tc["id"], type=tc["type"]
121
+ )
122
+ for tc in data["tool_calls"]
123
+ ]
124
+ data["tool_calls"] = tool_calls
125
+ return cls(
126
+ role=data["role"],
127
+ content=data.get("content"),
128
+ tool_calls=data.get("tool_calls"),
129
+ raw=raw,
130
+ token_usage=token_usage,
131
+ )
132
+
133
+ def dict(self):
134
+ return get_dict_from_nested_dataclasses(self)
135
+
136
+ def render_as_markdown(self) -> str:
137
+ rendered = str(self.content) if self.content is not None else ""
138
+ if self.tool_calls:
139
+ rendered += "\n".join(
140
+ [
141
+ json.dumps({"tool": tool.function.name, "arguments": tool.function.arguments})
142
+ for tool in self.tool_calls
143
+ ]
144
+ )
145
+ return rendered
146
+
147
+
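# Sketch: a message round-trips through dict() / from_dict(); model_dump_json() is
# the variant that also strips the non-serializable `raw` payload.
msg = ChatMessage(role=MessageRole.ASSISTANT, content="All done.")
ChatMessage.from_dict(msg.dict()).content  # "All done."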
148
+ def parse_json_if_needed(arguments: str | dict) -> str | dict:
149
+ if isinstance(arguments, dict):
150
+ return arguments
151
+ else:
152
+ try:
153
+ return json.loads(arguments)
154
+ except Exception:
155
+ return arguments
156
+
157
+
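# Sketch: JSON strings are decoded, dicts pass through untouched, and non-JSON
# strings are returned as-is instead of raising.
parse_json_if_needed('{"query": "tree"}')  # {'query': 'tree'}
parse_json_if_needed({"query": "tree"})    # {'query': 'tree'}
parse_json_if_needed("plain text")         # 'plain text'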
158
+ @dataclass
159
+ class ChatMessageToolCallStreamDelta:
160
+ """Represents a streaming delta for tool calls during generation."""
161
+
162
+ index: int | None = None
163
+ id: str | None = None
164
+ type: str | None = None
165
+ function: ChatMessageToolCallFunction | None = None
166
+
167
+
168
+ @dataclass
169
+ class ChatMessageStreamDelta:
170
+ content: str | None = None
171
+ tool_calls: list[ChatMessageToolCallStreamDelta] | None = None
172
+ token_usage: TokenUsage | None = None
173
+
174
+
175
+ def agglomerate_stream_deltas(
176
+ stream_deltas: list[ChatMessageStreamDelta], role: MessageRole = MessageRole.ASSISTANT
177
+ ) -> ChatMessage:
178
+ """
179
+ Agglomerate a list of stream deltas into a single chat message.
180
+ """
181
+ accumulated_tool_calls: dict[int, ChatMessageToolCallStreamDelta] = {}
182
+ accumulated_content = ""
183
+ total_input_tokens = 0
184
+ total_output_tokens = 0
185
+ for stream_delta in stream_deltas:
186
+ if stream_delta.token_usage:
187
+ total_input_tokens += stream_delta.token_usage.input_tokens
188
+ total_output_tokens += stream_delta.token_usage.output_tokens
189
+ if stream_delta.content:
190
+ accumulated_content += stream_delta.content
191
+ if stream_delta.tool_calls:
192
+ for tool_call_delta in stream_delta.tool_calls: # Normally there should be only one call at a time
193
+ # Extend accumulated_tool_calls list to accommodate the new tool call if needed
194
+ if tool_call_delta.index is not None:
195
+ if tool_call_delta.index not in accumulated_tool_calls:
196
+ accumulated_tool_calls[tool_call_delta.index] = ChatMessageToolCallStreamDelta(
197
+ id=tool_call_delta.id,
198
+ type=tool_call_delta.type,
199
+ function=ChatMessageToolCallFunction(name="", arguments=""),
200
+ )
201
+ # Update the tool call at the specific index
202
+ tool_call = accumulated_tool_calls[tool_call_delta.index]
203
+ if tool_call_delta.id:
204
+ tool_call.id = tool_call_delta.id
205
+ if tool_call_delta.type:
206
+ tool_call.type = tool_call_delta.type
207
+ if tool_call_delta.function:
208
+ if tool_call_delta.function.name and len(tool_call_delta.function.name) > 0:
209
+ tool_call.function.name = tool_call_delta.function.name
210
+ if tool_call_delta.function.arguments:
211
+ tool_call.function.arguments += tool_call_delta.function.arguments
212
+ else:
213
+ raise ValueError(f"Tool call index is not provided in tool delta: {tool_call_delta}")
214
+
215
+ return ChatMessage(
216
+ role=role,
217
+ content=accumulated_content,
218
+ tool_calls=[
219
+ ChatMessageToolCall(
220
+ function=ChatMessageToolCallFunction(
221
+ name=tool_call_stream_delta.function.name,
222
+ arguments=tool_call_stream_delta.function.arguments,
223
+ ),
224
+ id=tool_call_stream_delta.id or "",
225
+ type="function",
226
+ )
227
+ for tool_call_stream_delta in accumulated_tool_calls.values()
228
+ if tool_call_stream_delta.function
229
+ ],
230
+ token_usage=TokenUsage(
231
+ input_tokens=total_input_tokens,
232
+ output_tokens=total_output_tokens,
233
+ ),
234
+ )
235
+
236
+
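# Sketch: content chunks concatenate and token counts accumulate across deltas.
deltas = [
    ChatMessageStreamDelta(content="Hello, ", token_usage=TokenUsage(input_tokens=5, output_tokens=2)),
    ChatMessageStreamDelta(content="world!", token_usage=TokenUsage(input_tokens=0, output_tokens=2)),
]
message = agglomerate_stream_deltas(deltas)
message.content                    # 'Hello, world!'
message.token_usage.output_tokens  # 4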
237
+ tool_role_conversions = {
238
+ MessageRole.TOOL_CALL: MessageRole.ASSISTANT,
239
+ MessageRole.TOOL_RESPONSE: MessageRole.USER,
240
+ }
241
+
242
+
243
+ def get_tool_json_schema(tool: Tool) -> dict:
244
+ properties = deepcopy(tool.inputs)
245
+ required = []
246
+ for key, value in properties.items():
247
+ if value["type"] == "any":
248
+ value["type"] = "string"
249
+ if not ("nullable" in value and value["nullable"]):
250
+ required.append(key)
251
+ return {
252
+ "type": "function",
253
+ "function": {
254
+ "name": tool.name,
255
+ "description": tool.description,
256
+ "parameters": {
257
+ "type": "object",
258
+ "properties": properties,
259
+ "required": required,
260
+ },
261
+ },
262
+ }
263
+
264
+
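# Sketch, assuming a hypothetical `my_tool` whose `inputs` attribute is
# {"query": {"type": "string"}, "extra": {"type": "any", "nullable": True}}:
# "any" is widened to "string", and only non-nullable inputs become required.
schema = get_tool_json_schema(my_tool)
schema["function"]["parameters"]["required"]                     # ['query']
schema["function"]["parameters"]["properties"]["extra"]["type"]  # 'string'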
265
+ def remove_stop_sequences(content: str, stop_sequences: list[str]) -> str:
266
+ for stop_seq in stop_sequences:
267
+ if content[-len(stop_seq) :] == stop_seq:
268
+ content = content[: -len(stop_seq)]
269
+ return content
270
+
271
+
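# Sketch: only a stop sequence sitting at the very end of the content is trimmed.
remove_stop_sequences("final answer<end_code>", ["<end_code>"])  # 'final answer'
remove_stop_sequences("no stop here", ["<end_code>"])            # 'no stop here'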
272
+ def get_clean_message_list(
273
+ message_list: list[ChatMessage],
274
+ role_conversions: dict[MessageRole, MessageRole] | dict[str, str] = {},
275
+ convert_images_to_image_urls: bool = False,
276
+ flatten_messages_as_text: bool = False,
277
+ ) -> list[dict[str, Any]]:
278
+ """
279
+ Creates a list of messages to give as input to the LLM. These messages are dictionaries, compatible with the chat template format used by transformers LLMs.
281
+ Subsequent messages with the same role will be concatenated into a single message.
281
+
282
+ Args:
283
+ message_list (`list[dict[str, str]]`): List of chat messages.
284
+ role_conversions (`dict[MessageRole, MessageRole]`, *optional* ): Mapping to convert roles.
285
+ convert_images_to_image_urls (`bool`, default `False`): Whether to convert images to image URLs.
286
+ flatten_messages_as_text (`bool`, default `False`): Whether to flatten messages as text.
287
+ """
288
+ output_message_list: list[dict[str, Any]] = []
289
+ message_list = deepcopy(message_list) # Avoid modifying the original list
290
+ for message in message_list:
291
+ role = message.role
292
+ if role not in MessageRole.roles():
293
+ raise ValueError(f"Incorrect role {role}, only {MessageRole.roles()} are supported for now.")
294
+
295
+ if role in role_conversions:
296
+ message.role = role_conversions[role] # type: ignore
297
+ # encode images if needed
298
+ if isinstance(message.content, list):
299
+ for element in message.content:
300
+ assert isinstance(element, dict), "Error: this element should be a dict:" + str(element)
301
+ if element["type"] == "image":
302
+ assert not flatten_messages_as_text, f"Cannot use images with {flatten_messages_as_text=}"
303
+ if convert_images_to_image_urls:
304
+ element.update(
305
+ {
306
+ "type": "image_url",
307
+ "image_url": {"url": make_image_url(encode_image_base64(element.pop("image")))},
308
+ }
309
+ )
310
+ else:
311
+ element["image"] = encode_image_base64(element["image"])
312
+
313
+ if len(output_message_list) > 0 and message.role == output_message_list[-1]["role"]:
314
+ assert isinstance(message.content, list), "Error: wrong content:" + str(message.content)
315
+ if flatten_messages_as_text:
316
+ output_message_list[-1]["content"] += "\n" + message.content[0]["text"]
317
+ else:
318
+ for el in message.content:
319
+ if el["type"] == "text" and output_message_list[-1]["content"][-1]["type"] == "text":
320
+ # Merge consecutive text messages rather than creating new ones
321
+ output_message_list[-1]["content"][-1]["text"] += "\n" + el["text"]
322
+ else:
323
+ output_message_list[-1]["content"].append(el)
324
+ else:
325
+ if flatten_messages_as_text:
326
+ content = message.content[0]["text"]
327
+ else:
328
+ content = message.content
329
+ output_message_list.append(
330
+ {
331
+ "role": message.role,
332
+ "content": content,
333
+ }
334
+ )
335
+ return output_message_list
336
+
337
+
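# Sketch: with `tool_role_conversions`, a TOOL_RESPONSE message becomes a USER
# message, and because it then follows another USER message the two text chunks
# are merged into one message.
msgs = [
    ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hi"}]),
    ChatMessage(role=MessageRole.TOOL_RESPONSE, content=[{"type": "text", "text": "Observation: ok"}]),
]
get_clean_message_list(msgs, role_conversions=tool_role_conversions)
# -> a single "user" message: [{'type': 'text', 'text': 'Hi\nObservation: ok'}]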
338
+ def get_tool_call_from_text(text: str, tool_name_key: str, tool_arguments_key: str) -> ChatMessageToolCall:
339
+ tool_call_dictionary, _ = parse_json_blob(text)
340
+ try:
341
+ tool_name = tool_call_dictionary[tool_name_key]
342
+ except Exception as e:
343
+ raise ValueError(
344
+ f"Key {tool_name_key=} not found in the generated tool call. Got keys: {list(tool_call_dictionary.keys())} instead"
345
+ ) from e
346
+ tool_arguments = tool_call_dictionary.get(tool_arguments_key, None)
347
+ if isinstance(tool_arguments, str):
348
+ tool_arguments = parse_json_if_needed(tool_arguments)
349
+ return ChatMessageToolCall(
350
+ id=str(uuid.uuid4()),
351
+ type="function",
352
+ function=ChatMessageToolCallFunction(name=tool_name, arguments=tool_arguments),
353
+ )
354
+
355
+
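# Sketch, assuming `parse_json_blob` (imported from .utils) extracts the JSON
# object embedded in the model's free-form text:
text = 'Action: {"name": "web_search", "arguments": {"query": "smolagents"}}'
call = get_tool_call_from_text(text, tool_name_key="name", tool_arguments_key="arguments")
call.function.name       # 'web_search'
call.function.arguments  # {'query': 'smolagents'}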
356
+ def supports_stop_parameter(model_id: str) -> bool:
357
+ """
358
+ Check if the model supports the `stop` parameter.
359
+
360
+ Not supported with reasoning models openai/o3 and openai/o4-mini (and their versioned variants).
361
+
362
+ Args:
363
+ model_id (`str`): Model identifier (e.g. "openai/o3", "o4-mini-2025-04-16")
364
+
365
+ Returns:
366
+ bool: True if the model supports the stop parameter, False otherwise
367
+ """
368
+ model_name = model_id.split("/")[-1]
369
+ # o3 and o4-mini (including versioned variants like o3-2025-04-16) don't support the stop parameter
370
+ pattern = r"^(o3[-\d]*|o4-mini[-\d]*)$"
371
+ return not re.match(pattern, model_name)
372
+
373
+
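# Sketch of the matching behavior:
supports_stop_parameter("openai/gpt-4o")  # True
supports_stop_parameter("openai/o3")      # False
supports_stop_parameter("o3-2025-04-16")  # False
supports_stop_parameter("azure/o4-mini")  # False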
374
+ class Model:
375
+ def __init__(
376
+ self,
377
+ flatten_messages_as_text: bool = False,
378
+ tool_name_key: str = "name",
379
+ tool_arguments_key: str = "arguments",
380
+ model_id: str | None = None,
381
+ **kwargs,
382
+ ):
383
+ self.flatten_messages_as_text = flatten_messages_as_text
384
+ self.tool_name_key = tool_name_key
385
+ self.tool_arguments_key = tool_arguments_key
386
+ self.kwargs = kwargs
387
+ self._last_input_token_count: int | None = None
388
+ self._last_output_token_count: int | None = None
389
+ self.model_id: str | None = model_id
390
+
391
+ @property
392
+ def last_input_token_count(self) -> int | None:
393
+ warnings.warn(
394
+ "Attribute last_input_token_count is deprecated and will be removed in version 1.20. "
395
+ "Please use TokenUsage.input_tokens instead.",
396
+ FutureWarning,
397
+ )
398
+ return self._last_input_token_count
399
+
400
+ @property
401
+ def last_output_token_count(self) -> int | None:
402
+ warnings.warn(
403
+ "Attribute last_output_token_count is deprecated and will be removed in version 1.20. "
404
+ "Please use TokenUsage.output_tokens instead.",
405
+ FutureWarning,
406
+ )
407
+ return self._last_output_token_count
408
+
409
+ def _prepare_completion_kwargs(
410
+ self,
411
+ messages: list[ChatMessage],
412
+ stop_sequences: list[str] | None = None,
413
+ response_format: dict[str, str] | None = None,
414
+ tools_to_call_from: list[Tool] | None = None,
415
+ custom_role_conversions: dict[str, str] | None = None,
416
+ convert_images_to_image_urls: bool = False,
417
+ tool_choice: str | dict | None = "required", # Configurable tool_choice parameter
418
+ **kwargs,
419
+ ) -> dict[str, Any]:
420
+ """
421
+ Prepare parameters required for model invocation, handling parameter priorities.
422
+
423
+ Parameter priority from high to low:
424
+ 1. Explicitly passed kwargs
425
+ 2. Specific parameters (stop_sequences, response_format, etc.)
426
+ 3. Default values in self.kwargs
427
+ """
428
+ # Clean and standardize the message list
429
+ flatten_messages_as_text = kwargs.pop("flatten_messages_as_text", self.flatten_messages_as_text)
430
+ messages_as_dicts = get_clean_message_list(
431
+ messages,
432
+ role_conversions=custom_role_conversions or tool_role_conversions,
433
+ convert_images_to_image_urls=convert_images_to_image_urls,
434
+ flatten_messages_as_text=flatten_messages_as_text,
435
+ )
436
+ # Use self.kwargs as the base configuration
437
+ completion_kwargs = {
438
+ **self.kwargs,
439
+ "messages": messages_as_dicts,
440
+ }
441
+
442
+ # Handle specific parameters
443
+ if stop_sequences is not None:
444
+ # Some models do not support stop parameter
445
+ if supports_stop_parameter(self.model_id or ""):
446
+ completion_kwargs["stop"] = stop_sequences
447
+ if response_format is not None:
448
+ completion_kwargs["response_format"] = response_format
449
+
450
+ # Handle tools parameter
451
+ if tools_to_call_from:
452
+ tools_config = {
453
+ "tools": [get_tool_json_schema(tool) for tool in tools_to_call_from],
454
+ }
455
+ if tool_choice is not None:
456
+ tools_config["tool_choice"] = tool_choice
457
+ completion_kwargs.update(tools_config)
458
+
459
+ # Finally, use the passed-in kwargs to override all settings
460
+ completion_kwargs.update(kwargs)
461
+
462
+ return completion_kwargs
463
+
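# Sketch of the priority order, using `temperature` as an example and assuming
# `msgs` is a list of ChatMessage: call-time kwargs beat the specific parameters,
# which beat the defaults stored in `self.kwargs`.
#   m = Model(model_id="some-model", temperature=0.7)
#   m._prepare_completion_kwargs(messages=msgs)["temperature"]                   # 0.7
#   m._prepare_completion_kwargs(messages=msgs, temperature=0.2)["temperature"]  # 0.2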
464
+ def generate(
465
+ self,
466
+ messages: list[ChatMessage],
467
+ stop_sequences: list[str] | None = None,
468
+ response_format: dict[str, str] | None = None,
469
+ tools_to_call_from: list[Tool] | None = None,
470
+ **kwargs,
471
+ ) -> ChatMessage:
472
+ """Process the input messages and return the model's response.
473
+
474
+ Parameters:
475
+ messages (`list[dict[str, str | list[dict]]] | list[ChatMessage]`):
476
+ A list of message dictionaries to be processed. Each dictionary should have the structure `{"role": "user/system", "content": "message content"}`.
477
+ stop_sequences (`List[str]`, *optional*):
478
+ A list of strings that will stop the generation if encountered in the model's output.
479
+ response_format (`dict[str, str]`, *optional*):
480
+ The response format to use in the model's response.
481
+ tools_to_call_from (`List[Tool]`, *optional*):
482
+ A list of tools that the model can use to generate responses.
483
+ **kwargs:
484
+ Additional keyword arguments to be passed to the underlying model.
485
+
486
+ Returns:
487
+ `ChatMessage`: A chat message object containing the model's response.
488
+ """
489
+ raise NotImplementedError("This method must be implemented in child classes")
490
+
491
+ def __call__(self, *args, **kwargs):
492
+ return self.generate(*args, **kwargs)
493
+
494
+ def parse_tool_calls(self, message: ChatMessage) -> ChatMessage:
495
+ """Sometimes APIs do not return the tool call as a specific object, so we need to parse it."""
496
+ message.role = MessageRole.ASSISTANT # Overwrite role if needed
497
+ if not message.tool_calls:
498
+ assert message.content is not None, "Message contains no content and no tool calls"
499
+ message.tool_calls = [
500
+ get_tool_call_from_text(message.content, self.tool_name_key, self.tool_arguments_key)
501
+ ]
502
+ assert len(message.tool_calls) > 0, "No tool call was found in the model output"
503
+ for tool_call in message.tool_calls:
504
+ tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments)
505
+ return message
506
+
507
+ def to_dict(self) -> dict:
508
+ """
509
+ Converts the model into a JSON-compatible dictionary.
510
+ """
511
+ model_dictionary = {
512
+ **self.kwargs,
513
+ "model_id": self.model_id,
514
+ }
515
+ for attribute in [
516
+ "custom_role_conversion",
517
+ "temperature",
518
+ "max_tokens",
519
+ "provider",
520
+ "timeout",
521
+ "api_base",
522
+ "torch_dtype",
523
+ "device_map",
524
+ "organization",
525
+ "project",
526
+ "azure_endpoint",
527
+ ]:
528
+ if hasattr(self, attribute):
529
+ model_dictionary[attribute] = getattr(self, attribute)
530
+
531
+ dangerous_attributes = ["token", "api_key"]
532
+ for attribute_name in dangerous_attributes:
533
+ if hasattr(self, attribute_name):
534
+ print(
535
+ f"For security reasons, we do not export the `{attribute_name}` attribute of your model. Please export it manually."
536
+ )
537
+ return model_dictionary
538
+
539
+ @classmethod
540
+ def from_dict(cls, model_dictionary: dict[str, Any]) -> "Model":
541
+ return cls(**{k: v for k, v in model_dictionary.items()})
542
+
543
+
544
+ class VLLMModel(Model):
545
+ """Model to use [vLLM](https://docs.vllm.ai/) for fast LLM inference and serving.
546
+
547
+ Parameters:
548
+ model_id (`str`):
549
+ The Hugging Face model ID to be used for inference.
550
+ This can be a path or model identifier from the Hugging Face model hub.
551
+ model_kwargs (`dict[str, Any]`, *optional*):
552
+ Additional keyword arguments to pass to the vLLM model (like revision, max_model_len, etc.).
553
+ """
554
+
555
+ def __init__(
556
+ self,
557
+ model_id,
558
+ model_kwargs: dict[str, Any] | None = None,
559
+ **kwargs,
560
+ ):
561
+ if not _is_package_available("vllm"):
562
+ raise ModuleNotFoundError("Please install 'vllm' extra to use VLLMModel: `pip install 'smolagents[vllm]'`")
563
+
564
+ from vllm import LLM # type: ignore
565
+ from vllm.transformers_utils.tokenizer import get_tokenizer # type: ignore
566
+
567
+ self.model_kwargs = model_kwargs or {}
568
+ super().__init__(**kwargs)
569
+ self.model_id = model_id
570
+ self.model = LLM(model=model_id, **self.model_kwargs)
571
+ assert self.model is not None
572
+ self.tokenizer = get_tokenizer(model_id)
573
+ self._is_vlm = False # VLLMModel does not support vision models yet.
574
+
575
+ def cleanup(self):
576
+ import gc
577
+
578
+ import torch
579
+ from vllm.distributed.parallel_state import ( # type: ignore
580
+ destroy_distributed_environment,
581
+ destroy_model_parallel,
582
+ )
583
+
584
+ destroy_model_parallel()
585
+ if self.model is not None:
586
+ # taken from https://github.com/vllm-project/vllm/issues/1908#issuecomment-2076870351
587
+ del self.model.llm_engine.model_executor.driver_worker
588
+ gc.collect()
589
+ destroy_distributed_environment()
590
+ torch.cuda.empty_cache()
591
+
592
+ def generate(
593
+ self,
594
+ messages: list[ChatMessage],
595
+ stop_sequences: list[str] | None = None,
596
+ response_format: dict[str, str] | None = None,
597
+ tools_to_call_from: list[Tool] | None = None,
598
+ **kwargs,
599
+ ) -> ChatMessage:
600
+ from vllm import SamplingParams # type: ignore
601
+
602
+ completion_kwargs = self._prepare_completion_kwargs(
603
+ messages=messages,
604
+ flatten_messages_as_text=(not self._is_vlm),
605
+ stop_sequences=stop_sequences,
606
+ tools_to_call_from=tools_to_call_from,
607
+ **kwargs,
608
+ )
609
+ # Override the OpenAI schema for VLLM compatibility
610
+ guided_options_request = {"guided_json": response_format["json_schema"]["schema"]} if response_format else None
611
+
612
+ messages = completion_kwargs.pop("messages")
613
+ prepared_stop_sequences = completion_kwargs.pop("stop", [])
614
+ tools = completion_kwargs.pop("tools", None)
615
+ completion_kwargs.pop("tool_choice", None)
616
+
617
+ prompt = self.tokenizer.apply_chat_template(
618
+ messages,
619
+ tools=tools,
620
+ add_generation_prompt=True,
621
+ tokenize=False,
622
+ )
623
+
624
+ sampling_params = SamplingParams(
625
+ n=kwargs.get("n", 1),
626
+ temperature=kwargs.get("temperature", 0.0),
627
+ max_tokens=kwargs.get("max_tokens", 2048),
628
+ stop=prepared_stop_sequences,
629
+ )
630
+
631
+ out = self.model.generate(
632
+ prompt,
633
+ sampling_params=sampling_params,
634
+ guided_options_request=guided_options_request,
635
+ )
636
+
637
+ output_text = out[0].outputs[0].text
638
+ self._last_input_token_count = len(out[0].prompt_token_ids)
639
+ self._last_output_token_count = len(out[0].outputs[0].token_ids)
640
+ return ChatMessage(
641
+ role=MessageRole.ASSISTANT,
642
+ content=output_text,
643
+ raw={"out": output_text, "completion_kwargs": completion_kwargs},
644
+ token_usage=TokenUsage(
645
+ input_tokens=len(out[0].prompt_token_ids),
646
+ output_tokens=len(out[0].outputs[0].token_ids),
647
+ ),
648
+ )
649
+
650
+
651
+ class MLXModel(Model):
652
+ """A class to interact with models loaded using MLX on Apple silicon.
653
+
654
+ > [!TIP]
655
+ > You must have `mlx-lm` installed on your machine. Please run `pip install smolagents[mlx-lm]` if it's not already installed.
656
+
657
+ Parameters:
658
+ model_id (str):
659
+ The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
660
+ tool_name_key (str):
661
+ The key, which can usually be found in the model's chat template, for retrieving a tool name.
662
+ tool_arguments_key (str):
663
+ The key, which can usually be found in the model's chat template, for retrieving tool arguments.
664
+ trust_remote_code (bool, default `False`):
665
+ Some models on the Hub require running remote code: for this model, you would have to set this flag to True.
666
+ load_kwargs (dict[str, Any], *optional*):
667
+ Additional keyword arguments to pass to the `mlx.lm.load` method when loading the model and tokenizer.
668
+ apply_chat_template_kwargs (dict, *optional*):
669
+ Additional keyword arguments to pass to the `apply_chat_template` method of the tokenizer.
670
+ kwargs (dict, *optional*):
671
+ Any additional keyword arguments that you want to use in model.generate(), for instance `max_tokens`.
672
+
673
+ Example:
674
+ ```python
675
+ >>> engine = MLXModel(
676
+ ... model_id="mlx-community/Qwen2.5-Coder-32B-Instruct-4bit",
677
+ ... max_tokens=10000,
678
+ ... )
679
+ >>> messages = [
680
+ ... {
681
+ ... "role": "user",
682
+ ... "content": "Explain quantum mechanics in simple terms."
683
+ ... }
684
+ ... ]
685
+ >>> response = engine(messages, stop_sequences=["END"])
686
+ >>> print(response)
687
+ "Quantum mechanics is the branch of physics that studies..."
688
+ ```
689
+ """
690
+
691
+ def __init__(
692
+ self,
693
+ model_id: str,
694
+ trust_remote_code: bool = False,
695
+ load_kwargs: dict[str, Any] | None = None,
696
+ apply_chat_template_kwargs: dict[str, Any] | None = None,
697
+ **kwargs,
698
+ ):
699
+ if not _is_package_available("mlx_lm"):
700
+ raise ModuleNotFoundError(
701
+ "Please install 'mlx-lm' extra to use 'MLXModel': `pip install 'smolagents[mlx-lm]'`"
702
+ )
703
+ import mlx_lm
704
+
705
+ self.load_kwargs = load_kwargs or {}
706
+ self.load_kwargs.setdefault("tokenizer_config", {}).setdefault("trust_remote_code", trust_remote_code)
707
+ self.apply_chat_template_kwargs = apply_chat_template_kwargs or {}
708
+ self.apply_chat_template_kwargs.setdefault("add_generation_prompt", True)
709
+ # mlx-lm doesn't support vision models: flatten_messages_as_text=True
710
+ super().__init__(model_id=model_id, flatten_messages_as_text=True, **kwargs)
711
+
712
+ self.model, self.tokenizer = mlx_lm.load(self.model_id, **self.load_kwargs)
713
+ self.stream_generate = mlx_lm.stream_generate
714
+ self.is_vlm = False # mlx-lm doesn't support vision models
715
+
716
+ def generate(
717
+ self,
718
+ messages: list[ChatMessage],
719
+ stop_sequences: list[str] | None = None,
720
+ response_format: dict[str, str] | None = None,
721
+ tools_to_call_from: list[Tool] | None = None,
722
+ **kwargs,
723
+ ) -> ChatMessage:
724
+ if response_format is not None:
725
+ raise ValueError("MLX does not support structured outputs.")
726
+ completion_kwargs = self._prepare_completion_kwargs(
727
+ messages=messages,
728
+ stop_sequences=stop_sequences,
729
+ tools_to_call_from=tools_to_call_from,
730
+ **kwargs,
731
+ )
732
+ messages = completion_kwargs.pop("messages")
733
+ stops = completion_kwargs.pop("stop", [])
734
+ tools = completion_kwargs.pop("tools", None)
735
+ completion_kwargs.pop("tool_choice", None)
736
+
737
+ prompt_ids = self.tokenizer.apply_chat_template(messages, tools=tools, **self.apply_chat_template_kwargs)
738
+
739
+ output_tokens = 0
740
+ text = ""
741
+ for response in self.stream_generate(self.model, self.tokenizer, prompt=prompt_ids, **completion_kwargs):
742
+ output_tokens += 1
743
+ text += response.text
744
+ if any((stop_index := text.rfind(stop)) != -1 for stop in stops):
745
+ text = text[:stop_index]
746
+ break
747
+
748
+ self._last_input_token_count = len(prompt_ids)
749
+ self._last_output_token_count = output_tokens
750
+ return ChatMessage(
751
+ role=MessageRole.ASSISTANT,
752
+ content=text,
753
+ raw={"out": text, "completion_kwargs": completion_kwargs},
754
+ token_usage=TokenUsage(
755
+ input_tokens=len(prompt_ids),
756
+ output_tokens=output_tokens,
757
+ ),
758
+ )
759
+
760
+
761
+ class TransformersModel(Model):
762
+ """A class that uses Hugging Face's Transformers library for language model interaction.
763
+
764
+ This model allows you to load and use Hugging Face's models locally using the Transformers library. It supports features like stop sequences and grammar customization.
765
+
766
+ > [!TIP]
767
+ > You must have `transformers` and `torch` installed on your machine. Please run `pip install smolagents[transformers]` if they're not already installed.
768
+
769
+ Parameters:
770
+ model_id (`str`):
771
+ The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
772
+ For example, `"Qwen/Qwen2.5-Coder-32B-Instruct"`.
773
+ device_map (`str`, *optional*):
774
+ The device_map to initialize your model with.
775
+ torch_dtype (`str`, *optional*):
776
+ The torch_dtype to initialize your model with.
777
+ trust_remote_code (bool, default `False`):
778
+ Some models on the Hub require running remote code: for this model, you would have to set this flag to True.
779
+ **kwargs:
781
+ Additional keyword arguments to pass to `model.generate()`, for instance `max_new_tokens` or `device`.
783
+ Raises:
784
+ ValueError:
785
+ If the model name is not provided.
786
+
787
+ Example:
788
+ ```python
789
+ >>> engine = TransformersModel(
790
+ ... model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
791
+ ... device="cuda",
792
+ ... max_new_tokens=5000,
793
+ ... )
794
+ >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}]
795
+ >>> response = engine(messages, stop_sequences=["END"])
796
+ >>> print(response)
797
+ "Quantum mechanics is the branch of physics that studies..."
798
+ ```
799
+ """
800
+
801
+ def __init__(
802
+ self,
803
+ model_id: str | None = None,
804
+ device_map: str | None = None,
805
+ torch_dtype: str | None = None,
806
+ trust_remote_code: bool = False,
807
+ **kwargs,
808
+ ):
809
+ try:
810
+ import torch
811
+ from transformers import (
812
+ AutoModelForCausalLM,
813
+ AutoModelForImageTextToText,
814
+ AutoProcessor,
815
+ AutoTokenizer,
816
+ TextIteratorStreamer,
817
+ )
818
+ except ModuleNotFoundError:
819
+ raise ModuleNotFoundError(
820
+ "Please install 'transformers' extra to use 'TransformersModel': `pip install 'smolagents[transformers]'`"
821
+ )
822
+
823
+ if not model_id:
824
+ warnings.warn(
825
+ "The 'model_id' parameter will be required in version 2.0.0. "
826
+ "Please update your code to pass this parameter to avoid future errors. "
827
+ "For now, it defaults to 'HuggingFaceTB/SmolLM2-1.7B-Instruct'.",
828
+ FutureWarning,
829
+ )
830
+ model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
831
+
832
+ default_max_tokens = 4096
833
+ max_new_tokens = kwargs.get("max_new_tokens") or kwargs.get("max_tokens")
834
+ if not max_new_tokens:
835
+ kwargs["max_new_tokens"] = default_max_tokens
836
+ logger.warning(
837
+ f"`max_new_tokens` not provided, using this default value for `max_new_tokens`: {default_max_tokens}"
838
+ )
839
+
840
+ if device_map is None:
841
+ device_map = "cuda" if torch.cuda.is_available() else "cpu"
842
+ logger.info(f"Using device: {device_map}")
843
+ self._is_vlm = False
844
+ try:
845
+ self.model = AutoModelForImageTextToText.from_pretrained(
846
+ model_id,
847
+ device_map=device_map,
848
+ torch_dtype=torch_dtype,
849
+ trust_remote_code=trust_remote_code,
850
+ )
851
+ self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=trust_remote_code)
852
+ self._is_vlm = True
853
+ self.streamer = TextIteratorStreamer(self.processor.tokenizer, skip_prompt=True, skip_special_tokens=True) # type: ignore
854
+
855
+ except ValueError as e:
856
+ if "Unrecognized configuration class" in str(e):
857
+ self.model = AutoModelForCausalLM.from_pretrained(
858
+ model_id,
859
+ device_map=device_map,
860
+ torch_dtype=torch_dtype,
861
+ trust_remote_code=trust_remote_code,
862
+ )
863
+ self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code)
864
+ self.streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True) # type: ignore
865
+ else:
866
+ raise e
867
+ except Exception as e:
868
+ raise ValueError(f"Failed to load tokenizer and model for {model_id=}: {e}") from e
869
+ super().__init__(flatten_messages_as_text=not self._is_vlm, model_id=model_id, **kwargs)
870
+
871
+ def make_stopping_criteria(self, stop_sequences: list[str], tokenizer) -> "StoppingCriteriaList":
872
+ from transformers import StoppingCriteria, StoppingCriteriaList
873
+
874
+ class StopOnStrings(StoppingCriteria):
875
+ def __init__(self, stop_strings: list[str], tokenizer):
876
+ self.stop_strings = stop_strings
877
+ self.tokenizer = tokenizer
878
+ self.stream = ""
879
+
880
+ def reset(self):
881
+ self.stream = ""
882
+
883
+ def __call__(self, input_ids, scores, **kwargs):
884
+ generated = self.tokenizer.decode(input_ids[0][-1], skip_special_tokens=True)
885
+ self.stream += generated
886
+ if any(self.stream.endswith(stop_string) for stop_string in self.stop_strings):
887
+ return True
888
+ return False
889
+
890
+ return StoppingCriteriaList([StopOnStrings(stop_sequences, tokenizer)])
891
+
892
+ def _prepare_completion_args(
893
+ self,
894
+ messages: list[ChatMessage],
895
+ stop_sequences: list[str] | None = None,
896
+ tools_to_call_from: list[Tool] | None = None,
897
+ **kwargs,
898
+ ) -> dict[str, Any]:
899
+ completion_kwargs = self._prepare_completion_kwargs(
900
+ messages=messages,
901
+ stop_sequences=stop_sequences,
902
+ **kwargs,
903
+ )
904
+
905
+ messages = completion_kwargs.pop("messages")
906
+ stop_sequences = completion_kwargs.pop("stop", None)
907
+ tools = completion_kwargs.pop("tools", None)
908
+
909
+ max_new_tokens = (
910
+ kwargs.get("max_new_tokens")
911
+ or kwargs.get("max_tokens")
912
+ or self.kwargs.get("max_new_tokens")
913
+ or self.kwargs.get("max_tokens")
914
+ or 1024
915
+ )
916
+ prompt_tensor = (self.processor if hasattr(self, "processor") else self.tokenizer).apply_chat_template(
917
+ messages,
918
+ tools=tools,
919
+ return_tensors="pt",
920
+ add_generation_prompt=True,
921
+ tokenize=True,
922
+ return_dict=True,
923
+ )
924
+ prompt_tensor = prompt_tensor.to(self.model.device) # type: ignore
925
+ if hasattr(prompt_tensor, "input_ids"):
926
+ prompt_tensor = prompt_tensor["input_ids"]
927
+
928
+ model_tokenizer = self.processor.tokenizer if hasattr(self, "processor") else self.tokenizer
929
+ stopping_criteria = (
930
+ self.make_stopping_criteria(stop_sequences, tokenizer=model_tokenizer) if stop_sequences else None
931
+ )
932
+ completion_kwargs["max_new_tokens"] = max_new_tokens
933
+ return dict(
934
+ inputs=prompt_tensor,
935
+ use_cache=True,
936
+ stopping_criteria=stopping_criteria,
937
+ **completion_kwargs,
938
+ )
939
+
940
+ def generate(
941
+ self,
942
+ messages: list[ChatMessage],
943
+ stop_sequences: list[str] | None = None,
944
+ response_format: dict[str, str] | None = None,
945
+ tools_to_call_from: list[Tool] | None = None,
946
+ **kwargs,
947
+ ) -> ChatMessage:
948
+ if response_format is not None:
949
+ raise ValueError("Transformers does not support structured outputs, use VLLMModel for this.")
950
+ generation_kwargs = self._prepare_completion_args(
951
+ messages=messages,
952
+ stop_sequences=stop_sequences,
953
+ tools_to_call_from=tools_to_call_from,
954
+ **kwargs,
955
+ )
956
+ count_prompt_tokens = generation_kwargs["inputs"].shape[1] # type: ignore
957
+ out = self.model.generate(
958
+ **generation_kwargs,
959
+ )
960
+ generated_tokens = out[0, count_prompt_tokens:]
961
+ if hasattr(self, "processor"):
962
+ output_text = self.processor.decode(generated_tokens, skip_special_tokens=True)
963
+ else:
964
+ output_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
965
+
966
+ if stop_sequences is not None:
967
+ output_text = remove_stop_sequences(output_text, stop_sequences)
968
+
969
+ self._last_input_token_count = count_prompt_tokens
970
+ self._last_output_token_count = len(generated_tokens)
971
+ return ChatMessage(
972
+ role=MessageRole.ASSISTANT,
973
+ content=output_text,
974
+ raw={
975
+ "out": output_text,
976
+ "completion_kwargs": {key: value for key, value in generation_kwargs.items() if key != "inputs"},
977
+ },
978
+ token_usage=TokenUsage(
979
+ input_tokens=count_prompt_tokens,
980
+ output_tokens=len(generated_tokens),
981
+ ),
982
+ )
983
+
984
+ def generate_stream(
985
+ self,
986
+ messages: list[ChatMessage],
987
+ stop_sequences: list[str] | None = None,
988
+ response_format: dict[str, str] | None = None,
989
+ tools_to_call_from: list[Tool] | None = None,
990
+ **kwargs,
991
+ ) -> Generator[ChatMessageStreamDelta]:
992
+ if response_format is not None:
993
+ raise ValueError("Transformers does not support structured outputs, use VLLMModel for this.")
994
+ generation_kwargs = self._prepare_completion_args(
995
+ messages=messages,
996
+ stop_sequences=stop_sequences,
997
+ response_format=response_format,
998
+ tools_to_call_from=tools_to_call_from,
999
+ **kwargs,
1000
+ )
1001
+ count_prompt_tokens = generation_kwargs["inputs"].shape[1] # type: ignore
1002
+
1003
+ thread = Thread(target=self.model.generate, kwargs={"streamer": self.streamer, **generation_kwargs})
1004
+ thread.start()
1005
+
1006
+ # Generate with streaming
1007
+ for new_text in self.streamer:
1008
+ self._last_input_token_count = count_prompt_tokens
1009
+ self._last_output_token_count = 1
1010
+ yield ChatMessageStreamDelta(
1011
+ content=new_text,
1012
+ tool_calls=None,
1013
+ token_usage=TokenUsage(input_tokens=count_prompt_tokens, output_tokens=1),
1014
+ )
1015
+ thread.join()
1016
+
1017
+
1018
+ class ApiModel(Model):
1019
+ """
1020
+ Base class for API-based language models.
1021
+
1022
+ This class serves as a foundation for implementing models that interact with
1023
+ external APIs. It handles the common functionality for managing model IDs,
1024
+ custom role mappings, and API client connections.
1025
+
1026
+ Parameters:
1027
+ model_id (`str`):
1028
+ The identifier for the model to be used with the API.
1029
+ custom_role_conversions (`dict[str, str]`, *optional*):
1030
+ Mapping to convert between internal role names and API-specific role names. Defaults to None.
1031
+ client (`Any`, *optional*):
1032
+ Pre-configured API client instance. If not provided, a default client will be created. Defaults to None.
1033
+ **kwargs: Additional keyword arguments to pass to the parent class.
1034
+ """
1035
+
1036
+ def __init__(
1037
+ self, model_id: str, custom_role_conversions: dict[str, str] | None = None, client: Any | None = None, **kwargs
1038
+ ):
1039
+ super().__init__(model_id=model_id, **kwargs)
1040
+ self.custom_role_conversions = custom_role_conversions or {}
1041
+ self.client = client or self.create_client()
1042
+
1043
+ def create_client(self):
1044
+ """Create the API client for the specific service."""
1045
+ raise NotImplementedError("Subclasses must implement this method to create a client")
1046
+
1047
+
1048
+ class LiteLLMModel(ApiModel):
1049
+ """Model to use [LiteLLM Python SDK](https://docs.litellm.ai/docs/#litellm-python-sdk) to access hundreds of LLMs.
1050
+
1051
+ Parameters:
1052
+ model_id (`str`):
1053
+ The model identifier to use on the server (e.g. "gpt-3.5-turbo").
1054
+ api_base (`str`, *optional*):
1055
+ The base URL of the provider API to call the model.
1056
+ api_key (`str`, *optional*):
1057
+ The API key to use for authentication.
1058
+ custom_role_conversions (`dict[str, str]`, *optional*):
1059
+ Custom role conversion mapping to convert message roles into others.
1060
+ Useful for specific models that do not support specific message roles like "system".
1061
+ flatten_messages_as_text (`bool`, *optional*): Whether to flatten messages as text.
1062
+ Defaults to `True` for models that start with "ollama", "groq", "cerebras".
1063
+ **kwargs:
1064
+ Additional keyword arguments to pass to the OpenAI API.
1065
+ """
1066
+
1067
+ def __init__(
1068
+ self,
1069
+ model_id: str | None = None,
1070
+ api_base: str | None = None,
1071
+ api_key: str | None = None,
1072
+ custom_role_conversions: dict[str, str] | None = None,
1073
+ flatten_messages_as_text: bool | None = None,
1074
+ **kwargs,
1075
+ ):
1076
+ if not model_id:
1077
+ warnings.warn(
1078
+ "The 'model_id' parameter will be required in version 2.0.0. "
1079
+ "Please update your code to pass this parameter to avoid future errors. "
1080
+ "For now, it defaults to 'anthropic/claude-3-5-sonnet-20240620'.",
1081
+ FutureWarning,
1082
+ )
1083
+ model_id = "anthropic/claude-3-5-sonnet-20240620"
1084
+ self.api_base = api_base
1085
+ self.api_key = api_key
1086
+ flatten_messages_as_text = (
1087
+ flatten_messages_as_text
1088
+ if flatten_messages_as_text is not None
1089
+ else model_id.startswith(("ollama", "groq", "cerebras"))
1090
+ )
1091
+ super().__init__(
1092
+ model_id=model_id,
1093
+ custom_role_conversions=custom_role_conversions,
1094
+ flatten_messages_as_text=flatten_messages_as_text,
1095
+ **kwargs,
1096
+ )
1097
+
1098
+ def create_client(self):
1099
+ """Create the LiteLLM client."""
1100
+ try:
1101
+ import litellm
1102
+ except ModuleNotFoundError as e:
1103
+ raise ModuleNotFoundError(
1104
+ "Please install 'litellm' extra to use LiteLLMModel: `pip install 'smolagents[litellm]'`"
1105
+ ) from e
1106
+
1107
+ return litellm
1108
+
1109
+ def generate(
1110
+ self,
1111
+ messages: list[ChatMessage],
1112
+ stop_sequences: list[str] | None = None,
1113
+ response_format: dict[str, str] | None = None,
1114
+ tools_to_call_from: list[Tool] | None = None,
1115
+ **kwargs,
1116
+ ) -> ChatMessage:
1117
+ completion_kwargs = self._prepare_completion_kwargs(
1118
+ messages=messages,
1119
+ stop_sequences=stop_sequences,
1120
+ response_format=response_format,
1121
+ tools_to_call_from=tools_to_call_from,
1122
+ model=self.model_id,
1123
+ api_base=self.api_base,
1124
+ api_key=self.api_key,
1125
+ convert_images_to_image_urls=True,
1126
+ custom_role_conversions=self.custom_role_conversions,
1127
+ **kwargs,
1128
+ )
1129
+
1130
+ response = self.client.completion(**completion_kwargs)
1131
+
1132
+ self._last_input_token_count = response.usage.prompt_tokens
1133
+ self._last_output_token_count = response.usage.completion_tokens
1134
+ return ChatMessage.from_dict(
1135
+ response.choices[0].message.model_dump(include={"role", "content", "tool_calls"}),
1136
+ raw=response,
1137
+ token_usage=TokenUsage(
1138
+ input_tokens=response.usage.prompt_tokens,
1139
+ output_tokens=response.usage.completion_tokens,
1140
+ ),
1141
+ )
1142
+
1143
+ def generate_stream(
1144
+ self,
1145
+ messages: list[ChatMessage],
1146
+ stop_sequences: list[str] | None = None,
1147
+ response_format: dict[str, str] | None = None,
1148
+ tools_to_call_from: list[Tool] | None = None,
1149
+ **kwargs,
1150
+ ) -> Generator[ChatMessageStreamDelta]:
1151
+ completion_kwargs = self._prepare_completion_kwargs(
1152
+ messages=messages,
1153
+ stop_sequences=stop_sequences,
1154
+ response_format=response_format,
1155
+ tools_to_call_from=tools_to_call_from,
1156
+ model=self.model_id,
1157
+ api_base=self.api_base,
1158
+ api_key=self.api_key,
1159
+ custom_role_conversions=self.custom_role_conversions,
1160
+ convert_images_to_image_urls=True,
1161
+ **kwargs,
1162
+ )
1163
+ for event in self.client.completion(**completion_kwargs, stream=True, stream_options={"include_usage": True}):
1164
+ if getattr(event, "usage", None):
1165
+ self._last_input_token_count = event.usage.prompt_tokens
1166
+ self._last_output_token_count = event.usage.completion_tokens
1167
+ yield ChatMessageStreamDelta(
1168
+ content="",
1169
+ token_usage=TokenUsage(
1170
+ input_tokens=event.usage.prompt_tokens,
1171
+ output_tokens=event.usage.completion_tokens,
1172
+ ),
1173
+ )
1174
+ if event.choices:
1175
+ choice = event.choices[0]
1176
+ if choice.delta:
1177
+ yield ChatMessageStreamDelta(
1178
+ content=choice.delta.content,
1179
+ tool_calls=[
1180
+ ChatMessageToolCallStreamDelta(
1181
+ index=delta.index,
1182
+ id=delta.id,
1183
+ type=delta.type,
1184
+ function=delta.function,
1185
+ )
1186
+ for delta in choice.delta.tool_calls
1187
+ ]
1188
+ if choice.delta.tool_calls
1189
+ else None,
1190
+ )
1191
+ else:
1192
+ if not getattr(choice, "finish_reason", None):
1193
+ raise ValueError(f"No content or tool calls in event: {event}")
1194
+
1195
+
1196
+ class LiteLLMRouterModel(LiteLLMModel):
1197
+ """Router‑based client for interacting with the [LiteLLM Python SDK Router](https://docs.litellm.ai/docs/routing).
1198
+
1199
+ This class provides a high-level interface for distributing requests among multiple language models using
1200
+ the LiteLLM SDK's routing capabilities. It is responsible for initializing and configuring the router client,
1201
+ applying custom role conversions, and managing message formatting to ensure seamless integration with various LLMs.
1202
+
1203
+ Parameters:
1204
+ model_id (`str`):
1205
+ Identifier for the model group to use from the model list (e.g., "model-group-1").
1206
+ model_list (`list[dict[str, Any]]`):
1207
+ Model configurations to be used for routing.
1208
+ Each configuration should include the model group name and any necessary parameters.
1209
+ For more details, refer to the [LiteLLM Routing](https://docs.litellm.ai/docs/routing#quick-start) documentation.
1210
+ client_kwargs (`dict[str, Any]`, *optional*):
1211
+ Additional configuration parameters for the Router client. For more details, see the
1212
+ [LiteLLM Routing Configurations](https://docs.litellm.ai/docs/routing).
1213
+ custom_role_conversions (`dict[str, str]`, *optional*):
1214
+ Custom role conversion mapping to convert message roles into others.
1215
+ Useful for specific models that do not support specific message roles like "system".
1216
+ flatten_messages_as_text (`bool`, *optional*): Whether to flatten messages as text.
1217
+ Defaults to `True` for models that start with "ollama", "groq", "cerebras".
1218
+ **kwargs:
1219
+ Additional keyword arguments to pass to the LiteLLM Router completion method.
1220
+
1221
+ Example:
1222
+ ```python
1223
+ >>> import os
1224
+ >>> from smolagents import CodeAgent, WebSearchTool, LiteLLMRouterModel
1225
+ >>> os.environ["OPENAI_API_KEY"] = ""
1226
+ >>> os.environ["AWS_ACCESS_KEY_ID"] = ""
1227
+ >>> os.environ["AWS_SECRET_ACCESS_KEY"] = ""
1228
+ >>> os.environ["AWS_REGION"] = ""
1229
+ >>> llm_loadbalancer_model_list = [
1230
+ ... {
1231
+ ... "model_name": "model-group-1",
1232
+ ... "litellm_params": {
1233
+ ... "model": "gpt-4o-mini",
1234
+ ... "api_key": os.getenv("OPENAI_API_KEY"),
1235
+ ... },
1236
+ ... },
1237
+ ... {
1238
+ ... "model_name": "model-group-1",
1239
+ ... "litellm_params": {
1240
+ ... "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
1241
+ ... "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
1242
+ ... "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
1243
+ ... "aws_region_name": os.getenv("AWS_REGION"),
1244
+ ... },
1245
+ ... },
1246
+ >>> ]
1247
+ >>> model = LiteLLMRouterModel(
1248
+ ... model_id="model-group-1",
1249
+ ... model_list=llm_loadbalancer_model_list,
1250
+ ... client_kwargs={
1251
+ ... "routing_strategy":"simple-shuffle"
1252
+ ... }
1253
+ >>> )
1254
+ >>> agent = CodeAgent(tools=[WebSearchTool()], model=model)
1255
+ >>> agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
1256
+ ```
1257
+ """
1258
+
1259
+ def __init__(
1260
+ self,
1261
+ model_id: str,
1262
+ model_list: list[dict[str, Any]],
1263
+ client_kwargs: dict[str, Any] | None = None,
1264
+ custom_role_conversions: dict[str, str] | None = None,
1265
+ flatten_messages_as_text: bool | None = None,
1266
+ **kwargs,
1267
+ ):
1268
+ self.client_kwargs = {
1269
+ "model_list": model_list,
1270
+ **(client_kwargs or {}),
1271
+ }
1272
+ super().__init__(
1273
+ model_id=model_id,
1274
+ custom_role_conversions=custom_role_conversions,
1275
+ flatten_messages_as_text=flatten_messages_as_text,
1276
+ **kwargs,
1277
+ )
1278
+
1279
+ def create_client(self):
1280
+ try:
1281
+ from litellm.router import Router
1282
+ except ModuleNotFoundError as e:
1283
+ raise ModuleNotFoundError(
1284
+ "Please install 'litellm' extra to use LiteLLMRouterModel: `pip install 'smolagents[litellm]'`"
1285
+ ) from e
1286
+ return Router(**self.client_kwargs)
1287
+
1288
+
1289
+ class InferenceClientModel(ApiModel):
1290
+ """A class to interact with Hugging Face's Inference Providers for language model interaction.
1291
+
1292
+ This model allows you to communicate with Hugging Face's models using Inference Providers. It can be used in serverless mode, with a dedicated endpoint, or even with a local URL, supporting features like stop sequences and grammar customization.
1293
+
1294
+ Providers include Cerebras, Cohere, Fal, Fireworks, HF-Inference, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, and more.
1295
+
1296
+ Parameters:
1297
+ model_id (`str`, *optional*, default `"Qwen/Qwen2.5-Coder-32B-Instruct"`):
1298
+ The Hugging Face model ID to be used for inference.
1299
+ This can be a model identifier from the Hugging Face model hub or a URL to a deployed Inference Endpoint.
1300
+ Currently, it defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`, but this may change in the future.
1301
+ provider (`str`, *optional*):
1302
+ Name of the provider to use for inference. A list of supported providers can be found in the [Inference Providers documentation](https://huggingface.co/docs/inference-providers/index#partners).
1303
+ Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order [here](https://hf.co/settings/inference-providers).
1304
+ If `base_url` is passed, then `provider` is not used.
1305
+ token (`str`, *optional*):
1306
+ Token used by the Hugging Face API for authentication. This token needs to be authorized to 'Make calls to the serverless Inference Providers'.
1307
+ If the model is gated (like Llama-3 models), the token also needs 'Read access to contents of all public gated repos you can access'.
1308
+ If not provided, the class will try to use environment variable 'HF_TOKEN', else use the token stored in the Hugging Face CLI configuration.
1309
+ timeout (`int`, *optional*, defaults to 120):
1310
+ Timeout for the API request, in seconds.
1311
+ client_kwargs (`dict[str, Any]`, *optional*):
1312
+ Additional keyword arguments to pass to the Hugging Face InferenceClient.
1313
+ custom_role_conversions (`dict[str, str]`, *optional*):
1314
+ Custom role conversion mapping to convert message roles into others.
1315
+ Useful for specific models that do not support specific message roles like "system".
1316
+ api_key (`str`, *optional*):
1317
+ Token to use for authentication. This is a duplicated argument from `token` to make [`InferenceClientModel`]
1318
+ follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
1319
+ bill_to (`str`, *optional*):
1320
+ The billing account to use for the requests. By default the requests are billed on the user's account. Requests can only be billed to
1321
+ an organization the user is a member of, and which has subscribed to Enterprise Hub.
1322
+ base_url (`str`, *optional*):
1323
+ Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClientModel`]
1324
+ follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
1325
+ **kwargs:
1326
+ Additional keyword arguments to pass to the Hugging Face InferenceClient.
1327
+
1328
+ Raises:
1329
+ ValueError:
1330
+ If the model name is not provided.
1331
+
1332
+ Example:
1333
+ ```python
1334
+ >>> engine = InferenceClientModel(
1335
+ ... model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
1336
+ ... provider="nebius",
1337
+ ... token="your_hf_token_here",
1338
+ ... max_tokens=5000,
1339
+ ... )
1340
+ >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}]
1341
+ >>> response = engine(messages, stop_sequences=["END"])
1342
+ >>> print(response)
1343
+ "Quantum mechanics is the branch of physics that studies..."
1344
+ ```
1345
+ """
1346
+
1347
+ def __init__(
1348
+ self,
1349
+ model_id: str = "Qwen/Qwen2.5-Coder-32B-Instruct",
1350
+ provider: str | None = None,
1351
+ token: str | None = None,
1352
+ timeout: int = 120,
1353
+ client_kwargs: dict[str, Any] | None = None,
1354
+ custom_role_conversions: dict[str, str] | None = None,
1355
+ api_key: str | None = None,
1356
+ bill_to: str | None = None,
1357
+ base_url: str | None = None,
1358
+ **kwargs,
1359
+ ):
1360
+ if token is not None and api_key is not None:
1361
+ raise ValueError(
1362
+ "Received both `token` and `api_key` arguments. Please provide only one of them."
1363
+ " `api_key` is an alias for `token` to make the API compatible with OpenAI's client."
1364
+ " It has the exact same behavior as `token`."
1365
+ )
1366
+ token = token if token is not None else api_key
1367
+ if token is None:
1368
+ token = os.getenv("HF_TOKEN")
1369
+ self.client_kwargs = {
1370
+ **(client_kwargs or {}),
1371
+ "model": model_id,
1372
+ "provider": provider,
1373
+ "token": token,
1374
+ "timeout": timeout,
1375
+ "bill_to": bill_to,
1376
+ "base_url": base_url,
1377
+ }
1378
+ super().__init__(model_id=model_id, custom_role_conversions=custom_role_conversions, **kwargs)
1379
+
1380
+ def create_client(self):
1381
+ """Create the Hugging Face client."""
1382
+ from huggingface_hub import InferenceClient
1383
+
1384
+ return InferenceClient(**self.client_kwargs)
1385
+
1386
+ def generate(
1387
+ self,
1388
+ messages: list[ChatMessage],
1389
+ stop_sequences: list[str] | None = None,
1390
+ response_format: dict[str, str] | None = None,
1391
+ tools_to_call_from: list[Tool] | None = None,
1392
+ **kwargs,
1393
+ ) -> ChatMessage:
1394
+ if response_format is not None and self.client_kwargs["provider"] not in STRUCTURED_GENERATION_PROVIDERS:
1395
+ raise ValueError(
1396
+ "InferenceClientModel only supports structured outputs with these providers:"
1397
+ + ", ".join(STRUCTURED_GENERATION_PROVIDERS)
1398
+ )
1399
+ completion_kwargs = self._prepare_completion_kwargs(
1400
+ messages=messages,
1401
+ stop_sequences=stop_sequences,
1402
+ tools_to_call_from=tools_to_call_from,
1403
+ response_format=response_format,
1404
+ convert_images_to_image_urls=True,
1405
+ custom_role_conversions=self.custom_role_conversions,
1406
+ **kwargs,
1407
+ )
1408
+ response = self.client.chat_completion(**completion_kwargs)
1409
+
1410
+ self._last_input_token_count = response.usage.prompt_tokens
1411
+ self._last_output_token_count = response.usage.completion_tokens
1412
+ return ChatMessage.from_dict(
1413
+ asdict(response.choices[0].message),
1414
+ raw=response,
1415
+ token_usage=TokenUsage(
1416
+ input_tokens=response.usage.prompt_tokens,
1417
+ output_tokens=response.usage.completion_tokens,
1418
+ ),
1419
+ )
1420
+
1421
+ def generate_stream(
1422
+ self,
1423
+ messages: list[ChatMessage],
1424
+ stop_sequences: list[str] | None = None,
1425
+ response_format: dict[str, str] | None = None,
1426
+ tools_to_call_from: list[Tool] | None = None,
1427
+ **kwargs,
1428
+ ) -> Generator[ChatMessageStreamDelta]:
1429
+ completion_kwargs = self._prepare_completion_kwargs(
1430
+ messages=messages,
1431
+ stop_sequences=stop_sequences,
1432
+ response_format=response_format,
1433
+ tools_to_call_from=tools_to_call_from,
1434
+ model=self.model_id,
1435
+ custom_role_conversions=self.custom_role_conversions,
1436
+ convert_images_to_image_urls=True,
1437
+ **kwargs,
1438
+ )
1439
+ for event in self.client.chat.completions.create(
1440
+ **completion_kwargs, stream=True, stream_options={"include_usage": True}
1441
+ ):
1442
+ if getattr(event, "usage", None):
1443
+ self._last_input_token_count = event.usage.prompt_tokens
1444
+ self._last_output_token_count = event.usage.completion_tokens
1445
+ yield ChatMessageStreamDelta(
1446
+ content="",
1447
+ token_usage=TokenUsage(
1448
+ input_tokens=event.usage.prompt_tokens,
1449
+ output_tokens=event.usage.completion_tokens,
1450
+ ),
1451
+ )
1452
+ if event.choices:
1453
+ choice = event.choices[0]
1454
+ if choice.delta:
1455
+ yield ChatMessageStreamDelta(
1456
+ content=choice.delta.content,
1457
+ tool_calls=[
1458
+ ChatMessageToolCallStreamDelta(
1459
+ index=delta.index,
1460
+ id=delta.id,
1461
+ type=delta.type,
1462
+ function=delta.function,
1463
+ )
1464
+ for delta in choice.delta.tool_calls
1465
+ ]
1466
+ if choice.delta.tool_calls
1467
+ else None,
1468
+ )
1469
+ else:
1470
+ if not getattr(choice, "finish_reason", None):
1471
+ raise ValueError(f"No content or tool calls in event: {event}")
1472
+
1473
+
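A minimal sketch of consuming `generate_stream` above, assuming an `InferenceClientModel` instance named `engine` configured as in the docstring example:

    messages = [ChatMessage(role="user", content=[{"type": "text", "text": "Hello!"}])]
    for delta in engine.generate_stream(messages):
        if delta.content:                # text arrives in incremental chunks
            print(delta.content, end="", flush=True)
        if delta.token_usage:            # usage is reported on a separate, content-free delta
            print(f"\n[{delta.token_usage.total_tokens} tokens]")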
1474
+ class OpenAIServerModel(ApiModel):
1475
+ """This model connects to an OpenAI-compatible API server.
1476
+
1477
+ Parameters:
1478
+ model_id (`str`):
1479
+ The model identifier to use on the server (e.g. "gpt-3.5-turbo").
1480
+ api_base (`str`, *optional*):
1481
+ The base URL of the OpenAI-compatible API server.
1482
+ api_key (`str`, *optional*):
1483
+ The API key to use for authentication.
1484
+ organization (`str`, *optional*):
1485
+ The organization to use for the API request.
1486
+ project (`str`, *optional*):
1487
+ The project to use for the API request.
1488
+ client_kwargs (`dict[str, Any]`, *optional*):
1489
+ Additional keyword arguments to pass to the OpenAI client (like organization, project, max_retries etc.).
1490
+ custom_role_conversions (`dict[str, str]`, *optional*):
1491
+ Custom role conversion mapping to convert message roles into others.
1492
+ Useful for specific models that do not support specific message roles like "system".
1493
+ flatten_messages_as_text (`bool`, default `False`):
1494
+ Whether to flatten messages as text.
1495
+ **kwargs:
1496
+ Additional keyword arguments to pass to the OpenAI API.
1497
+ """
1498
+
1499
+ def __init__(
1500
+ self,
1501
+ model_id: str,
1502
+ api_base: str | None = None,
1503
+ api_key: str | None = None,
1504
+ organization: str | None = None,
1505
+ project: str | None = None,
1506
+ client_kwargs: dict[str, Any] | None = None,
1507
+ custom_role_conversions: dict[str, str] | None = None,
1508
+ flatten_messages_as_text: bool = False,
1509
+ **kwargs,
1510
+ ):
1511
+ self.client_kwargs = {
1512
+ **(client_kwargs or {}),
1513
+ "api_key": api_key,
1514
+ "base_url": api_base,
1515
+ "organization": organization,
1516
+ "project": project,
1517
+ }
1518
+ super().__init__(
1519
+ model_id=model_id,
1520
+ custom_role_conversions=custom_role_conversions,
1521
+ flatten_messages_as_text=flatten_messages_as_text,
1522
+ **kwargs,
1523
+ )
1524
+
1525
+ def create_client(self):
1526
+ try:
1527
+ import openai
1528
+ except ModuleNotFoundError as e:
1529
+ raise ModuleNotFoundError(
1530
+ "Please install 'openai' extra to use OpenAIServerModel: `pip install 'smolagents[openai]'`"
1531
+ ) from e
1532
+
1533
+ return openai.OpenAI(**self.client_kwargs)
1534
+
1535
+ def generate_stream(
1536
+ self,
1537
+ messages: list[ChatMessage],
1538
+ stop_sequences: list[str] | None = None,
1539
+ response_format: dict[str, str] | None = None,
1540
+ tools_to_call_from: list[Tool] | None = None,
1541
+ **kwargs,
1542
+ ) -> Generator[ChatMessageStreamDelta]:
1543
+ completion_kwargs = self._prepare_completion_kwargs(
1544
+ messages=messages,
1545
+ stop_sequences=stop_sequences,
1546
+ response_format=response_format,
1547
+ tools_to_call_from=tools_to_call_from,
1548
+ model=self.model_id,
1549
+ custom_role_conversions=self.custom_role_conversions,
1550
+ convert_images_to_image_urls=True,
1551
+ **kwargs,
1552
+ )
1553
+ for event in self.client.chat.completions.create(
1554
+ **completion_kwargs, stream=True, stream_options={"include_usage": True}
1555
+ ):
1556
+ if event.usage:
1557
+ self._last_input_token_count = event.usage.prompt_tokens
1558
+ self._last_output_token_count = event.usage.completion_tokens
1559
+ yield ChatMessageStreamDelta(
1560
+ content="",
1561
+ token_usage=TokenUsage(
1562
+ input_tokens=event.usage.prompt_tokens,
1563
+ output_tokens=event.usage.completion_tokens,
1564
+ ),
1565
+ )
1566
+ if event.choices:
1567
+ choice = event.choices[0]
1568
+ if choice.delta:
1569
+ yield ChatMessageStreamDelta(
1570
+ content=choice.delta.content,
1571
+ tool_calls=[
1572
+ ChatMessageToolCallStreamDelta(
1573
+ index=delta.index,
1574
+ id=delta.id,
1575
+ type=delta.type,
1576
+ function=delta.function,
1577
+ )
1578
+ for delta in choice.delta.tool_calls
1579
+ ]
1580
+ if choice.delta.tool_calls
1581
+ else None,
1582
+ )
1583
+ else:
1584
+ if not getattr(choice, "finish_reason", None):
1585
+ raise ValueError(f"No content or tool calls in event: {event}")
1586
+
1587
+ def generate(
1588
+ self,
1589
+ messages: list[ChatMessage],
1590
+ stop_sequences: list[str] | None = None,
1591
+ response_format: dict[str, str] | None = None,
1592
+ tools_to_call_from: list[Tool] | None = None,
1593
+ **kwargs,
1594
+ ) -> ChatMessage:
1595
+ completion_kwargs = self._prepare_completion_kwargs(
1596
+ messages=messages,
1597
+ stop_sequences=stop_sequences,
1598
+ response_format=response_format,
1599
+ tools_to_call_from=tools_to_call_from,
1600
+ model=self.model_id,
1601
+ custom_role_conversions=self.custom_role_conversions,
1602
+ convert_images_to_image_urls=True,
1603
+ **kwargs,
1604
+ )
1605
+ response = self.client.chat.completions.create(**completion_kwargs)
1606
+
1607
+ # Reported that `response.usage` can be None in some cases when using OpenRouter: see GH-1401
1608
+ self._last_input_token_count = getattr(response.usage, "prompt_tokens", 0)
1609
+ self._last_output_token_count = getattr(response.usage, "completion_tokens", 0)
1610
+ return ChatMessage.from_dict(
1611
+ response.choices[0].message.model_dump(include={"role", "content", "tool_calls"}),
1612
+ raw=response,
1613
+ token_usage=TokenUsage(
1614
+ input_tokens=self._last_input_token_count,
1615
+ output_tokens=self._last_output_token_count,
1616
+ ),
1617
+ )
1618
+
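A minimal usage sketch for this class; the model name is a placeholder, and any OpenAI-compatible server (vLLM, Ollama, a llama.cpp server, ...) can stand in for the official API via `api_base`:

    import os

    model = OpenAIServerModel(
        model_id="gpt-4o-mini",                      # placeholder model name
        api_base="https://api.openai.com/v1",        # or e.g. a local vLLM endpoint
        api_key=os.getenv("OPENAI_API_KEY"),
    )
    message = model.generate(
        [ChatMessage(role="user", content=[{"type": "text", "text": "Say hello."}])]
    )
    print(message.content)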
1619
+
1620
+ OpenAIModel = OpenAIServerModel
1621
+
1622
+
1623
+ class AzureOpenAIServerModel(OpenAIServerModel):
1624
+ """This model connects to an Azure OpenAI deployment.
1625
+
1626
+ Parameters:
1627
+ model_id (`str`):
1628
+ The model deployment name to use when connecting (e.g. "gpt-4o-mini").
1629
+ azure_endpoint (`str`, *optional*):
1630
+ The Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/`. If not provided, it will be inferred from the `AZURE_OPENAI_ENDPOINT` environment variable.
1631
+ api_key (`str`, *optional*):
1632
+ The API key to use for authentication. If not provided, it will be inferred from the `AZURE_OPENAI_API_KEY` environment variable.
1633
+ api_version (`str`, *optional*):
1634
+ The API version to use. If not provided, it will be inferred from the `OPENAI_API_VERSION` environment variable.
1635
+ client_kwargs (`dict[str, Any]`, *optional*):
1636
+ Additional keyword arguments to pass to the AzureOpenAI client (like organization, project, max_retries etc.).
1637
+ custom_role_conversions (`dict[str, str]`, *optional*):
1638
+ Custom role conversion mapping to convert message roles into others.
1639
+ Useful for specific models that do not support specific message roles like "system".
1640
+ **kwargs:
1641
+ Additional keyword arguments to pass to the Azure OpenAI API.
1642
+ """
1643
+
1644
+ def __init__(
1645
+ self,
1646
+ model_id: str,
1647
+ azure_endpoint: str | None = None,
1648
+ api_key: str | None = None,
1649
+ api_version: str | None = None,
1650
+ client_kwargs: dict[str, Any] | None = None,
1651
+ custom_role_conversions: dict[str, str] | None = None,
1652
+ **kwargs,
1653
+ ):
1654
+ client_kwargs = client_kwargs or {}
1655
+ client_kwargs.update(
1656
+ {
1657
+ "api_version": api_version,
1658
+ "azure_endpoint": azure_endpoint,
1659
+ }
1660
+ )
1661
+ super().__init__(
1662
+ model_id=model_id,
1663
+ api_key=api_key,
1664
+ client_kwargs=client_kwargs,
1665
+ custom_role_conversions=custom_role_conversions,
1666
+ **kwargs,
1667
+ )
1668
+
1669
+ def create_client(self):
1670
+ try:
1671
+ import openai
1672
+ except ModuleNotFoundError as e:
1673
+ raise ModuleNotFoundError(
1674
+ "Please install 'openai' extra to use AzureOpenAIServerModel: `pip install 'smolagents[openai]'`"
1675
+ ) from e
1676
+
1677
+ return openai.AzureOpenAI(**self.client_kwargs)
1678
+
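And a minimal sketch for the Azure variant; all values are placeholders, and `model_id` must be the *deployment* name, not the base model name. As the docstring notes, the three settings can instead come from `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `OPENAI_API_VERSION`:

    import os

    model = AzureOpenAIServerModel(
        model_id="my-gpt-4o-mini-deployment",
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        api_version="2024-06-01",  # placeholder; match your deployment
    )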
1679
+
1680
+ AzureOpenAIModel = AzureOpenAIServerModel
1681
+
1682
+
1683
+ class AmazonBedrockServerModel(ApiModel):
1684
+ """
1685
+ A model class for interacting with Amazon Bedrock Server models through the Bedrock API.
1686
+
1687
+ This class provides an interface to interact with various Bedrock language models,
1688
+ allowing for customized model inference, guardrail configuration, message handling,
1689
+ and other parameters allowed by boto3 API.
1690
+
1691
+ Parameters:
1692
+ model_id (`str`):
1693
+ The model identifier to use on Bedrock (e.g. "us.amazon.nova-pro-v1:0").
1694
+ client (`boto3.client`, *optional*):
1695
+ A custom boto3 client for AWS interactions. If not provided, a default client will be created.
1696
+ client_kwargs (dict[str, Any], *optional*):
1697
+ Keyword arguments used to configure the boto3 client if it needs to be created internally.
1698
+ Examples include `region_name`, `config`, or `endpoint_url`.
1699
+ custom_role_conversions (`dict[str, str]`, *optional*):
1700
+ Custom role conversion mapping to convert message roles into others.
1701
+ Useful for specific models that do not support specific message roles like "system".
1702
+ Defaults to converting all roles to "user" role to enable using all the Bedrock models.
1703
+ flatten_messages_as_text (`bool`, default `False`):
1704
+ Whether to flatten messages as text.
1705
+ **kwargs:
1706
+ Additional keyword arguments passed directly to the underlying API calls.
1707
+
1708
+ Example:
1709
+ Creating a model instance with default settings:
1710
+ >>> bedrock_model = AmazonBedrockServerModel(
1711
+ ... model_id='us.amazon.nova-pro-v1:0'
1712
+ ... )
1713
+
1714
+ Creating a model instance with a custom boto3 client:
1715
+ >>> import boto3
1716
+ >>> client = boto3.client('bedrock-runtime', region_name='us-west-2')
1717
+ >>> bedrock_model = AmazonBedrockServerModel(
1718
+ ... model_id='us.amazon.nova-pro-v1:0',
1719
+ ... client=client
1720
+ ... )
1721
+
1722
+ Creating a model instance with client_kwargs for internal client creation:
1723
+ >>> bedrock_model = AmazonBedrockServerModel(
1724
+ ... model_id='us.amazon.nova-pro-v1:0',
1725
+ ... client_kwargs={'region_name': 'us-west-2', 'endpoint_url': 'https://custom-endpoint.com'}
1726
+ ... )
1727
+
1728
+ Creating a model instance with inference and guardrail configurations:
1729
+ >>> additional_api_config = {
1730
+ ... "inferenceConfig": {
1731
+ ... "maxTokens": 3000
1732
+ ... },
1733
+ ... "guardrailConfig": {
1734
+ ... "guardrailIdentifier": "identify1",
1735
+ ... "guardrailVersion": 'v1'
1736
+ ... },
1737
+ ... }
1738
+ >>> bedrock_model = AmazonBedrockServerModel(
1739
+ ... model_id='anthropic.claude-3-haiku-20240307-v1:0',
1740
+ ... **additional_api_config
1741
+ ... )
1742
+ """
1743
+
1744
+ def __init__(
1745
+ self,
1746
+ model_id: str,
1747
+ client=None,
1748
+ client_kwargs: dict[str, Any] | None = None,
1749
+ custom_role_conversions: dict[str, str] | None = None,
1750
+ **kwargs,
1751
+ ):
1752
+ self.client_kwargs = client_kwargs or {}
1753
+
1754
+ # Bedrock only supports `assistant` and `user` roles.
1755
+ # Many Bedrock models do not allow conversations to start with the `assistant` role, so by default every role is converted to `user`.
1756
+ # This parameter is retained for future model implementations and extended support.
1757
+ custom_role_conversions = custom_role_conversions or {
1758
+ MessageRole.SYSTEM: MessageRole.USER,
1759
+ MessageRole.ASSISTANT: MessageRole.USER,
1760
+ MessageRole.TOOL_CALL: MessageRole.USER,
1761
+ MessageRole.TOOL_RESPONSE: MessageRole.USER,
1762
+ }
1763
+
1764
+ super().__init__(
1765
+ model_id=model_id,
1766
+ custom_role_conversions=custom_role_conversions,
1767
+ flatten_messages_as_text=False, # Bedrock API doesn't support flatten messages, must be a list of messages
1768
+ client=client,
1769
+ **kwargs,
1770
+ )
1771
+
1772
+ def _prepare_completion_kwargs(
1773
+ self,
1774
+ messages: list[ChatMessage],
1775
+ stop_sequences: list[str] | None = None,
1776
+ response_format: dict[str, str] | None = None,
1777
+ tools_to_call_from: list[Tool] | None = None,
1778
+ custom_role_conversions: dict[str, str] | None = None,
1779
+ convert_images_to_image_urls: bool = False,
1780
+ tool_choice: str | dict[Any, Any] | None = None,
1781
+ **kwargs,
1782
+ ) -> dict:
1783
+ """
1784
+ Overrides the base method to handle Bedrock-specific configurations.
1785
+
1786
+ This implementation adapts the completion keyword arguments to align with
1787
+ Bedrock's requirements, ensuring compatibility with its unique setup and
1788
+ constraints.
1789
+ """
1790
+ completion_kwargs = super()._prepare_completion_kwargs(
1791
+ messages=messages,
1792
+ stop_sequences=None,  # Bedrock supports stop sequences via its inferenceConfig instead
1793
+ tools_to_call_from=tools_to_call_from,
1794
+ custom_role_conversions=custom_role_conversions,
1795
+ convert_images_to_image_urls=convert_images_to_image_urls,
1796
+ **kwargs,
1797
+ )
1798
+
1799
+ # Not all models in Bedrock support `toolConfig`. Also, smolagents already includes the tool call in the prompt,
1800
+ # so adding `toolConfig` could cause conflicts. We remove it to avoid issues.
1801
+ completion_kwargs.pop("toolConfig", None)
1802
+
1803
+ # The Bedrock API does not support the `type` key in requests.
1804
+ # This block of code modifies the object to meet Bedrock's requirements.
1805
+ for message in completion_kwargs.get("messages", []):
1806
+ for content in message.get("content", []):
1807
+ if "type" in content:
1808
+ del content["type"]
1809
+
1810
+ return {
1811
+ "modelId": self.model_id,
1812
+ **completion_kwargs,
1813
+ }
1814
+
1815
+ def create_client(self):
1816
+ try:
1817
+ import boto3 # type: ignore
1818
+ except ModuleNotFoundError as e:
1819
+ raise ModuleNotFoundError(
1820
+ "Please install 'bedrock' extra to use AmazonBedrockServerModel: `pip install 'smolagents[bedrock]'`"
1821
+ ) from e
1822
+
1823
+ return boto3.client("bedrock-runtime", **self.client_kwargs)
1824
+
1825
+ def generate(
1826
+ self,
1827
+ messages: list[ChatMessage],
1828
+ stop_sequences: list[str] | None = None,
1829
+ response_format: dict[str, str] | None = None,
1830
+ tools_to_call_from: list[Tool] | None = None,
1831
+ **kwargs,
1832
+ ) -> ChatMessage:
1833
+ if response_format is not None:
1834
+ raise ValueError("Amazon Bedrock does not support response_format")
1835
+ completion_kwargs: dict = self._prepare_completion_kwargs(
1836
+ messages=messages,
1837
+ tools_to_call_from=tools_to_call_from,
1838
+ custom_role_conversions=self.custom_role_conversions,
1839
+ convert_images_to_image_urls=True,
1840
+ **kwargs,
1841
+ )
1842
+
1843
+ # self.client is created in ApiModel class
1844
+ response = self.client.converse(**completion_kwargs)
1845
+
1846
+ # Keep only the text of the first content block
1847
+ response["output"]["message"]["content"] = response["output"]["message"]["content"][0]["text"]
1848
+
1849
+ self._last_input_token_count = response["usage"]["inputTokens"]
1850
+ self._last_output_token_count = response["usage"]["outputTokens"]
1851
+ return ChatMessage.from_dict(
1852
+ response["output"]["message"],
1853
+ raw=response,
1854
+ token_usage=TokenUsage(
1855
+ input_tokens=response["usage"]["inputTokens"],
1856
+ output_tokens=response["usage"]["outputTokens"],
1857
+ ),
1858
+ )
1859
+
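A sketch of a full call through `generate` above, assuming AWS credentials are configured; as in the docstring example, extra keyword arguments such as `inferenceConfig` are forwarded to `client.converse`. Note that with the default role conversions, every message reaches Bedrock under the `user` role:

    model = AmazonBedrockServerModel(
        model_id="us.amazon.nova-pro-v1:0",
        client_kwargs={"region_name": "us-east-1"},
        inferenceConfig={"maxTokens": 512},
    )
    message = model.generate(
        [ChatMessage(role="user", content=[{"type": "text", "text": "Hello!"}])]
    )
    print(message.content)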
1860
+
1861
+ AmazonBedrockModel = AmazonBedrockServerModel
1862
+
1863
+ __all__ = [
1864
+ "MessageRole",
1865
+ "tool_role_conversions",
1866
+ "get_clean_message_list",
1867
+ "Model",
1868
+ "MLXModel",
1869
+ "TransformersModel",
1870
+ "ApiModel",
1871
+ "InferenceClientModel",
1872
+ "LiteLLMModel",
1873
+ "LiteLLMRouterModel",
1874
+ "OpenAIServerModel",
1875
+ "OpenAIModel",
1876
+ "VLLMModel",
1877
+ "AzureOpenAIServerModel",
1878
+ "AzureOpenAIModel",
1879
+ "AmazonBedrockServerModel",
1880
+ "AmazonBedrockModel",
1881
+ "ChatMessage",
1882
+ ]
src/smolagents/monitoring.py ADDED
@@ -0,0 +1,265 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+
4
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ import json
18
+ from dataclasses import dataclass, field
19
+ from enum import IntEnum
20
+
21
+ from rich import box
22
+ from rich.console import Console, Group
23
+ from rich.panel import Panel
24
+ from rich.rule import Rule
25
+ from rich.syntax import Syntax
26
+ from rich.table import Table
27
+ from rich.text import Text
28
+ from rich.tree import Tree
29
+
30
+ from smolagents.utils import escape_code_brackets
31
+
32
+
33
+ __all__ = ["AgentLogger", "LogLevel", "Monitor", "TokenUsage", "Timing"]
34
+
35
+
36
+ @dataclass
37
+ class TokenUsage:
38
+ """
39
+ Contains the token usage information for a given step or run.
40
+ """
41
+
42
+ input_tokens: int
43
+ output_tokens: int
44
+ total_tokens: int = field(init=False)
45
+
46
+ def __post_init__(self):
47
+ self.total_tokens = self.input_tokens + self.output_tokens
48
+
49
+ def dict(self):
50
+ return {
51
+ "input_tokens": self.input_tokens,
52
+ "output_tokens": self.output_tokens,
53
+ "total_tokens": self.total_tokens,
54
+ }
55
+
56
+
57
+ @dataclass
58
+ class Timing:
59
+ """
60
+ Contains the timing information for a given step or run.
61
+ """
62
+
63
+ start_time: float
64
+ end_time: float | None = None
65
+
66
+ @property
67
+ def duration(self):
68
+ return None if self.end_time is None else self.end_time - self.start_time
69
+
70
+ def dict(self):
71
+ return {
72
+ "start_time": self.start_time,
73
+ "end_time": self.end_time,
74
+ "duration": self.duration,
75
+ }
76
+
77
+ def __repr__(self) -> str:
78
+ return f"Timing(start_time={self.start_time}, end_time={self.end_time}, duration={self.duration})"
79
+
80
+
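A quick behavioral sketch of the two dataclasses above:

    import time

    usage = TokenUsage(input_tokens=120, output_tokens=45)
    assert usage.total_tokens == 165     # computed in __post_init__

    timing = Timing(start_time=time.time())
    assert timing.duration is None       # stays None until end_time is set
    timing.end_time = time.time()
    print(f"step took {timing.duration:.4f}s")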
81
+ class Monitor:
82
+ def __init__(self, tracked_model, logger):
83
+ self.step_durations = []
84
+ self.tracked_model = tracked_model
85
+ self.logger = logger
86
+ self.total_input_token_count = 0
87
+ self.total_output_token_count = 0
88
+
89
+ def get_total_token_counts(self) -> TokenUsage:
90
+ return TokenUsage(
91
+ input_tokens=self.total_input_token_count,
92
+ output_tokens=self.total_output_token_count,
93
+ )
94
+
95
+ def reset(self):
96
+ self.step_durations = []
97
+ self.total_input_token_count = 0
98
+ self.total_output_token_count = 0
99
+
100
+ def update_metrics(self, step_log):
101
+ """Update the metrics of the monitor.
102
+
103
+ Args:
104
+ step_log ([`MemoryStep`]): Step log to update the monitor with.
105
+ """
106
+ step_duration = step_log.timing.duration
107
+ self.step_durations.append(step_duration)
108
+ console_outputs = f"[Step {len(self.step_durations)}: Duration {step_duration:.2f} seconds"
109
+
110
+ if step_log.token_usage is not None:
111
+ self.total_input_token_count += step_log.token_usage.input_tokens
112
+ self.total_output_token_count += step_log.token_usage.output_tokens
113
+ console_outputs += (
114
+ f"| Input tokens: {self.total_input_token_count:,} | Output tokens: {self.total_output_token_count:,}"
115
+ )
116
+ console_outputs += "]"
117
+ self.logger.log(Text(console_outputs, style="dim"), level=1)
118
+
119
+
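A sketch of feeding the monitor a step log by hand; a real run passes a `MemoryStep`, for which `SimpleNamespace` stands in here (`AgentLogger` and `LogLevel` are defined just below):

    from types import SimpleNamespace

    logger = AgentLogger(level=LogLevel.INFO)
    monitor = Monitor(tracked_model=None, logger=logger)
    step = SimpleNamespace(
        timing=Timing(start_time=0.0, end_time=1.5),
        token_usage=TokenUsage(input_tokens=100, output_tokens=20),
    )
    monitor.update_metrics(step)
    print(monitor.get_total_token_counts().total_tokens)  # 120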
120
+ class LogLevel(IntEnum):
121
+ OFF = -1 # No output
122
+ ERROR = 0 # Only errors
123
+ INFO = 1 # Normal output (default)
124
+ DEBUG = 2 # Detailed output
125
+
126
+
127
+ YELLOW_HEX = "#d4b702"
128
+
129
+
130
+ class AgentLogger:
131
+ def __init__(self, level: LogLevel = LogLevel.INFO, console: Console | None = None):
132
+ self.level = level
133
+ if console is None:
134
+ self.console = Console()
135
+ else:
136
+ self.console = console
137
+
138
+ def log(self, *args, level: int | str | LogLevel = LogLevel.INFO, **kwargs) -> None:
139
+ """Logs a message to the console.
140
+
141
+ Args:
142
+ level (LogLevel, optional): Defaults to LogLevel.INFO.
143
+ """
144
+ if isinstance(level, str):
145
+ level = LogLevel[level.upper()]
146
+ if level <= self.level:
147
+ self.console.print(*args, **kwargs)
148
+
149
+ def log_error(self, error_message: str) -> None:
150
+ self.log(escape_code_brackets(error_message), style="bold red", level=LogLevel.ERROR)
151
+
152
+ def log_markdown(self, content: str, title: str | None = None, level=LogLevel.INFO, style=YELLOW_HEX) -> None:
153
+ markdown_content = Syntax(
154
+ content,
155
+ lexer="markdown",
156
+ theme="github-dark",
157
+ word_wrap=True,
158
+ )
159
+ if title:
160
+ self.log(
161
+ Group(
162
+ Rule(
163
+ "[bold italic]" + title,
164
+ align="left",
165
+ style=style,
166
+ ),
167
+ markdown_content,
168
+ ),
169
+ level=level,
170
+ )
171
+ else:
172
+ self.log(markdown_content, level=level)
173
+
174
+ def log_code(self, title: str, content: str, level: int = LogLevel.INFO) -> None:
175
+ self.log(
176
+ Panel(
177
+ Syntax(
178
+ content,
179
+ lexer="python",
180
+ theme="monokai",
181
+ word_wrap=True,
182
+ ),
183
+ title="[bold]" + title,
184
+ title_align="left",
185
+ box=box.HORIZONTALS,
186
+ ),
187
+ level=level,
188
+ )
189
+
190
+ def log_rule(self, title: str, level: int = LogLevel.INFO) -> None:
191
+ self.log(
192
+ Rule(
193
+ "[bold]" + title,
194
+ characters="━",
195
+ style=YELLOW_HEX,
196
+ ),
197
+ level=level,
198
+ )
199
+
200
+ def log_task(self, content: str, subtitle: str, title: str | None = None, level: LogLevel = LogLevel.INFO) -> None:
201
+ self.log(
202
+ Panel(
203
+ f"\n[bold]{escape_code_brackets(content)}\n",
204
+ title="[bold]New run" + (f" - {title}" if title else ""),
205
+ subtitle=subtitle,
206
+ border_style=YELLOW_HEX,
207
+ subtitle_align="left",
208
+ ),
209
+ level=level,
210
+ )
211
+
212
+ def log_messages(self, messages: list[dict], level: LogLevel = LogLevel.DEBUG) -> None:
213
+ messages_as_string = "\n".join([json.dumps(dict(message), indent=4) for message in messages])
214
+ self.log(
215
+ Syntax(
216
+ messages_as_string,
217
+ lexer="markdown",
218
+ theme="github-dark",
219
+ word_wrap=True,
220
+ ),
221
+ level=level,
222
+ )
223
+
224
+ def visualize_agent_tree(self, agent):
225
+ def create_tools_section(tools_dict):
226
+ table = Table(show_header=True, header_style="bold")
227
+ table.add_column("Name", style="#1E90FF")
228
+ table.add_column("Description")
229
+ table.add_column("Arguments")
230
+
231
+ for name, tool in tools_dict.items():
232
+ args = [
233
+ f"{arg_name} (`{info.get('type', 'Any')}`{', optional' if info.get('optional') else ''}): {info.get('description', '')}"
234
+ for arg_name, info in getattr(tool, "inputs", {}).items()
235
+ ]
236
+ table.add_row(name, getattr(tool, "description", str(tool)), "\n".join(args))
237
+
238
+ return Group("🛠️ [italic #1E90FF]Tools:[/italic #1E90FF]", table)
239
+
240
+ def get_agent_headline(agent, name: str | None = None):
241
+ name_headline = f"{name} | " if name else ""
242
+ return f"[bold {YELLOW_HEX}]{name_headline}{agent.__class__.__name__} | {agent.model.model_id}"
243
+
244
+ def build_agent_tree(parent_tree, agent_obj):
245
+ """Recursively builds the agent tree."""
246
+ parent_tree.add(create_tools_section(agent_obj.tools))
247
+
248
+ if agent_obj.managed_agents:
249
+ agents_branch = parent_tree.add("🤖 [italic #1E90FF]Managed agents:")
250
+ for name, managed_agent in agent_obj.managed_agents.items():
251
+ agent_tree = agents_branch.add(get_agent_headline(managed_agent, name))
252
+ if managed_agent.__class__.__name__ == "CodeAgent":
253
+ agent_tree.add(
254
+ f"✅ [italic #1E90FF]Authorized imports:[/italic #1E90FF] {managed_agent.additional_authorized_imports}"
255
+ )
256
+ agent_tree.add(f"📝 [italic #1E90FF]Description:[/italic #1E90FF] {managed_agent.description}")
257
+ build_agent_tree(agent_tree, managed_agent)
258
+
259
+ main_tree = Tree(get_agent_headline(agent))
260
+ if agent.__class__.__name__ == "CodeAgent":
261
+ main_tree.add(
262
+ f"✅ [italic #1E90FF]Authorized imports:[/italic #1E90FF] {agent.additional_authorized_imports}"
263
+ )
264
+ build_agent_tree(main_tree, agent)
265
+ self.console.print(main_tree)
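A short sketch of the logger's main entry points; rendering is delegated to the rich console:

    logger = AgentLogger(level=LogLevel.DEBUG)
    logger.log_rule("Step 1")
    logger.log_code("Executing parsed code:", "print('hello')")
    logger.log_markdown("Some **model output**", title="Output message")
    logger.log_error("Something went wrong")  # shown at every level except OFF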
src/smolagents/remote_executors.py ADDED
@@ -0,0 +1,451 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+
4
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ import base64
18
+ import inspect
19
+ import json
20
+ import pickle
21
+ import time
22
+ from io import BytesIO
23
+ from pathlib import Path
24
+ from textwrap import dedent
25
+ from typing import Any
26
+
27
+ import PIL.Image
28
+ import requests
29
+
30
+ from .default_tools import FinalAnswerTool
31
+ from .local_python_executor import PythonExecutor
32
+ from .monitoring import LogLevel
33
+ from .tools import Tool, get_tools_definition_code
34
+ from .utils import AgentError
35
+
36
+
37
+ try:
38
+ from dotenv import load_dotenv
39
+
40
+ load_dotenv()
41
+ except ModuleNotFoundError:
42
+ pass
43
+
44
+
45
+ class RemotePythonExecutor(PythonExecutor):
46
+ FINAL_ANSWER_EXCEPTION = "FinalAnswerException"
47
+
48
+ def __init__(self, additional_imports: list[str], logger):
49
+ self.additional_imports = additional_imports
50
+ self.logger = logger
51
+ self.logger.log("Initializing executor, hold on...")
52
+ self.installed_packages = []
53
+
54
+ def run_code_raise_errors(self, code: str) -> tuple[Any, str, bool]:
55
+ """
56
+ Execute code, return the result and output, also determining if
57
+ the result is the final answer.
58
+ """
59
+ raise NotImplementedError
60
+
61
+ def send_tools(self, tools: dict[str, Tool]):
62
+ if "final_answer" in tools:
63
+ self._patch_final_answer_with_exception(tools["final_answer"])
64
+ # Install tool packages
65
+ packages_to_install = {
66
+ pkg
67
+ for tool in tools.values()
68
+ for pkg in tool.to_dict()["requirements"]
69
+ if pkg not in self.installed_packages + ["smolagents"]
70
+ }
71
+ if packages_to_install:
72
+ self.installed_packages += self.install_packages(list(packages_to_install))
73
+ # Get tool definitions
74
+ code = get_tools_definition_code(tools)
75
+ if code:
76
+ execution = self.run_code_raise_errors(code)
77
+ self.logger.log(execution[1])
78
+
79
+ def send_variables(self, variables: dict):
80
+ """
81
+ Send variables to the kernel namespace using pickle.
82
+ """
83
+ pickled_vars = base64.b64encode(pickle.dumps(variables)).decode()
84
+ code = f"""
85
+ import pickle, base64
86
+ vars_dict = pickle.loads(base64.b64decode('{pickled_vars}'))
87
+ locals().update(vars_dict)
88
+ """
89
+ self.run_code_raise_errors(code)
90
+
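The transport used by `send_variables` above, in isolation; only picklable values survive the trip into the kernel:

    import base64
    import pickle

    payload = base64.b64encode(pickle.dumps({"x": 1, "rows": [2, 3]})).decode()
    # `payload` is the ASCII-safe string embedded into the generated code...
    restored = pickle.loads(base64.b64decode(payload))
    assert restored == {"x": 1, "rows": [2, 3]}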
91
+ def __call__(self, code_action: str) -> tuple[Any, str, bool]:
92
+ """Run the code and determine if it is the final answer."""
93
+ return self.run_code_raise_errors(code_action)
94
+
95
+ def install_packages(self, additional_imports: list[str]):
96
+ if additional_imports:
97
+ _, execution_logs, _ = self.run_code_raise_errors(f"!pip install {' '.join(additional_imports)}")
98
+ self.logger.log(execution_logs)
99
+ return additional_imports
100
+
101
+ def _patch_final_answer_with_exception(self, final_answer_tool: FinalAnswerTool):
102
+ """Patch the FinalAnswerTool to raise an exception.
103
+
104
+ This is necessary because the remote executors
105
+ rely on the FinalAnswerTool to detect the final answer.
106
+ It modifies the `forward` method of the FinalAnswerTool to raise
107
+ a `FinalAnswerException` with the final answer as a pickled value.
108
+ This allows the executor to catch this exception and return the final answer.
109
+
110
+ Args:
111
+ final_answer_tool (`FinalAnswerTool`): FinalAnswerTool instance to patch.
112
+ """
113
+
114
+ # Create a new class that inherits from the original FinalAnswerTool
115
+ class _FinalAnswerTool(final_answer_tool.__class__):
116
+ pass
117
+
118
+ # Add a new forward method that raises the FinalAnswerException
119
+ # - Define the new forward method function
120
+ def forward(self, *args, **kwargs) -> Any:
121
+ import base64
122
+ import pickle
123
+
124
+ class FinalAnswerException(Exception):
125
+ def __init__(self, value):
126
+ self.value = value
127
+
128
+ raise FinalAnswerException(base64.b64encode(pickle.dumps(self._forward(*args, **kwargs))).decode())
129
+
130
+ # - Set the new forward method function to the _FinalAnswerTool class
131
+ _FinalAnswerTool.forward = forward
132
+
133
+ # Rename the original forward method to _forward
134
+ # - Get the original forward method function from the final_answer_tool instance
135
+ original_forward_function = final_answer_tool.forward.__func__
136
+ # - Set the new _forward method function to the _FinalAnswerTool class
137
+ _FinalAnswerTool._forward = original_forward_function
138
+ # - Update the source code of the new forward method to match the original but with the new name
139
+ _FinalAnswerTool._forward.__source__ = inspect.getsource(original_forward_function).replace(
140
+ "def forward(", "def _forward("
141
+ )
142
+
143
+ # Set the new class as the class of the final_answer_tool instance
144
+ final_answer_tool.__class__ = _FinalAnswerTool
145
+
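The consumer side of this patch, sketched locally; the remote executors below do the same thing by matching the exception *name* in the execution error and unpickling its value (`patched_tool` is a hypothetical `FinalAnswerTool` instance that has gone through `_patch_final_answer_with_exception`):

    try:
        patched_tool.forward("42")
    except Exception as e:
        if type(e).__name__ == RemotePythonExecutor.FINAL_ANSWER_EXCEPTION:
            final_answer = pickle.loads(base64.b64decode(e.value))
            assert final_answer == "42"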
146
+
147
+ class E2BExecutor(RemotePythonExecutor):
148
+ """
149
+ Executes Python code using E2B.
150
+
151
+ Args:
152
+ additional_imports (`list[str]`): Additional imports to install.
153
+ logger (`Logger`): Logger to use.
154
+ **kwargs: Additional arguments to pass to the E2B Sandbox.
155
+ """
156
+
157
+ def __init__(self, additional_imports: list[str], logger, **kwargs):
158
+ super().__init__(additional_imports, logger)
159
+ try:
160
+ from e2b_code_interpreter import Sandbox
161
+ except ModuleNotFoundError:
162
+ raise ModuleNotFoundError(
163
+ """Please install 'e2b' extra to use E2BExecutor: `pip install 'smolagents[e2b]'`"""
164
+ )
165
+ self.sandbox = Sandbox(**kwargs)
166
+ self.installed_packages = self.install_packages(additional_imports)
167
+ self.logger.log("E2B is running", level=LogLevel.INFO)
168
+
169
+ def run_code_raise_errors(self, code: str) -> tuple[Any, str, bool]:
170
+ execution = self.sandbox.run_code(code)
171
+ execution_logs = "\n".join([str(log) for log in execution.logs.stdout])
172
+
173
+ # Handle errors
174
+ if execution.error:
175
+ # Check if the error is a FinalAnswerException
176
+ if execution.error.name == RemotePythonExecutor.FINAL_ANSWER_EXCEPTION:
177
+ final_answer = pickle.loads(base64.b64decode(execution.error.value))
178
+ return final_answer, execution_logs, True
179
+
180
+ # Construct error message
181
+ error_message = (
182
+ f"{execution_logs}\n"
183
+ f"Executing code yielded an error:\n"
184
+ f"{execution.error.name}\n"
185
+ f"{execution.error.value}\n"
186
+ f"{execution.error.traceback}"
187
+ )
188
+ raise AgentError(error_message, self.logger)
189
+
190
+ # Handle results
191
+ if not execution.results:
192
+ return None, execution_logs, False
193
+
194
+ for result in execution.results:
195
+ if not result.is_main_result:
196
+ continue
197
+ # Handle image outputs
198
+ for attribute_name in ["jpeg", "png"]:
199
+ img_data = getattr(result, attribute_name, None)
200
+ if img_data is not None:
201
+ decoded_bytes = base64.b64decode(img_data.encode("utf-8"))
202
+ return PIL.Image.open(BytesIO(decoded_bytes)), execution_logs, False
203
+ # Handle other data formats
204
+ for attribute_name in [
205
+ "chart",
206
+ "data",
207
+ "html",
208
+ "javascript",
209
+ "json",
210
+ "latex",
211
+ "markdown",
212
+ "pdf",
213
+ "svg",
214
+ "text",
215
+ ]:
216
+ data = getattr(result, attribute_name, None)
217
+ if data is not None:
218
+ return data, execution_logs, False
219
+ # If no main result found, return None
220
+ return None, execution_logs, False
221
+
222
+ def cleanup(self):
223
+ """Clean up the E2B sandbox and resources."""
224
+ try:
225
+ if hasattr(self, "sandbox"):
226
+ self.logger.log("Shutting down sandbox...", level=LogLevel.INFO)
227
+ self.sandbox.kill()
228
+ self.logger.log("Sandbox cleanup completed", level=LogLevel.INFO)
229
+ del self.sandbox
230
+ except Exception as e:
231
+ self.logger.log_error(f"Error during cleanup: {e}")
232
+
233
+
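A minimal sketch of driving this executor, assuming the `e2b` extra is installed and `E2B_API_KEY` is set; extra keyword arguments go straight to the E2B `Sandbox`:

    from smolagents.monitoring import AgentLogger, LogLevel

    logger = AgentLogger(level=LogLevel.INFO)
    executor = E2BExecutor(additional_imports=["numpy"], logger=logger)
    result, logs, is_final_answer = executor("import numpy as np; print(np.ones(3).sum())")
    executor.cleanup()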
234
+ class DockerExecutor(RemotePythonExecutor):
235
+ """
236
+ Executes Python code using Jupyter Kernel Gateway in a Docker container.
237
+ """
238
+
239
+ def __init__(
240
+ self,
241
+ additional_imports: list[str],
242
+ logger,
243
+ host: str = "127.0.0.1",
244
+ port: int = 8888,
245
+ image_name: str = "jupyter-kernel",
246
+ build_new_image: bool = True,
247
+ container_run_kwargs: dict[str, Any] | None = None,
248
+ ):
249
+ """
250
+ Initialize the Docker-based Jupyter Kernel Gateway executor.
251
+
252
+ Args:
253
+ additional_imports: Additional imports to install.
254
+ logger: Logger to use.
255
+ host: Host to bind to.
256
+ port: Port to bind to.
257
+ image_name: Name of the Docker image to use. If the image doesn't exist, it will be built.
258
+ build_new_image: If True, the image will be rebuilt even if it already exists.
259
+ container_run_kwargs: Additional keyword arguments to pass to the Docker container run command.
260
+ """
261
+ super().__init__(additional_imports, logger)
262
+ try:
263
+ import docker
264
+ from websocket import create_connection
265
+ except ModuleNotFoundError:
266
+ raise ModuleNotFoundError(
267
+ "Please install 'docker' extra to use DockerExecutor: `pip install 'smolagents[docker]'`"
268
+ )
269
+ self.host = host
270
+ self.port = port
271
+ self.image_name = image_name
272
+
273
+ # Initialize Docker
274
+ try:
275
+ self.client = docker.from_env()
276
+ except docker.errors.DockerException as e:
277
+ raise RuntimeError("Could not connect to Docker daemon: make sure Docker is running.") from e
278
+
279
+ # Build and start container
280
+ try:
281
+ # Check if image exists, unless forced to rebuild
282
+ if not build_new_image:
283
+ try:
284
+ self.client.images.get(self.image_name)
285
+ self.logger.log(f"Using existing Docker image: {self.image_name}", level=LogLevel.INFO)
286
+ except docker.errors.ImageNotFound:
287
+ self.logger.log(f"Image {self.image_name} not found, building...", level=LogLevel.INFO)
288
+ build_new_image = True
289
+
290
+ if build_new_image:
291
+ self.logger.log(f"Building Docker image {self.image_name}...", level=LogLevel.INFO)
292
+ dockerfile_path = Path(__file__).parent / "Dockerfile"
293
+ if not dockerfile_path.exists():
294
+ with open(dockerfile_path, "w") as f:
295
+ f.write(
296
+ dedent(
297
+ """\
298
+ FROM python:3.12-slim
299
+
300
+ RUN pip install jupyter_kernel_gateway jupyter_client
301
+
302
+ EXPOSE 8888
303
+ CMD ["jupyter", "kernelgateway", "--KernelGatewayApp.ip='0.0.0.0'", "--KernelGatewayApp.port=8888", "--KernelGatewayApp.allow_origin='*'"]
304
+ """
305
+ )
306
+ )
307
+ _, build_logs = self.client.images.build(
308
+ path=str(dockerfile_path.parent), dockerfile=str(dockerfile_path), tag=self.image_name
309
+ )
310
+ for log_chunk in build_logs:
311
+ # Only log non-empty messages
312
+ if log_message := log_chunk.get("stream", "").rstrip():
313
+ self.logger.log(log_message, level=LogLevel.DEBUG)
314
+
315
+ self.logger.log(f"Starting container on {host}:{port}...", level=LogLevel.INFO)
316
+ # Create base container parameters
317
+ container_kwargs = {}
318
+ if container_run_kwargs:
319
+ container_kwargs.update(container_run_kwargs)
320
+
321
+ # Ensure required port mapping and background running
322
+ if not isinstance(container_kwargs.get("ports"), dict):
323
+ container_kwargs["ports"] = {}
324
+ container_kwargs["ports"]["8888/tcp"] = (host, port)
325
+ container_kwargs["detach"] = True
326
+
327
+ self.container = self.client.containers.run(self.image_name, **container_kwargs)
328
+
329
+ retries = 0
330
+ while self.container.status != "running" and retries < 5:
331
+ self.logger.log(f"Container status: {self.container.status}, waiting...", level=LogLevel.INFO)
332
+ time.sleep(1)
333
+ self.container.reload()
334
+ retries += 1
335
+
336
+ self.base_url = f"http://{host}:{port}"
337
+
338
+ # Create new kernel via HTTP
339
+ r = requests.post(f"{self.base_url}/api/kernels")
340
+ if r.status_code != 201:
341
+ error_details = {
342
+ "status_code": r.status_code,
343
+ "headers": dict(r.headers),
344
+ "url": r.url,
345
+ "body": r.text,
346
+ "request_method": r.request.method,
347
+ "request_headers": dict(r.request.headers),
348
+ "request_body": r.request.body,
349
+ }
350
+ self.logger.log_error(f"Failed to create kernel. Details: {json.dumps(error_details, indent=2)}")
351
+ raise RuntimeError(f"Failed to create kernel: Status {r.status_code}\nResponse: {r.text}") from None
352
+
353
+ self.kernel_id = r.json()["id"]
354
+
355
+ ws_url = f"ws://{host}:{port}/api/kernels/{self.kernel_id}/channels"
356
+ self.ws = create_connection(ws_url)
357
+
358
+ self.installed_packages = self.install_packages(additional_imports)
359
+ self.logger.log(
360
+ f"Container {self.container.short_id} is running with kernel {self.kernel_id}", level=LogLevel.INFO
361
+ )
362
+
363
+ except Exception as e:
364
+ self.cleanup()
365
+ raise RuntimeError(f"Failed to initialize Jupyter kernel: {e}") from e
366
+
367
+ def run_code_raise_errors(self, code_action: str) -> tuple[Any, str, bool]:
368
+ try:
369
+ # Send execute request
370
+ msg_id = self._send_execute_request(code_action)
371
+
372
+ # Collect output and results
373
+ outputs = []
374
+ result = None
375
+ is_final_answer = False
376
+
377
+ while True:
378
+ msg = json.loads(self.ws.recv())
379
+ parent_msg_id = msg.get("parent_header", {}).get("msg_id")
380
+ # Skip unrelated messages
381
+ if parent_msg_id != msg_id:
382
+ continue
383
+ msg_type = msg.get("msg_type", "")
384
+ msg_content = msg.get("content", {})
385
+ if msg_type == "stream":
386
+ outputs.append(msg_content["text"])
387
+ elif msg_type == "execute_result":
388
+ result = msg_content["data"].get("text/plain", None)
389
+ elif msg_type == "error":
390
+ if msg_content.get("ename", "") == RemotePythonExecutor.FINAL_ANSWER_EXCEPTION:
391
+ result = pickle.loads(base64.b64decode(msg_content.get("evalue", "")))
392
+ is_final_answer = True
393
+ else:
394
+ raise AgentError("\n".join(msg_content.get("traceback", [])), self.logger)
395
+ elif msg_type == "status" and msg_content["execution_state"] == "idle":
396
+ break
397
+
398
+ return result, "".join(outputs), is_final_answer
399
+
400
+ except Exception as e:
401
+ self.logger.log_error(f"Code execution failed: {e}")
402
+ raise
403
+
404
+ def _send_execute_request(self, code: str) -> str:
405
+ """Send code execution request to kernel."""
406
+ import uuid
407
+
408
+ # Generate a unique message ID
409
+ msg_id = str(uuid.uuid4())
410
+
411
+ # Create execute request
412
+ execute_request = {
413
+ "header": {
414
+ "msg_id": msg_id,
415
+ "username": "anonymous",
416
+ "session": str(uuid.uuid4()),
417
+ "msg_type": "execute_request",
418
+ "version": "5.0",
419
+ },
420
+ "parent_header": {},
421
+ "metadata": {},
422
+ "content": {
423
+ "code": code,
424
+ "silent": False,
425
+ "store_history": True,
426
+ "user_expressions": {},
427
+ "allow_stdin": False,
428
+ },
429
+ }
430
+
431
+ self.ws.send(json.dumps(execute_request))
432
+ return msg_id
433
+
434
+ def cleanup(self):
435
+ """Clean up the Docker container and resources."""
436
+ try:
437
+ if hasattr(self, "container"):
438
+ self.logger.log(f"Stopping and removing container {self.container.short_id}...", level=LogLevel.INFO)
439
+ self.container.stop()
440
+ self.container.remove()
441
+ self.logger.log("Container cleanup completed", level=LogLevel.INFO)
442
+ del self.container
443
+ except Exception as e:
444
+ self.logger.log_error(f"Error during cleanup: {e}")
445
+
446
+ def delete(self):
447
+ """Ensure cleanup on deletion."""
448
+ self.cleanup()
449
+
450
+
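And the Docker counterpart, assuming a running Docker daemon and the `docker` extra; `container_run_kwargs` is passed through to docker-py's `containers.run`, so any of its options work (reusing the `logger` from the sketch above):

    executor = DockerExecutor(
        additional_imports=["pandas"],
        logger=logger,
        port=8889,                                  # avoid clashing with a local Jupyter
        container_run_kwargs={"mem_limit": "1g"},   # any docker-py run() option
    )
    result, logs, is_final_answer = executor("print(21 * 2)")
    executor.cleanup()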
451
+ __all__ = ["E2BExecutor", "DockerExecutor"]
src/smolagents/tool_validation.py ADDED
@@ -0,0 +1,266 @@
1
+ import ast
2
+ import builtins
3
+ from itertools import zip_longest
4
+
5
+ from .utils import BASE_BUILTIN_MODULES, get_source, is_valid_name
6
+
7
+
8
+ _BUILTIN_NAMES = set(vars(builtins))
9
+
10
+
11
+ class MethodChecker(ast.NodeVisitor):
12
+ """
13
+ Checks that a method
14
+ - only uses defined names
15
+ - contains no local imports (e.g. numpy is ok but local_script is not)
16
+ """
17
+
18
+ def __init__(self, class_attributes: set[str], check_imports: bool = True):
19
+ self.undefined_names = set()
20
+ self.imports = {}
21
+ self.from_imports = {}
22
+ self.assigned_names = set()
23
+ self.arg_names = set()
24
+ self.class_attributes = class_attributes
25
+ self.errors = []
26
+ self.check_imports = check_imports
27
+ self.typing_names = {"Any"}
28
+ self.defined_classes = set()
29
+
30
+ def visit_arguments(self, node):
31
+ """Collect function arguments"""
32
+ self.arg_names = {arg.arg for arg in node.args}
33
+ if node.kwarg:
34
+ self.arg_names.add(node.kwarg.arg)
35
+ if node.vararg:
36
+ self.arg_names.add(node.vararg.arg)
37
+
38
+ def visit_Import(self, node):
39
+ for name in node.names:
40
+ actual_name = name.asname or name.name
41
+ self.imports[actual_name] = name.name
42
+
43
+ def visit_ImportFrom(self, node):
44
+ module = node.module or ""
45
+ for name in node.names:
46
+ actual_name = name.asname or name.name
47
+ self.from_imports[actual_name] = (module, name.name)
48
+
49
+ def visit_Assign(self, node):
50
+ for target in node.targets:
51
+ if isinstance(target, ast.Name):
52
+ self.assigned_names.add(target.id)
53
+ elif isinstance(target, (ast.Tuple, ast.List)):
54
+ for elt in target.elts:
55
+ if isinstance(elt, ast.Name):
56
+ self.assigned_names.add(elt.id)
57
+ self.visit(node.value)
58
+
59
+ def visit_With(self, node):
60
+ """Track aliases in 'with' statements (the 'y' in 'with X as y')"""
61
+ for item in node.items:
62
+ if item.optional_vars: # This is the 'y' in 'with X as y'
63
+ if isinstance(item.optional_vars, ast.Name):
64
+ self.assigned_names.add(item.optional_vars.id)
65
+ self.generic_visit(node)
66
+
67
+ def visit_ExceptHandler(self, node):
68
+ """Track exception aliases (the 'e' in 'except Exception as e')"""
69
+ if node.name: # This is the 'e' in 'except Exception as e'
70
+ self.assigned_names.add(node.name)
71
+ self.generic_visit(node)
72
+
73
+ def visit_AnnAssign(self, node):
74
+ """Track annotated assignments."""
75
+ if isinstance(node.target, ast.Name):
76
+ self.assigned_names.add(node.target.id)
77
+ if node.value:
78
+ self.visit(node.value)
79
+
80
+ def visit_For(self, node):
81
+ target = node.target
82
+ if isinstance(target, ast.Name):
83
+ self.assigned_names.add(target.id)
84
+ elif isinstance(target, ast.Tuple):
85
+ for elt in target.elts:
86
+ if isinstance(elt, ast.Name):
87
+ self.assigned_names.add(elt.id)
88
+ self.generic_visit(node)
89
+
90
+ def _handle_comprehension_generators(self, generators):
91
+ """Helper method to handle generators in all types of comprehensions"""
92
+ for generator in generators:
93
+ if isinstance(generator.target, ast.Name):
94
+ self.assigned_names.add(generator.target.id)
95
+ elif isinstance(generator.target, ast.Tuple):
96
+ for elt in generator.target.elts:
97
+ if isinstance(elt, ast.Name):
98
+ self.assigned_names.add(elt.id)
99
+
100
+ def visit_ListComp(self, node):
101
+ """Track variables in list comprehensions"""
102
+ self._handle_comprehension_generators(node.generators)
103
+ self.generic_visit(node)
104
+
105
+ def visit_DictComp(self, node):
106
+ """Track variables in dictionary comprehensions"""
107
+ self._handle_comprehension_generators(node.generators)
108
+ self.generic_visit(node)
109
+
110
+ def visit_SetComp(self, node):
111
+ """Track variables in set comprehensions"""
112
+ self._handle_comprehension_generators(node.generators)
113
+ self.generic_visit(node)
114
+
115
+ def visit_Attribute(self, node):
116
+ if not (isinstance(node.value, ast.Name) and node.value.id == "self"):
117
+ self.generic_visit(node)
118
+
119
+ def visit_ClassDef(self, node):
120
+ """Track class definitions"""
121
+ self.defined_classes.add(node.name)
122
+ self.generic_visit(node)
123
+
124
+ def visit_Name(self, node):
125
+ if isinstance(node.ctx, ast.Load):
126
+ if not (
127
+ node.id in _BUILTIN_NAMES
128
+ or node.id in BASE_BUILTIN_MODULES
129
+ or node.id in self.arg_names
130
+ or node.id == "self"
131
+ or node.id in self.class_attributes
132
+ or node.id in self.imports
133
+ or node.id in self.from_imports
134
+ or node.id in self.assigned_names
135
+ or node.id in self.typing_names
136
+ or node.id in self.defined_classes
137
+ ):
138
+ self.errors.append(f"Name '{node.id}' is undefined.")
139
+
140
+ def visit_Call(self, node):
141
+ if isinstance(node.func, ast.Name):
142
+ if not (
143
+ node.func.id in _BUILTIN_NAMES
144
+ or node.func.id in BASE_BUILTIN_MODULES
145
+ or node.func.id in self.arg_names
146
+ or node.func.id == "self"
147
+ or node.func.id in self.class_attributes
148
+ or node.func.id in self.imports
149
+ or node.func.id in self.from_imports
150
+ or node.func.id in self.assigned_names
151
+ or node.func.id in self.defined_classes
152
+ ):
153
+ self.errors.append(f"Name '{node.func.id}' is undefined.")
154
+ self.generic_visit(node)
155
+
156
+
157
+ def validate_tool_attributes(cls, check_imports: bool = True) -> None:
158
+ """
159
+ Validates that a Tool class follows the proper patterns:
160
+ 0. Any argument of __init__ should have a default.
161
+ Args chosen at init are not traceable, so we cannot rebuild the source code for them, thus any important arg should be defined as a class attribute.
162
+ 1. About the class:
163
+ - Class attributes should only be strings or dicts
164
+ - Class attributes cannot be complex attributes
165
+ 2. About all class methods:
166
+ - Imports must be from packages, not local files
167
+ - All methods must be self-contained
168
+
169
+ Raises all errors encountered, if no error returns None.
170
+ """
171
+
172
+ class ClassLevelChecker(ast.NodeVisitor):
173
+ def __init__(self):
174
+ self.imported_names = set()
175
+ self.complex_attributes = set()
176
+ self.class_attributes = set()
177
+ self.non_defaults = set()
178
+ self.non_literal_defaults = set()
179
+ self.in_method = False
180
+ self.invalid_attributes = []
181
+
182
+ def visit_FunctionDef(self, node):
183
+ if node.name == "__init__":
184
+ self._check_init_function_parameters(node)
185
+ old_context = self.in_method
186
+ self.in_method = True
187
+ self.generic_visit(node)
188
+ self.in_method = old_context
189
+
190
+ def visit_Assign(self, node):
191
+ if self.in_method:
192
+ return
193
+ # Track class attributes
194
+ for target in node.targets:
195
+ if isinstance(target, ast.Name):
196
+ self.class_attributes.add(target.id)
197
+
198
+ # Check if the assignment is more complex than simple literals
199
+ if not all(
200
+ isinstance(val, (ast.Str, ast.Num, ast.Constant, ast.Dict, ast.List, ast.Set))
201
+ for val in ast.walk(node.value)
202
+ ):
203
+ for target in node.targets:
204
+ if isinstance(target, ast.Name):
205
+ self.complex_attributes.add(target.id)
206
+
207
+ # Check specific class attributes
208
+ if getattr(node.targets[0], "id", "") == "name":
209
+ if not isinstance(node.value, ast.Constant):
210
+ self.invalid_attributes.append(f"Class attribute 'name' must be a constant, found '{node.value}'")
211
+ elif not isinstance(node.value.value, str):
212
+ self.invalid_attributes.append(
213
+ f"Class attribute 'name' must be a string, found '{node.value.value}'"
214
+ )
215
+ elif not is_valid_name(node.value.value):
216
+ self.invalid_attributes.append(
217
+ f"Class attribute 'name' must be a valid Python identifier and not a reserved keyword, found '{node.value.value}'"
218
+ )
219
+
220
+ def _check_init_function_parameters(self, node):
221
+ # Check defaults in parameters
222
+ for arg, default in reversed(list(zip_longest(reversed(node.args.args), reversed(node.args.defaults)))):
223
+ if default is None:
224
+ if arg.arg != "self":
225
+ self.non_defaults.add(arg.arg)
226
+ elif not isinstance(default, (ast.Str, ast.Num, ast.Constant, ast.Dict, ast.List, ast.Set)):
227
+ self.non_literal_defaults.add(arg.arg)
228
+
229
+ class_level_checker = ClassLevelChecker()
230
+ source = get_source(cls)
231
+ tree = ast.parse(source)
232
+ class_node = tree.body[0]
233
+ if not isinstance(class_node, ast.ClassDef):
234
+ raise ValueError("Source code must define a class")
235
+ class_level_checker.visit(class_node)
236
+
237
+ errors = []
238
+ # Check invalid class attributes
239
+ if class_level_checker.invalid_attributes:
240
+ errors += class_level_checker.invalid_attributes
241
+ if class_level_checker.complex_attributes:
242
+ errors.append(
243
+ f"Complex attributes should be defined in __init__, not as class attributes: "
244
+ f"{', '.join(class_level_checker.complex_attributes)}"
245
+ )
246
+ if class_level_checker.non_defaults:
247
+ errors.append(
248
+ f"Parameters in __init__ must have default values, found required parameters: "
249
+ f"{', '.join(class_level_checker.non_defaults)}"
250
+ )
251
+ if class_level_checker.non_literal_defaults:
252
+ errors.append(
253
+ f"Parameters in __init__ must have literal default values, found non-literal defaults: "
254
+ f"{', '.join(class_level_checker.non_literal_defaults)}"
255
+ )
256
+
257
+ # Run checks on all methods
258
+ for node in class_node.body:
259
+ if isinstance(node, ast.FunctionDef):
260
+ method_checker = MethodChecker(class_level_checker.class_attributes, check_imports=check_imports)
261
+ method_checker.visit(node)
262
+ errors += [f"- {node.name}: {error}" for error in method_checker.errors]
263
+
264
+ if errors:
265
+ raise ValueError(f"Tool validation failed for {cls.__name__}:\n" + "\n".join(errors))
266
+ return
src/smolagents/tools.py ADDED
@@ -0,0 +1,1239 @@
+ #!/usr/bin/env python
+ # coding=utf-8
+
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from __future__ import annotations
+
+ import ast
+ import inspect
+ import json
+ import logging
+ import os
+ import sys
+ import tempfile
+ import textwrap
+ import types
+ import warnings
+ from collections.abc import Callable
+ from contextlib import contextmanager
+ from functools import wraps
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any
+
+ from huggingface_hub import (
+     CommitOperationAdd,
+     create_commit,
+     create_repo,
+     get_collection,
+     hf_hub_download,
+     metadata_update,
+ )
+
+ from ._function_type_hints_utils import (
+     TypeHintParsingException,
+     _convert_type_hints_to_json_schema,
+     _get_json_schema_type,
+     get_imports,
+     get_json_schema,
+ )
+ from .agent_types import handle_agent_input_types, handle_agent_output_types
+ from .tool_validation import MethodChecker, validate_tool_attributes
+ from .utils import (
+     BASE_BUILTIN_MODULES,
+     _is_package_available,
+     get_source,
+     instance_to_source,
+     is_valid_name,
+ )
+
+
+ if TYPE_CHECKING:
+     import mcp
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def validate_after_init(cls):
+     original_init = cls.__init__
+
+     @wraps(original_init)
+     def new_init(self, *args, **kwargs):
+         original_init(self, *args, **kwargs)
+         self.validate_arguments()
+
+     cls.__init__ = new_init
+     return cls
+
+
+ AUTHORIZED_TYPES = [
+     "string",
+     "boolean",
+     "integer",
+     "number",
+     "image",
+     "audio",
+     "array",
+     "object",
+     "any",
+     "null",
+ ]
+
+ CONVERSION_DICT = {"str": "string", "int": "integer", "float": "number"}
+
+
+ class Tool:
+     """
+     A base class for the functions used by the agent. Subclass this and implement the `forward` method as well as the
+     following class attributes:
+
+     - **description** (`str`) -- A short description of what your tool does, the inputs it expects and the output(s) it
+       will return. For instance 'This is a tool that downloads a file from a `url`. It takes the `url` as input, and
+       returns the text contained in the file'.
+     - **name** (`str`) -- A performative name that will be used for your tool in the prompt to the agent. For instance
+       `"text-classifier"` or `"image_generator"`.
+     - **inputs** (`Dict[str, Dict[str, Union[str, type, bool]]]`) -- The dict of modalities expected for the inputs.
+       It has one `type` key and a `description` key.
+       This is used by `launch_gradio_demo` or to make a nice space from your tool, and also can be used in the generated
+       description for your tool.
+     - **output_type** (`type`) -- The type of the tool output. This is used by `launch_gradio_demo`
+       or to make a nice space from your tool, and also can be used in the generated description for your tool.
+
+     You can also override the method [`~Tool.setup`] if your tool has an expensive operation to perform before being
+     usable (such as loading a model). [`~Tool.setup`] will be called the first time you use your tool, but not at
+     instantiation.
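+
+     Example (a minimal sketch; the tool name and logic are illustrative):
+     ```py
+     class GreetingTool(Tool):
+         name = "greeting"
+         description = "Returns a greeting for the given name."
+         inputs = {"name": {"type": "string", "description": "The name of the person to greet."}}
+         output_type = "string"
+
+         def forward(self, name: str) -> str:
+             return f"Hello, {name}!"
+     ```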
+     """
+
+     name: str
+     description: str
+     inputs: dict[str, dict[str, str | type | bool]]
+     output_type: str
+
+     def __init__(self, *args, **kwargs):
+         self.is_initialized = False
+
+     def __init_subclass__(cls, **kwargs):
+         super().__init_subclass__(**kwargs)
+         validate_after_init(cls)
+
+     def validate_arguments(self):
+         required_attributes = {
+             "description": str,
+             "name": str,
+             "inputs": dict,
+             "output_type": str,
+         }
+         # Validate class attributes
+         for attr, expected_type in required_attributes.items():
+             attr_value = getattr(self, attr, None)
+             if attr_value is None:
+                 raise TypeError(f"You must set an attribute {attr}.")
+             if not isinstance(attr_value, expected_type):
+                 raise TypeError(
+                     f"Attribute {attr} should have type {expected_type.__name__}, got {type(attr_value)} instead."
+                 )
+         # - Validate name
+         if not is_valid_name(self.name):
+             raise Exception(
+                 f"Invalid Tool name '{self.name}': must be a valid Python identifier and not a reserved keyword"
+             )
+         # Validate inputs
+         for input_name, input_content in self.inputs.items():
+             assert isinstance(input_content, dict), f"Input '{input_name}' should be a dictionary."
+             assert "type" in input_content and "description" in input_content, (
+                 f"Input '{input_name}' should have keys 'type' and 'description', has only {list(input_content.keys())}."
+             )
+             if input_content["type"] not in AUTHORIZED_TYPES:
+                 raise Exception(
+                     f"Input '{input_name}': type '{input_content['type']}' is not an authorized value, should be one of {AUTHORIZED_TYPES}."
+                 )
+         # Validate output type
+         assert getattr(self, "output_type", None) in AUTHORIZED_TYPES
+
+         # Validate forward function signature, except for Tools that use a "generic" signature (PipelineTool, SpaceToolWrapper, LangChainToolWrapper)
+         if not (
+             hasattr(self, "skip_forward_signature_validation")
+             and getattr(self, "skip_forward_signature_validation") is True
+         ):
+             signature = inspect.signature(self.forward)
+             actual_keys = set(key for key in signature.parameters.keys() if key != "self")
+             expected_keys = set(self.inputs.keys())
+             if actual_keys != expected_keys:
+                 raise Exception(
+                     f"In tool '{self.name}', 'forward' method parameters were {actual_keys}, but expected {expected_keys}. "
+                     f"It should take 'self' as its first argument, then its next arguments should match the keys of tool attribute 'inputs'."
+                 )
+
+             json_schema = _convert_type_hints_to_json_schema(self.forward, error_on_missing_type_hints=False)[
+                 "properties"
+             ]  # This function will not raise an error on missing docstrings, contrary to get_json_schema
+             for key, value in self.inputs.items():
+                 assert key in json_schema, (
+                     f"Input '{key}' should be present in function signature, found only {json_schema.keys()}"
+                 )
+                 if "nullable" in value:
+                     assert "nullable" in json_schema[key], (
+                         f"Nullable argument '{key}' in inputs should have key 'nullable' set to True in function signature."
+                     )
+                 if key in json_schema and "nullable" in json_schema[key]:
+                     assert "nullable" in value, (
+                         f"Nullable argument '{key}' in function signature should have key 'nullable' set to True in inputs."
+                     )
+
+     def forward(self, *args, **kwargs):
+         raise NotImplementedError("Write this method in your subclass of `Tool`.")
+
+     def __call__(self, *args, sanitize_inputs_outputs: bool = False, **kwargs):
+         if not self.is_initialized:
+             self.setup()
+
+         # Handle arguments that might be passed as a single dictionary
+         if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], dict):
+             potential_kwargs = args[0]
+
+             # If the dictionary keys match our input parameters, convert it to kwargs
+             if all(key in self.inputs for key in potential_kwargs):
+                 args = ()
+                 kwargs = potential_kwargs
+
+         if sanitize_inputs_outputs:
+             args, kwargs = handle_agent_input_types(*args, **kwargs)
+         outputs = self.forward(*args, **kwargs)
+         if sanitize_inputs_outputs:
+             outputs = handle_agent_output_types(outputs, self.output_type)
+         return outputs
+
+     def setup(self):
+         """
+         Override this method for any operation that is expensive and needs to be executed before you start using
+         your tool, such as loading a big model.
+         """
+         self.is_initialized = True
+
+     def to_dict(self) -> dict:
+         """Returns a dictionary representing the tool"""
+         class_name = self.__class__.__name__
+         if type(self).__name__ == "SimpleTool":
+             # Check that imports are self-contained
+             source_code = get_source(self.forward).replace("@tool", "")
+             forward_node = ast.parse(source_code)
+             # If tool was created using '@tool' decorator, it has only a forward pass, so it's simpler to just get its code
+             method_checker = MethodChecker(set())
+             method_checker.visit(forward_node)
+
+             if len(method_checker.errors) > 0:
+                 errors = [f"- {error}" for error in method_checker.errors]
+                 raise ValueError(f"SimpleTool validation failed for {self.name}:\n" + "\n".join(errors))
+
+             forward_source_code = get_source(self.forward)
+             tool_code = textwrap.dedent(
+                 f"""
+                 from smolagents import Tool
+                 from typing import Any, Optional
+
+                 class {class_name}(Tool):
+                     name = "{self.name}"
+                     description = {json.dumps(textwrap.dedent(self.description).strip())}
+                     inputs = {repr(self.inputs)}
+                     output_type = "{self.output_type}"
+                 """
+             ).strip()
+             import re
+
+             def add_self_argument(source_code: str) -> str:
+                 """Add 'self' as first argument to a function definition if not present."""
+                 pattern = r"def forward\(((?!self)[^)]*)\)"
+
+                 def replacement(match):
+                     args = match.group(1).strip()
+                     if args:  # If there are other arguments
+                         return f"def forward(self, {args})"
+                     return "def forward(self)"
+
+                 return re.sub(pattern, replacement, source_code)
+
+             forward_source_code = forward_source_code.replace(self.name, "forward")
+             forward_source_code = add_self_argument(forward_source_code)
+             forward_source_code = forward_source_code.replace("@tool", "").strip()
+             tool_code += "\n\n" + textwrap.indent(forward_source_code, "    ")
+
+         else:  # If the tool was not created by the @tool decorator, it was made by subclassing Tool
+             if type(self).__name__ in [
+                 "SpaceToolWrapper",
+                 "LangChainToolWrapper",
+                 "GradioToolWrapper",
+             ]:
+                 raise ValueError(
+                     "Cannot save objects created with from_space, from_langchain or from_gradio, as this would create errors."
+                 )
+
+             validate_tool_attributes(self.__class__)
+
+             tool_code = "from typing import Any, Optional\n" + instance_to_source(self, base_cls=Tool)
+
+         requirements = {el for el in get_imports(tool_code) if el not in sys.stdlib_module_names} | {"smolagents"}
+
+         return {"name": self.name, "code": tool_code, "requirements": sorted(requirements)}
+
+     @classmethod
+     def from_dict(cls, tool_dict: dict[str, Any], **kwargs) -> "Tool":
+         """
+         Create tool from a dictionary representation.
+
+         Args:
+             tool_dict (`dict[str, Any]`): Dictionary representation of the tool.
+             **kwargs: Additional keyword arguments to pass to the tool's constructor.
+
+         Returns:
+             `Tool`: Tool object.
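+
+         Example (a sketch, assuming `tool_code` holds the source of a Tool subclass):
+         ```py
+         >>> tool = Tool.from_dict({"code": tool_code})
+         ```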
+         """
+         if "code" not in tool_dict:
+             raise ValueError("Tool dictionary must contain 'code' key with the tool source code")
+         return cls.from_code(tool_dict["code"], **kwargs)
+
+     def save(self, output_dir: str | Path, tool_file_name: str = "tool", make_gradio_app: bool = True):
+         """
+         Saves the relevant code files for your tool so it can be pushed to the Hub. This will copy the code of your
+         tool into `output_dir` as well as autogenerate:
+
+         - a `{tool_file_name}.py` file containing the logic for your tool.
+         If you pass `make_gradio_app=True`, this will also write:
+         - an `app.py` file providing a UI for your tool when it is exported to a Space with `tool.push_to_hub()`
+         - a `requirements.txt` containing the names of the modules used by your tool (as detected when inspecting its
+           code)
+
+         Args:
+             output_dir (`str` or `Path`): The folder in which you want to save your tool.
+             tool_file_name (`str`, *optional*): The file name under which you want to save your tool.
+             make_gradio_app (`bool`, *optional*, defaults to True): Whether to also export an `app.py` Gradio UI and a `requirements.txt` file.
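+
+         Example:
+         ```py
+         >>> tool.save("./my_tool")  # writes ./my_tool/tool.py, app.py and requirements.txt
+         ```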
+         """
+         # Ensure output directory exists
+         output_path = Path(output_dir)
+         output_path.mkdir(parents=True, exist_ok=True)
+         # Save tool file
+         self._write_file(output_path / f"{tool_file_name}.py", self._get_tool_code())
+         if make_gradio_app:
+             # Save app file
+             self._write_file(output_path / "app.py", self._get_gradio_app_code(tool_module_name=tool_file_name))
+             # Save requirements file
+             self._write_file(output_path / "requirements.txt", self._get_requirements())
+
+     def _write_file(self, file_path: Path, content: str) -> None:
+         """Writes content to a file with UTF-8 encoding."""
+         file_path.write_text(content, encoding="utf-8")
+
+     def push_to_hub(
+         self,
+         repo_id: str,
+         commit_message: str = "Upload tool",
+         private: bool | None = None,
+         token: bool | str | None = None,
+         create_pr: bool = False,
+     ) -> str:
+         """
+         Upload the tool to the Hub.
+
+         Parameters:
+             repo_id (`str`):
+                 The name of the repository you want to push your tool to. It should contain your organization name when
+                 pushing to a given organization.
+             commit_message (`str`, *optional*, defaults to `"Upload tool"`):
+                 Message to commit while pushing.
+             private (`bool`, *optional*):
+                 Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
+             token (`bool` or `str`, *optional*):
+                 The token to use as HTTP bearer authorization for remote files. If unset, will use the token generated
+                 when running `huggingface-cli login` (stored in `~/.huggingface`).
+             create_pr (`bool`, *optional*, defaults to `False`):
+                 Whether to create a PR with the uploaded files or directly commit.
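+
+         Example (the repo id is illustrative):
+         ```py
+         >>> tool.push_to_hub("your-username/my-tool", private=True)
+         ```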
+         """
+         # Initialize repository
+         repo_id = self._initialize_hub_repo(repo_id, token, private)
+         # Prepare files for commit
+         additions = self._prepare_hub_files()
+         # Create commit
+         return create_commit(
+             repo_id=repo_id,
+             operations=additions,
+             commit_message=commit_message,
+             token=token,
+             create_pr=create_pr,
+             repo_type="space",
+         )
+
+     @staticmethod
+     def _initialize_hub_repo(repo_id: str, token: bool | str | None, private: bool | None) -> str:
+         """Initialize repository on Hugging Face Hub."""
+         repo_url = create_repo(
+             repo_id=repo_id,
+             token=token,
+             private=private,
+             exist_ok=True,
+             repo_type="space",
+             space_sdk="gradio",
+         )
+         metadata_update(repo_url.repo_id, {"tags": ["smolagents", "tool"]}, repo_type="space", token=token)
+         return repo_url.repo_id
+
+     def _prepare_hub_files(self) -> list:
+         """Prepare files for Hub commit."""
+         additions = [
+             # Add tool code
+             CommitOperationAdd(
+                 path_in_repo="tool.py",
+                 path_or_fileobj=self._get_tool_code().encode(),
+             ),
+             # Add Gradio app
+             CommitOperationAdd(
+                 path_in_repo="app.py",
+                 path_or_fileobj=self._get_gradio_app_code().encode(),
+             ),
+             # Add requirements
+             CommitOperationAdd(
+                 path_in_repo="requirements.txt",
+                 path_or_fileobj=self._get_requirements().encode(),
+             ),
+         ]
+         return additions
+
+     def _get_tool_code(self) -> str:
+         """Get the tool's code."""
+         return self.to_dict()["code"]
+
+     def _get_gradio_app_code(self, tool_module_name: str = "tool") -> str:
+         """Get the Gradio app code."""
+         class_name = self.__class__.__name__
+         return textwrap.dedent(
+             f"""\
+             from smolagents import launch_gradio_demo
+             from {tool_module_name} import {class_name}
+
+             tool = {class_name}()
+             launch_gradio_demo(tool)
+             """
+         )
+
+     def _get_requirements(self) -> str:
+         """Get the requirements."""
+         return "\n".join(self.to_dict()["requirements"])
+
+     @classmethod
+     def from_hub(
+         cls,
+         repo_id: str,
+         token: str | None = None,
+         trust_remote_code: bool = False,
+         **kwargs,
+     ):
+         """
+         Loads a tool defined on the Hub.
+
+         <Tip warning={true}>
+
+         Loading a tool from the Hub means that you'll download the tool and execute it locally.
+         ALWAYS inspect the tool you're downloading before loading it within your runtime, as you would do when
+         installing a package using pip/npm/apt.
+
+         </Tip>
+
+         Args:
+             repo_id (`str`):
+                 The name of the Space repo on the Hub where your tool is defined.
+             token (`str`, *optional*):
+                 The token to identify you on hf.co. If unset, will use the token generated when running
+                 `huggingface-cli login` (stored in `~/.huggingface`).
+             trust_remote_code (`bool`, *optional*, defaults to False):
+                 This flag marks that you understand the risk of running remote code and that you trust this tool.
+                 If you do not set this to True, loading the tool from the Hub will fail.
+             kwargs (additional keyword arguments, *optional*):
+                 Additional keyword arguments that will be split in two: all arguments relevant to the Hub (such as
+                 `cache_dir`, `revision`, `subfolder`) will be used when downloading the files for your tool, and the
+                 others will be passed along to its init.
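+
+         Example (the repo id is illustrative):
+         ```py
+         >>> tool = Tool.from_hub("username/my-tool", trust_remote_code=True)
+         ```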
+         """
+         if not trust_remote_code:
+             raise ValueError(
+                 "Loading a tool from the Hub requires you to acknowledge that you trust its code: to do so, pass `trust_remote_code=True`."
+             )
+
+         # Get the tool's tool.py file.
+         tool_file = hf_hub_download(
+             repo_id,
+             "tool.py",
+             token=token,
+             repo_type="space",
+             cache_dir=kwargs.get("cache_dir"),
+             force_download=kwargs.get("force_download"),
+             proxies=kwargs.get("proxies"),
+             revision=kwargs.get("revision"),
+             subfolder=kwargs.get("subfolder"),
+             local_files_only=kwargs.get("local_files_only"),
+         )
+
+         tool_code = Path(tool_file).read_text()
+         return Tool.from_code(tool_code, **kwargs)
+
+     @classmethod
+     def from_code(cls, tool_code: str, **kwargs):
+         module = types.ModuleType("dynamic_tool")
+
+         exec(tool_code, module.__dict__)
+
+         # Find the Tool subclass
+         tool_class = next(
+             (
+                 obj
+                 for _, obj in inspect.getmembers(module, inspect.isclass)
+                 if issubclass(obj, Tool) and obj is not Tool
+             ),
+             None,
+         )
+
+         if tool_class is None:
+             raise ValueError("No Tool subclass found in the code.")
+
+         if not isinstance(tool_class.inputs, dict):
+             tool_class.inputs = ast.literal_eval(tool_class.inputs)
+
+         return tool_class(**kwargs)
+
+     @staticmethod
+     def from_space(
+         space_id: str,
+         name: str,
+         description: str,
+         api_name: str | None = None,
+         token: str | None = None,
+     ):
+         """
+         Creates a [`Tool`] from a Space given its id on the Hub.
+
+         Args:
+             space_id (`str`):
+                 The id of the Space on the Hub.
+             name (`str`):
+                 The name of the tool.
+             description (`str`):
+                 The description of the tool.
+             api_name (`str`, *optional*):
+                 The specific api_name to use, if the Space has several tabs. If not specified, will default to the first available API.
+             token (`str`, *optional*):
+                 Add your token to access private Spaces or increase your GPU quotas.
+         Returns:
+             [`Tool`]:
+                 The Space, as a tool.
+
+         Examples:
+         ```py
+         >>> image_generator = Tool.from_space(
+         ...     space_id="black-forest-labs/FLUX.1-schnell",
+         ...     name="image-generator",
+         ...     description="Generate an image from a prompt"
+         ... )
+         >>> image = image_generator("Generate an image of a cool surfer in Tahiti")
+         ```
+         ```py
+         >>> face_swapper = Tool.from_space(
+         ...     "tuan2308/face-swap",
+         ...     "face_swapper",
+         ...     "Tool that puts the face shown on the first image on the second image. You can give it paths to images.",
+         ... )
+         >>> image = face_swapper('./aymeric.jpeg', './ruth.jpg')
+         ```
+         """
+         from gradio_client import Client, handle_file
+
+         class SpaceToolWrapper(Tool):
+             skip_forward_signature_validation = True
+
+             def __init__(
+                 self,
+                 space_id: str,
+                 name: str,
+                 description: str,
+                 api_name: str | None = None,
+                 token: str | None = None,
+             ):
+                 self.name = name
+                 self.description = description
+                 self.client = Client(space_id, hf_token=token)
+                 space_description = self.client.view_api(return_format="dict", print_info=False)["named_endpoints"]
+
+                 # If api_name is not defined, take the first of the available APIs for this Space
+                 if api_name is None:
+                     api_name = list(space_description.keys())[0]
+                     logger.warning(
+                         f"Since `api_name` was not defined, it was automatically set to the first available API: `{api_name}`."
+                     )
+                 self.api_name = api_name
+
+                 try:
+                     space_description_api = space_description[api_name]
+                 except KeyError:
+                     raise KeyError(f"Could not find specified {api_name=} among available api names.")
+
+                 self.inputs = {}
+                 for parameter in space_description_api["parameters"]:
+                     if not parameter["parameter_has_default"]:
+                         parameter_type = parameter["type"]["type"]
+                         if parameter_type == "object":
+                             parameter_type = "any"
+                         self.inputs[parameter["parameter_name"]] = {
+                             "type": parameter_type,
+                             "description": parameter["python_type"]["description"],
+                         }
+                 output_component = space_description_api["returns"][0]["component"]
+                 if output_component == "Image":
+                     self.output_type = "image"
+                 elif output_component == "Audio":
+                     self.output_type = "audio"
+                 else:
+                     self.output_type = "any"
+                 self.is_initialized = True
+
+             def sanitize_argument_for_prediction(self, arg):
+                 from gradio_client.utils import is_http_url_like
+                 from PIL.Image import Image
+
+                 if isinstance(arg, Image):
+                     temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+                     arg.save(temp_file.name)
+                     arg = temp_file.name
+                 if (
+                     (isinstance(arg, str) and os.path.isfile(arg))
+                     or (isinstance(arg, Path) and arg.exists() and arg.is_file())
+                     or is_http_url_like(arg)
+                 ):
+                     arg = handle_file(arg)
+                 return arg
+
+             def forward(self, *args, **kwargs):
+                 # Preprocess args and kwargs:
+                 args = list(args)
+                 for i, arg in enumerate(args):
+                     args[i] = self.sanitize_argument_for_prediction(arg)
+                 for arg_name, arg in kwargs.items():
+                     kwargs[arg_name] = self.sanitize_argument_for_prediction(arg)
+
+                 output = self.client.predict(*args, api_name=self.api_name, **kwargs)
+                 if isinstance(output, (tuple, list)):
+                     # Sometimes the Space also returns the generation seed, in which case the result is at index 0
+                     return output[0]
+                 return output
+
+         return SpaceToolWrapper(
+             space_id=space_id,
+             name=name,
+             description=description,
+             api_name=api_name,
+             token=token,
+         )
+
+     @staticmethod
+     def from_gradio(gradio_tool):
+         """
+         Creates a [`Tool`] from a gradio tool.
+         """
+         import inspect
+
+         class GradioToolWrapper(Tool):
+             def __init__(self, _gradio_tool):
+                 self.name = _gradio_tool.name
+                 self.description = _gradio_tool.description
+                 self.output_type = "string"
+                 self._gradio_tool = _gradio_tool
+                 func_args = list(inspect.signature(_gradio_tool.run).parameters.items())
+                 self.inputs = {
+                     key: {"type": CONVERSION_DICT[value.annotation], "description": ""} for key, value in func_args
+                 }
+                 self.forward = self._gradio_tool.run
+
+         return GradioToolWrapper(gradio_tool)
+
+     @staticmethod
+     def from_langchain(langchain_tool):
+         """
+         Creates a [`Tool`] from a langchain tool.
+         """
+
+         class LangChainToolWrapper(Tool):
+             skip_forward_signature_validation = True
+
+             def __init__(self, _langchain_tool):
+                 self.name = _langchain_tool.name.lower()
+                 self.description = _langchain_tool.description
+                 self.inputs = _langchain_tool.args.copy()
+                 for input_content in self.inputs.values():
+                     if "title" in input_content:
+                         input_content.pop("title")
+                     input_content["description"] = ""
+                 self.output_type = "string"
+                 self.langchain_tool = _langchain_tool
+                 self.is_initialized = True
+
+             def forward(self, *args, **kwargs):
+                 tool_input = kwargs.copy()
+                 for index, argument in enumerate(args):
+                     if index < len(self.inputs):
+                         input_key = next(iter(self.inputs))
+                         tool_input[input_key] = argument
+                 return self.langchain_tool.run(tool_input)
+
+         return LangChainToolWrapper(langchain_tool)
+
+
+ def launch_gradio_demo(tool: Tool):
+     """
+     Launches a gradio demo for a tool. The corresponding tool class needs to properly implement the class attributes
+     `inputs` and `output_type`.
+
+     Args:
+         tool (`Tool`): The tool for which to launch the demo.
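+
+     Example:
+     ```py
+     >>> launch_gradio_demo(tool)  # assuming `tool` is an instantiated Tool with valid `inputs` and `output_type`
+     ```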
+     """
+     try:
+         import gradio as gr
+     except ImportError:
+         raise ImportError("Gradio should be installed in order to launch a gradio demo.")
+
+     TYPE_TO_COMPONENT_CLASS_MAPPING = {
+         "boolean": gr.Checkbox,
+         "image": gr.Image,
+         "audio": gr.Audio,
+         "string": gr.Textbox,
+         "integer": gr.Textbox,
+         "number": gr.Textbox,
+     }
+
+     def tool_forward(*args, **kwargs):
+         return tool(*args, sanitize_inputs_outputs=True, **kwargs)
+
+     tool_forward.__signature__ = inspect.signature(tool.forward)
+
+     gradio_inputs = []
+     for input_name, input_details in tool.inputs.items():
+         input_gradio_component_class = TYPE_TO_COMPONENT_CLASS_MAPPING[input_details["type"]]
+         new_component = input_gradio_component_class(label=input_name)
+         gradio_inputs.append(new_component)
+
+     output_gradio_component_class = TYPE_TO_COMPONENT_CLASS_MAPPING[tool.output_type]
+     gradio_output = output_gradio_component_class(label="Output")
+
+     gr.Interface(
+         fn=tool_forward,
+         inputs=gradio_inputs,
+         outputs=gradio_output,
+         title=tool.name,
+         description=tool.description,
+         api_name=tool.name,
+     ).launch()
+
+
+ def load_tool(
+     repo_id,
+     model_repo_id: str | None = None,
+     token: str | None = None,
+     trust_remote_code: bool = False,
+     **kwargs,
+ ):
+     """
+     Main function to quickly load a tool from the Hub.
+
+     <Tip warning={true}>
+
+     Loading a tool means that you'll download the tool and execute it locally.
+     ALWAYS inspect the tool you're downloading before loading it within your runtime, as you would do when
+     installing a package using pip/npm/apt.
+
+     </Tip>
+
+     Args:
+         repo_id (`str`):
+             Space repo ID of a tool on the Hub.
+         model_repo_id (`str`, *optional*):
+             Use this argument to use a different model than the default one for the tool you selected.
+         token (`str`, *optional*):
+             The token to identify you on hf.co. If unset, will use the token generated when running `huggingface-cli
+             login` (stored in `~/.huggingface`).
+         trust_remote_code (`bool`, *optional*, defaults to False):
+             This needs to be accepted in order to load a tool from the Hub.
+         kwargs (additional keyword arguments, *optional*):
+             Additional keyword arguments that will be split in two: all arguments relevant to the Hub (such as
+             `cache_dir`, `revision`, `subfolder`) will be used when downloading the files for your tool, and the others
+             will be passed along to its init.
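+
+     Example (the repo id is illustrative):
+     ```py
+     >>> from smolagents import load_tool
+     >>> tool = load_tool("username/my-tool", trust_remote_code=True)
+     ```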
+     """
+     return Tool.from_hub(
+         repo_id,
+         model_repo_id=model_repo_id,
+         token=token,
+         trust_remote_code=trust_remote_code,
+         **kwargs,
+     )
+
+
+ def add_description(description):
+     """
+     A decorator that adds a description to a function.
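+
+     Example:
+     ```py
+     >>> @add_description("Greets the user.")
+     ... def greet():
+     ...     return "Hello!"
+     >>> greet.description
+     'Greets the user.'
+     ```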
+     """
+
+     def inner(func):
+         func.description = description
+         func.name = func.__name__
+         return func
+
+     return inner
+
+
+ class ToolCollection:
+     """
+     Tool collections enable loading a collection of tools in the agent's toolbox.
+
+     Collections can be loaded from a collection in the Hub or from an MCP server, see:
+     - [`ToolCollection.from_hub`]
+     - [`ToolCollection.from_mcp`]
+
+     For example and usage, see: [`ToolCollection.from_hub`] and [`ToolCollection.from_mcp`]
+     """
+
+     def __init__(self, tools: list[Tool]):
+         self.tools = tools
+
+     @classmethod
+     def from_hub(
+         cls,
+         collection_slug: str,
+         token: str | None = None,
+         trust_remote_code: bool = False,
+     ) -> "ToolCollection":
+         """Loads a tool collection from the Hub.
+
+         It adds a collection of tools from all Spaces in the collection to the agent's toolbox.
+
+         > [!NOTE]
+         > Only Spaces will be fetched, so you can feel free to add models and datasets to your collection if you'd
+         > like for this collection to showcase them.
+
+         Args:
+             collection_slug (str): The collection slug referencing the collection.
+             token (str, *optional*): The authentication token if the collection is private.
+             trust_remote_code (bool, *optional*, defaults to False): Whether to trust the remote code.
+
+         Returns:
+             ToolCollection: A tool collection instance loaded with the tools.
+
+         Example:
+         ```py
+         >>> from smolagents import ToolCollection, CodeAgent
+
+         >>> image_tool_collection = ToolCollection.from_hub("huggingface-tools/diffusion-tools-6630bb19a942c2306a2cdb6f")
+         >>> agent = CodeAgent(tools=[*image_tool_collection.tools], add_base_tools=True)
+
+         >>> agent.run("Please draw me a picture of rivers and lakes.")
+         ```
+         """
+         _collection = get_collection(collection_slug, token=token)
+         _hub_repo_ids = {item.item_id for item in _collection.items if item.item_type == "space"}
+
+         tools = {Tool.from_hub(repo_id, token, trust_remote_code) for repo_id in _hub_repo_ids}
+
+         return cls(tools)
+
+     @classmethod
+     @contextmanager
+     def from_mcp(
+         cls, server_parameters: "mcp.StdioServerParameters" | dict, trust_remote_code: bool = False
+     ) -> "ToolCollection":
+         """Automatically load a tool collection from an MCP server.
+
+         This method supports Stdio, Streamable HTTP, and legacy HTTP+SSE MCP servers. Look at the `server_parameters`
+         argument for more details on how to connect to each MCP server.
+
+         Note: a separate thread will be spawned to run an asyncio event loop handling
+         the MCP server.
+
+         Args:
+             server_parameters (`mcp.StdioServerParameters` or `dict`):
+                 Configuration parameters to connect to the MCP server. This can be:
+
+                 - An instance of `mcp.StdioServerParameters` for connecting a Stdio MCP server via standard input/output using a subprocess.
+
+                 - A `dict` with at least:
+                   - "url": URL of the server.
+                   - "transport": Transport protocol to use, one of:
+                     - "streamable-http": (recommended) Streamable HTTP transport.
+                     - "sse": Legacy HTTP+SSE transport (deprecated).
+                   If "transport" is omitted, the legacy "sse" transport is assumed (a deprecation warning will be issued).
+
+                 <Deprecated version="1.17.0">
+                 The HTTP+SSE transport is deprecated and future behavior will default to the Streamable HTTP transport.
+                 Please pass explicitly the "transport" key.
+                 </Deprecated>
+             trust_remote_code (`bool`, *optional*, defaults to `False`):
+                 Whether to trust the execution of code from tools defined on the MCP server.
+                 This option should only be set to `True` if you trust the MCP server,
+                 and understand the risks associated with running remote code on your local machine.
+                 If set to `False`, loading tools from MCP will fail.
+
+
+         Returns:
+             ToolCollection: A tool collection instance.
+
+         Example with a Stdio MCP server:
+         ```py
+         >>> import os
+         >>> from smolagents import ToolCollection, CodeAgent, InferenceClientModel
+         >>> from mcp import StdioServerParameters
+
+         >>> model = InferenceClientModel()
+
+         >>> server_parameters = StdioServerParameters(
+         >>>     command="uvx",
+         >>>     args=["--quiet", "[email protected]"],
+         >>>     env={"UV_PYTHON": "3.12", **os.environ},
+         >>> )
+
+         >>> with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as tool_collection:
+         >>>     agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True, model=model)
+         >>>     agent.run("Please find a remedy for hangover.")
+         ```
+
+         Example with a Streamable HTTP MCP server:
+         ```py
+         >>> with ToolCollection.from_mcp({"url": "http://127.0.0.1:8000/mcp", "transport": "streamable-http"}, trust_remote_code=True) as tool_collection:
+         >>>     agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True, model=model)
+         >>>     agent.run("Please find a remedy for hangover.")
+         ```
+         """
+         try:
+             from mcpadapt.core import MCPAdapt
+             from mcpadapt.smolagents_adapter import SmolAgentsAdapter
+         except ImportError:
+             raise ImportError(
+                 """Please install 'mcp' extra to use ToolCollection.from_mcp: `pip install "smolagents[mcp]"`."""
+             )
+         if isinstance(server_parameters, dict):
+             transport = server_parameters.get("transport")
+             if transport is None:
+                 warnings.warn(
+                     "Passing a dict as server_parameters without specifying the 'transport' key is deprecated. "
+                     "For now, it defaults to the legacy 'sse' (HTTP+SSE) transport, but this default will change "
+                     "to 'streamable-http' in version 1.20. Please add the 'transport' key explicitly. ",
+                     FutureWarning,
+                 )
+                 transport = "sse"
+                 server_parameters["transport"] = transport
+             if transport not in {"sse", "streamable-http"}:
+                 raise ValueError(
+                     f"Unsupported transport: {transport}. Supported transports are 'streamable-http' and 'sse'."
+                 )
+         if not trust_remote_code:
+             raise ValueError(
+                 "Loading tools from MCP requires you to acknowledge that you trust the MCP server, "
+                 "as it will execute code on your local machine: pass `trust_remote_code=True`."
+             )
+         with MCPAdapt(server_parameters, SmolAgentsAdapter()) as tools:
+             yield cls(tools)
+
+
+ def tool(tool_function: Callable) -> Tool:
+     """
+     Convert a function into an instance of a dynamically created Tool subclass.
+
+     Args:
+         tool_function (`Callable`): Function to convert into a Tool subclass.
+             Should have type hints for each input and a type hint for the output.
+             Should also have a docstring including the description of the function
+             and an 'Args:' part where each argument is described.
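+
+     Example (a minimal sketch):
+     ```py
+     >>> @tool
+     ... def to_upper(text: str) -> str:
+     ...     '''Converts text to uppercase.
+     ...
+     ...     Args:
+     ...         text: The text to convert.
+     ...     '''
+     ...     return text.upper()
+     >>> to_upper("hello")
+     'HELLO'
+     ```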
+     """
+     tool_json_schema = get_json_schema(tool_function)["function"]
+     if "return" not in tool_json_schema:
+         raise TypeHintParsingException("Tool return type not found: make sure your function has a return type hint!")
+
+     class SimpleTool(Tool):
+         def __init__(self):
+             self.is_initialized = True
+
+     # Set the class attributes
+     SimpleTool.name = tool_json_schema["name"]
+     SimpleTool.description = tool_json_schema["description"]
+     SimpleTool.inputs = tool_json_schema["parameters"]["properties"]
+     SimpleTool.output_type = tool_json_schema["return"]["type"]
+
+     @wraps(tool_function)
+     def wrapped_function(*args, **kwargs):
+         return tool_function(*args, **kwargs)
+
+     # Bind the copied function to the forward method
+     SimpleTool.forward = staticmethod(wrapped_function)
+
+     # Get the signature parameters of the tool function
+     sig = inspect.signature(tool_function)
+     # - Add "self" as first parameter to tool_function signature
+     new_sig = sig.replace(
+         parameters=[inspect.Parameter("self", inspect.Parameter.POSITIONAL_OR_KEYWORD)] + list(sig.parameters.values())
+     )
+     # - Set the signature of the forward method
+     SimpleTool.forward.__signature__ = new_sig
+
+     # Create and attach the source code of the dynamically created tool class and forward method
+     # - Get the source code of tool_function
+     tool_source = inspect.getsource(tool_function)
+     # - Remove the tool decorator and function definition line
+     tool_source_body = "\n".join(tool_source.split("\n")[2:])
+     # - Dedent
+     tool_source_body = textwrap.dedent(tool_source_body)
+     # - Create the forward method source, including def line and indentation
+     forward_method_source = f"def forward{str(new_sig)}:\n{textwrap.indent(tool_source_body, '    ')}"
+     # - Create the class source
+     class_source = (
+         textwrap.dedent(f"""
+         class SimpleTool(Tool):
+             name: str = "{tool_json_schema["name"]}"
+             description: str = {json.dumps(textwrap.dedent(tool_json_schema["description"]).strip())}
+             inputs: dict[str, dict[str, str]] = {tool_json_schema["parameters"]["properties"]}
+             output_type: str = "{tool_json_schema["return"]["type"]}"
+
+             def __init__(self):
+                 self.is_initialized = True
+
+         """)
+         + textwrap.indent(forward_method_source, "    ")  # indent for class method
+     )
+     # - Store the source code on both class and method for inspection
+     SimpleTool.__source__ = class_source
+     SimpleTool.forward.__source__ = forward_method_source
+
+     simple_tool = SimpleTool()
+     return simple_tool
+
+
+ class PipelineTool(Tool):
+     """
+     A [`Tool`] tailored towards Transformer models. On top of the class attributes of the base class [`Tool`], you will
+     need to specify:
+
+     - **model_class** (`type`) -- The class to use to load the model in this tool.
+     - **default_checkpoint** (`str`) -- The default checkpoint that should be used when the user doesn't specify one.
+     - **pre_processor_class** (`type`, *optional*, defaults to [`transformers.AutoProcessor`]) -- The class to use to load the
+       pre-processor
+     - **post_processor_class** (`type`, *optional*, defaults to [`transformers.AutoProcessor`]) -- The class to use to load the
+       post-processor (when different from the pre-processor).
+
+     Args:
+         model (`str` or [`transformers.PreTrainedModel`], *optional*):
+             The name of the checkpoint to use for the model, or the instantiated model. If unset, will default to the
+             value of the class attribute `default_checkpoint`.
+         pre_processor (`str` or `Any`, *optional*):
+             The name of the checkpoint to use for the pre-processor, or the instantiated pre-processor (can be a
+             tokenizer, an image processor, a feature extractor or a processor). Will default to the value of `model` if
+             unset.
+         post_processor (`str` or `Any`, *optional*):
+             The name of the checkpoint to use for the post-processor, or the instantiated pre-processor (can be a
+             tokenizer, an image processor, a feature extractor or a processor). Will default to the `pre_processor` if
+             unset.
+         device (`int`, `str` or `torch.device`, *optional*):
+             The device on which to execute the model. Will default to any accelerator available (GPU, MPS etc...), the
+             CPU otherwise.
+         device_map (`str` or `dict`, *optional*):
+             If passed along, will be used to instantiate the model.
+         model_kwargs (`dict`, *optional*):
+             Any keyword argument to send to the model instantiation.
+         token (`str`, *optional*):
+             The token to use as HTTP bearer authorization for remote files. If unset, will use the token generated when
+             running `huggingface-cli login` (stored in `~/.huggingface`).
+         hub_kwargs (additional keyword arguments, *optional*):
+             Any additional keyword argument to send to the methods that will load the data from the Hub.
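+
+     Example (a hedged sketch; the checkpoint name and model class are illustrative, not defaults of this library):
+     ```py
+     from transformers import AutoModelForSeq2SeqLM
+
+     class SummarizerTool(PipelineTool):
+         name = "summarizer"
+         description = "Summarizes the given text."
+         inputs = {"text": {"type": "string", "description": "The text to summarize."}}
+         output_type = "string"
+         default_checkpoint = "org/some-summarization-checkpoint"  # illustrative
+         model_class = AutoModelForSeq2SeqLM
+
+         def encode(self, text: str):
+             return self.pre_processor(text, return_tensors="pt")
+
+         def forward(self, inputs):
+             # Override the default forward to use generation instead of a plain model call
+             return self.model.generate(**inputs)
+
+         def decode(self, outputs):
+             return self.pre_processor.batch_decode(outputs, skip_special_tokens=True)[0]
+     ```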
+     """
+
+     pre_processor_class = None
+     model_class = None
+     post_processor_class = None
+     default_checkpoint = None
+     description = "This is a pipeline tool"
+     name = "pipeline"
+     inputs = {"prompt": str}
+     output_type = str
+     skip_forward_signature_validation = True
+
+     def __init__(
+         self,
+         model=None,
+         pre_processor=None,
+         post_processor=None,
+         device=None,
+         device_map=None,
+         model_kwargs=None,
+         token=None,
+         **hub_kwargs,
+     ):
+         if not _is_package_available("accelerate") or not _is_package_available("torch"):
+             raise ModuleNotFoundError(
+                 "Please install 'transformers' extra to use a PipelineTool: `pip install 'smolagents[transformers]'`"
+             )
+
+         if model is None:
+             if self.default_checkpoint is None:
+                 raise ValueError("This tool does not implement a default checkpoint, you need to pass one.")
+             model = self.default_checkpoint
+         if pre_processor is None:
+             pre_processor = model
+
+         self.model = model
+         self.pre_processor = pre_processor
+         self.post_processor = post_processor
+         self.device = device
+         self.device_map = device_map
+         self.model_kwargs = {} if model_kwargs is None else model_kwargs
+         if device_map is not None:
+             self.model_kwargs["device_map"] = device_map
+         self.hub_kwargs = hub_kwargs
+         self.hub_kwargs["token"] = token
+
+         super().__init__()
+
+     def setup(self):
+         """
+         Instantiates the `pre_processor`, `model` and `post_processor` if necessary.
+         """
+         if isinstance(self.pre_processor, str):
+             if self.pre_processor_class is None:
+                 from transformers import AutoProcessor
+
+                 self.pre_processor_class = AutoProcessor
+             self.pre_processor = self.pre_processor_class.from_pretrained(self.pre_processor, **self.hub_kwargs)
+
+         if isinstance(self.model, str):
+             self.model = self.model_class.from_pretrained(self.model, **self.model_kwargs, **self.hub_kwargs)
+
+         if self.post_processor is None:
+             self.post_processor = self.pre_processor
+         elif isinstance(self.post_processor, str):
+             if self.post_processor_class is None:
+                 from transformers import AutoProcessor
+
+                 self.post_processor_class = AutoProcessor
+             self.post_processor = self.post_processor_class.from_pretrained(self.post_processor, **self.hub_kwargs)
+
+         if self.device is None:
+             if self.device_map is not None:
+                 self.device = list(self.model.hf_device_map.values())[0]
+             else:
+                 from accelerate import PartialState
+
+                 self.device = PartialState().default_device
+
+         if self.device_map is None:
+             self.model.to(self.device)
+
+         super().setup()
+
+     def encode(self, raw_inputs):
+         """
+         Uses the `pre_processor` to prepare the inputs for the `model`.
+         """
+         return self.pre_processor(raw_inputs)
+
+     def forward(self, inputs):
+         """
+         Sends the inputs through the `model`.
+         """
+         import torch
+
+         with torch.no_grad():
+             return self.model(**inputs)
+
+     def decode(self, outputs):
+         """
+         Uses the `post_processor` to decode the model output.
+         """
+         return self.post_processor(outputs)
+
+     def __call__(self, *args, sanitize_inputs_outputs: bool = False, **kwargs):
+         import torch
+         from accelerate.utils import send_to_device
+
+         if not self.is_initialized:
+             self.setup()
+
+         if sanitize_inputs_outputs:
+             args, kwargs = handle_agent_input_types(*args, **kwargs)
+         encoded_inputs = self.encode(*args, **kwargs)
+
+         tensor_inputs = {k: v for k, v in encoded_inputs.items() if isinstance(v, torch.Tensor)}
+         non_tensor_inputs = {k: v for k, v in encoded_inputs.items() if not isinstance(v, torch.Tensor)}
+
+         encoded_inputs = send_to_device(tensor_inputs, self.device)
+         outputs = self.forward({**encoded_inputs, **non_tensor_inputs})
+         outputs = send_to_device(outputs, "cpu")
+         decoded_outputs = self.decode(outputs)
+         if sanitize_inputs_outputs:
+             decoded_outputs = handle_agent_output_types(decoded_outputs, self.output_type)
+         return decoded_outputs
+
+
+ def get_tools_definition_code(tools: dict[str, Tool]) -> str:
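+     """Builds a self-contained source string that redefines the given tools (with a stub `Tool` base class), so they can be recreated inside an isolated executor."""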
+     tool_codes = []
+     for tool in tools.values():
+         validate_tool_attributes(tool.__class__, check_imports=False)
+         tool_code = instance_to_source(tool, base_cls=Tool)
+         tool_code = tool_code.replace("from smolagents.tools import Tool", "")
+         tool_code += f"\n\n{tool.name} = {tool.__class__.__name__}()\n"
+         tool_codes.append(tool_code)
+
+     tool_definition_code = "\n".join([f"import {module}" for module in BASE_BUILTIN_MODULES])
+     tool_definition_code += textwrap.dedent(
+         """
+     from typing import Any
+
+     class Tool:
+         def __call__(self, *args, **kwargs):
+             return self.forward(*args, **kwargs)
+
+         def forward(self, *args, **kwargs):
+             pass  # to be implemented in child class
+     """
+     )
+     tool_definition_code += "\n\n".join(tool_codes)
+     return tool_definition_code
+
+
+ def validate_tool_arguments(tool: Tool, arguments: Any) -> str | None:
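+     """Checks `arguments` against `tool.inputs`; returns an error message string if invalid, else None."""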
+     if isinstance(arguments, dict):
+         for key, value in arguments.items():
+             if key not in tool.inputs:
+                 return f"Argument {key} is not in the tool's input schema."
+
+             parsed_type = _get_json_schema_type(type(value))["type"]
+
+             if parsed_type != tool.inputs[key]["type"] and not tool.inputs[key]["type"] == "any":
+                 return f"Argument {key} has type '{parsed_type}' but should be '{tool.inputs[key]['type']}'."
+         for key in tool.inputs:
+             if key not in arguments:
+                 return f"Argument {key} is required."
+         return None
+     else:
+         expected_type = list(tool.inputs.values())[0]["type"]
+         if _get_json_schema_type(type(arguments))["type"] != expected_type and not expected_type == "any":
+             return f"Argument has type '{type(arguments).__name__}' but should be '{expected_type}'."
+         return None
+
+
+ __all__ = [
+     "AUTHORIZED_TYPES",
+     "Tool",
+     "tool",
+     "load_tool",
+     "launch_gradio_demo",
+     "ToolCollection",
+ ]
src/smolagents/utils.py ADDED
@@ -0,0 +1,500 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+
4
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ import ast
18
+ import base64
19
+ import importlib.metadata
20
+ import importlib.util
21
+ import inspect
22
+ import json
23
+ import keyword
24
+ import os
25
+ import re
26
+ import types
27
+ from functools import lru_cache
28
+ from io import BytesIO
29
+ from pathlib import Path
30
+ from textwrap import dedent
31
+ from typing import TYPE_CHECKING, Any
32
+
33
+
34
+ if TYPE_CHECKING:
35
+ from smolagents.memory import AgentLogger
36
+
37
+
38
+ __all__ = ["AgentError"]
39
+
40
+
41
+ @lru_cache
42
+ def _is_package_available(package_name: str) -> bool:
43
+ try:
44
+ importlib.metadata.version(package_name)
45
+ return True
46
+ except importlib.metadata.PackageNotFoundError:
47
+ return False
48
+
49
+
50
+ BASE_BUILTIN_MODULES = [
51
+ "collections",
52
+ "datetime",
53
+ "itertools",
54
+ "math",
55
+ "queue",
56
+ "random",
57
+ "re",
58
+ "stat",
59
+ "statistics",
60
+ "time",
61
+ "unicodedata",
62
+ ]
63
+
64
+
65
+ def escape_code_brackets(text: str) -> str:
66
+ """Escapes square brackets in code segments while preserving Rich styling tags."""
67
+
68
+ def replace_bracketed_content(match):
69
+ content = match.group(1)
70
+ cleaned = re.sub(
71
+ r"bold|red|green|blue|yellow|magenta|cyan|white|black|italic|dim|\s|#[0-9a-fA-F]{6}", "", content
72
+ )
73
+ return f"\\[{content}\\]" if cleaned.strip() else f"[{content}]"
74
+
75
+ return re.sub(r"\[([^\]]*)\]", replace_bracketed_content, text)
76
+
77
+
78
+ class AgentError(Exception):
79
+ """Base class for other agent-related exceptions"""
80
+
81
+ def __init__(self, message, logger: "AgentLogger"):
82
+ super().__init__(message)
83
+ self.message = message
84
+ logger.log_error(message)
85
+
86
+ def dict(self) -> dict[str, str]:
87
+ return {"type": self.__class__.__name__, "message": str(self.message)}
88
+
89
+
90
+ class AgentParsingError(AgentError):
91
+ """Exception raised for errors in parsing in the agent"""
92
+
93
+ pass
94
+
95
+
96
+ class AgentExecutionError(AgentError):
97
+ """Exception raised for errors in execution in the agent"""
98
+
99
+ pass
100
+
101
+
102
+ class AgentMaxStepsError(AgentError):
103
+ """Exception raised for errors in execution in the agent"""
104
+
105
+ pass
106
+
107
+
108
+ class AgentToolCallError(AgentExecutionError):
109
+ """Exception raised for errors when incorrect arguments are passed to the tool"""
110
+
111
+ pass
112
+
113
+
114
+ class AgentToolExecutionError(AgentExecutionError):
115
+ """Exception raised for errors when executing a tool"""
116
+
117
+ pass
118
+
119
+
120
+ class AgentGenerationError(AgentError):
121
+ """Exception raised for errors in generation in the agent"""
122
+
123
+ pass
124
+
125
+
126
+ def make_json_serializable(obj: Any) -> Any:
127
+ """Recursive function to make objects JSON serializable"""
128
+ if obj is None:
129
+ return None
130
+ elif isinstance(obj, (str, int, float, bool)):
131
+ # Try to parse string as JSON if it looks like a JSON object/array
132
+ if isinstance(obj, str):
133
+ try:
134
+ if (obj.startswith("{") and obj.endswith("}")) or (obj.startswith("[") and obj.endswith("]")):
135
+ parsed = json.loads(obj)
136
+ return make_json_serializable(parsed)
137
+ except json.JSONDecodeError:
138
+ pass
139
+ return obj
140
+ elif isinstance(obj, (list, tuple)):
141
+ return [make_json_serializable(item) for item in obj]
142
+ elif isinstance(obj, dict):
143
+ return {str(k): make_json_serializable(v) for k, v in obj.items()}
144
+ elif hasattr(obj, "__dict__"):
145
+ # For custom objects, convert their __dict__ to a serializable format
146
+ return {"_type": obj.__class__.__name__, **{k: make_json_serializable(v) for k, v in obj.__dict__.items()}}
147
+ else:
148
+ # For any other type, convert to string
149
+ return str(obj)
150
+
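An illustrative round-trip (`Point` is a made-up class, not part of this module): containers recurse, JSON-looking strings are re-parsed, and custom objects are tagged with their class name.

    class Point:
        def __init__(self):
            self.x, self.y = 1, 2

    make_json_serializable({"p": Point(), "raw": '{"a": 1}', "t": (1, 2)})
    # -> {"p": {"_type": "Point", "x": 1, "y": 2}, "raw": {"a": 1}, "t": [1, 2]}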
151
+
152
+ def parse_json_blob(json_blob: str) -> tuple[dict[str, str], str]:
153
+ "Extracts the JSON blob from the input and returns the JSON data and the rest of the input."
154
+ try:
155
+ first_accolade_index = json_blob.find("{")
156
+ last_accolade_index = [a.start() for a in list(re.finditer("}", json_blob))][-1]
157
+ json_data = json_blob[first_accolade_index : last_accolade_index + 1]
158
+ json_data = json.loads(json_data, strict=False)
159
+ return json_data, json_blob[:first_accolade_index]
160
+ except IndexError:
161
+ raise ValueError("The model output does not contain any JSON blob.")
162
+ except json.JSONDecodeError as e:
163
+ place = e.pos
164
+ if json_blob[place - 1 : place + 2] == "},\n":
165
+ raise ValueError(
166
+ "JSON is invalid: you probably tried to provide multiple tool calls in one action. PROVIDE ONLY ONE TOOL CALL."
167
+ )
168
+ raise ValueError(
169
+ f"The JSON blob you used is invalid due to the following error: {e}.\n"
170
+ f"JSON blob was: {json_blob}, decoding failed on that specific part of the blob:\n"
171
+ f"'{json_blob[place - 4 : place + 5]}'."
172
+ )
173
+
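An assumed example of the contract (not from the file itself): the parsed dict is returned together with the text that preceded the first brace.

    data, prefix = parse_json_blob('Thought: search.\n{"name": "web_search", "arguments": {"q": "foo"}}')
    # data   -> {"name": "web_search", "arguments": {"q": "foo"}}
    # prefix -> "Thought: search.\n"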
174
+
175
+ def extract_code_from_text(text: str) -> str | None:
176
+ """Extract code from the LLM's output."""
177
+ pattern = r"<code>(.*?)</code>"
178
+ matches = re.findall(pattern, text, re.DOTALL)
179
+ if matches:
180
+ return "\n\n".join(match.strip() for match in matches)
181
+ return None
182
+
183
+
184
+ def parse_code_blobs(text: str) -> str:
185
+ """Extract code blocs from the LLM's output.
186
+
187
+ If a valid code block is passed, it returns it directly.
188
+
189
+ Args:
190
+ text (`str`): LLM's output text to parse.
191
+
192
+ Returns:
193
+ `str`: Extracted code block.
194
+
195
+ Raises:
196
+ ValueError: If no valid code block is found in the text.
197
+ """
198
+ matches = extract_code_from_text(text)
199
+ if matches:
200
+ return matches
201
+ # Maybe the LLM output a code blob directly
202
+ try:
203
+ ast.parse(text)
204
+ return text
205
+ except SyntaxError:
206
+ pass
207
+
208
+ if "final" in text and "answer" in text:
209
+ raise ValueError(
210
+ dedent(
211
+ f"""
212
+ Your code snippet is invalid because the regex pattern <code>(.*?)</code> was not found in it.
213
+ Here is your code snippet:
214
+ {text}
215
+ It seems like you're trying to return the final answer; you can do it as follows:
216
+ <code>
217
+ final_answer("YOUR FINAL ANSWER HERE")
218
+ </code>
219
+ """
220
+ ).strip()
221
+ )
222
+ raise ValueError(
223
+ dedent(
224
+ f"""
225
+ Your code snippet is invalid because the regex pattern <code>(.*?)</code> was not found in it.
226
+ Here is your code snippet:
227
+ {text}
228
+ Make sure to include code with the correct pattern, for instance:
229
+ Thoughts: Your thoughts
230
+ <code>
231
+ # Your python code here
232
+ </code>
233
+ """
234
+ ).strip()
235
+ )
236
+
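Two illustrative cases (not from the file): tagged code is extracted and stripped, and bare text that already parses as Python is returned unchanged.

    parse_code_blobs("Thoughts: add.\n<code>\nx = 1 + 1\n</code>")  # -> "x = 1 + 1"
    parse_code_blobs("x = 1 + 1")                                   # -> "x = 1 + 1"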
237
+
238
+ MAX_LENGTH_TRUNCATE_CONTENT = 20000
239
+
240
+
241
+ def truncate_content(content: str, max_length: int = MAX_LENGTH_TRUNCATE_CONTENT) -> str:
242
+ if len(content) <= max_length:
243
+ return content
244
+ else:
245
+ return (
246
+ content[: max_length // 2]
247
+ + f"\n..._This content has been truncated to stay below {max_length} characters_...\n"
248
+ + content[-max_length // 2 :]
249
+ )
250
+
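A minimal sanity check (illustrative): half of the budget is kept from each end with a marker line spliced in between, so very long tool outputs cannot flood the agent's memory.

    truncate_content("a" * 50, max_length=10)
    # -> "aaaaa\n..._This content has been truncated to stay below 10 characters_...\naaaaa"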
251
+
252
+ class ImportFinder(ast.NodeVisitor):
253
+ def __init__(self):
254
+ self.packages = set()
255
+
256
+ def visit_Import(self, node):
257
+ for alias in node.names:
258
+ # Get the base package name (before any dots)
259
+ base_package = alias.name.split(".")[0]
260
+ self.packages.add(base_package)
261
+
262
+ def visit_ImportFrom(self, node):
263
+ if node.module: # for "from x import y" statements
264
+ # Get the base package name (before any dots)
265
+ base_package = node.module.split(".")[0]
266
+ self.packages.add(base_package)
267
+
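An illustrative run (not part of the file): only base package names are collected, for both import styles.

    finder = ImportFinder()
    finder.visit(ast.parse("import numpy.linalg\nfrom PIL import Image"))
    finder.packages  # -> {"numpy", "PIL"}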
268
+
269
+ def get_method_source(method):
270
+ """Get source code for a method, including bound methods."""
271
+ if isinstance(method, types.MethodType):
272
+ method = method.__func__
273
+ return get_source(method)
274
+
275
+
276
+ def is_same_method(method1, method2):
277
+ """Compare two methods by their source code."""
278
+ try:
279
+ source1 = get_method_source(method1)
280
+ source2 = get_method_source(method2)
281
+
282
+ # Remove method decorators if any
283
+ source1 = "\n".join(line for line in source1.split("\n") if not line.strip().startswith("@"))
284
+ source2 = "\n".join(line for line in source2.split("\n") if not line.strip().startswith("@"))
285
+
286
+ return source1 == source2
287
+ except (TypeError, OSError):
288
+ return False
289
+
290
+
291
+ def is_same_item(item1, item2):
292
+ """Compare two class items (methods or attributes) for equality."""
293
+ if callable(item1) and callable(item2):
294
+ return is_same_method(item1, item2)
295
+ else:
296
+ return item1 == item2
297
+
298
+
299
+ def instance_to_source(instance, base_cls=None):
300
+ """Convert an instance to its class source code representation."""
301
+ cls = instance.__class__
302
+ class_name = cls.__name__
303
+
304
+ # Start building class lines
305
+ class_lines = []
306
+ if base_cls:
307
+ class_lines.append(f"class {class_name}({base_cls.__name__}):")
308
+ else:
309
+ class_lines.append(f"class {class_name}:")
310
+
311
+ # Add docstring if it exists and differs from base
312
+ if cls.__doc__ and (not base_cls or cls.__doc__ != base_cls.__doc__):
313
+ class_lines.append(f' """{cls.__doc__}"""')
314
+
315
+ # Add class-level attributes
316
+ class_attrs = {
317
+ name: value
318
+ for name, value in cls.__dict__.items()
319
+ if not name.startswith("__")
320
+ and not callable(value)
321
+ and not (base_cls and hasattr(base_cls, name) and getattr(base_cls, name) == value)
322
+ }
323
+
324
+ for name, value in class_attrs.items():
325
+ if isinstance(value, str):
326
+ # multiline value
327
+ if "\n" in value:
328
+ escaped_value = value.replace('"""', r"\"\"\"") # Escape triple quotes
329
+ class_lines.append(f' {name} = """{escaped_value}"""')
330
+ else:
331
+ class_lines.append(f" {name} = {json.dumps(value)}")
332
+ else:
333
+ class_lines.append(f" {name} = {repr(value)}")
334
+
335
+ if class_attrs:
336
+ class_lines.append("")
337
+
338
+ # Add methods
339
+ methods = {
340
+ name: func.__wrapped__ if hasattr(func, "__wrapped__") else func
341
+ for name, func in cls.__dict__.items()
342
+ if callable(func)
343
+ and (
344
+ not base_cls
345
+ or not hasattr(base_cls, name)
346
+ or (
347
+ isinstance(func, (staticmethod, classmethod))
348
+ or (getattr(base_cls, name).__code__.co_code != func.__code__.co_code)
349
+ )
350
+ )
351
+ }
352
+
353
+ for name, method in methods.items():
354
+ method_source = get_source(method)
355
+ # Clean up the indentation
356
+ method_lines = method_source.split("\n")
357
+ first_line = method_lines[0]
358
+ indent = len(first_line) - len(first_line.lstrip())
359
+ method_lines = [line[indent:] for line in method_lines]
360
+ method_source = "\n".join([" " + line if line.strip() else line for line in method_lines])
361
+ class_lines.append(method_source)
362
+ class_lines.append("")
363
+
364
+ # Find required imports using ImportFinder
365
+ import_finder = ImportFinder()
366
+ import_finder.visit(ast.parse("\n".join(class_lines)))
367
+ required_imports = import_finder.packages
368
+
369
+ # Build final code with imports
370
+ final_lines = []
371
+
372
+ # Add base class import if needed
373
+ if base_cls:
374
+ final_lines.append(f"from {base_cls.__module__} import {base_cls.__name__}")
375
+
376
+ # Add discovered imports
377
+ for package in required_imports:
378
+ final_lines.append(f"import {package}")
379
+
380
+ if final_lines: # Add empty line after imports
381
+ final_lines.append("")
382
+
383
+ # Add the class code
384
+ final_lines.extend(class_lines)
385
+
386
+ return "\n".join(final_lines)
387
+
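A hedged usage sketch (`my_tool` is hypothetical, and the `Tool` import path is an assumption): the generated source opens with the base-class import and any discovered imports, followed by a class definition reproducing the instance's class attributes and overridden methods.

    from smolagents.tools import Tool  # assumed location of the Tool base class

    source = instance_to_source(my_tool, base_cls=Tool)  # `my_tool` is a hypothetical Tool instance
    # `source` starts with "from smolagents.tools import Tool", then "class MyTool(Tool):" and its body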
388
+
389
+ def get_source(obj) -> str:
390
+ """Get the source code of a class or callable object (e.g.: function, method).
391
+ First attempts to get the source code using `inspect.getsource`.
392
+ In a dynamic environment (e.g.: Jupyter, IPython), if this fails,
393
+ falls back to retrieving the source code from the current interactive shell session.
394
+
395
+ Args:
396
+ obj: A class or callable object (e.g.: function, method)
397
+
398
+ Returns:
399
+ str: The source code of the object, dedented and stripped
400
+
401
+ Raises:
402
+ TypeError: If object is not a class or callable
403
+ OSError: If source code cannot be retrieved from any source
404
+ ValueError: If source cannot be found in IPython history
405
+
406
+ Note:
407
+ TODO: handle Python standard REPL
408
+ """
409
+ if not (isinstance(obj, type) or callable(obj)):
410
+ raise TypeError(f"Expected class or callable, got {type(obj)}")
411
+
412
+ inspect_error = None
413
+ try:
414
+ # Handle dynamically created classes
415
+ source = getattr(obj, "__source__", None) or inspect.getsource(obj)
416
+ return dedent(source).strip()
417
+ except OSError as e:
418
+ # let's keep track of the exception to raise it if all further methods fail
419
+ inspect_error = e
420
+ try:
421
+ import IPython
422
+
423
+ shell = IPython.get_ipython()
424
+ if not shell:
425
+ raise ImportError("No active IPython shell found")
426
+ all_cells = "\n".join(shell.user_ns.get("In", [])).strip()
427
+ if not all_cells:
428
+ raise ValueError("No code cells found in IPython session")
429
+
430
+ tree = ast.parse(all_cells)
431
+ for node in ast.walk(tree):
432
+ if isinstance(node, (ast.ClassDef, ast.FunctionDef)) and node.name == obj.__name__:
433
+ return dedent("\n".join(all_cells.split("\n")[node.lineno - 1 : node.end_lineno])).strip()
434
+ raise ValueError(f"Could not find source code for {obj.__name__} in IPython history")
435
+ except ImportError:
436
+ # IPython is not available, let's just raise the original inspect error
437
+ raise inspect_error
438
+ except ValueError as e:
439
+ # IPython is available but we couldn't find the source code, let's raise the error
440
+ raise e from inspect_error
441
+
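A quick check of the happy path (illustrative): for an ordinary function this reduces to `inspect.getsource` plus dedent/strip; the IPython fallback only matters for objects defined interactively.

    def double(x):
        return 2 * x

    get_source(double)  # -> "def double(x):\n    return 2 * x"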
442
+
443
+ def encode_image_base64(image):
444
+ buffered = BytesIO()
445
+ image.save(buffered, format="PNG")
446
+ return base64.b64encode(buffered.getvalue()).decode("utf-8")
447
+
448
+
449
+ def make_image_url(base64_image):
450
+ return f"data:image/png;base64,{base64_image}"
451
+
452
+
453
+ def make_init_file(folder: str | Path):
454
+ os.makedirs(folder, exist_ok=True)
455
+ # Create __init__
456
+ with open(os.path.join(folder, "__init__.py"), "w"):
457
+ pass
458
+
459
+
460
+ def is_valid_name(name: str) -> bool:
461
+ return isinstance(name, str) and name.isidentifier() and not keyword.iskeyword(name)
462
+
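Illustrative cases (not from the file): identifiers pass, while keywords and non-strings are rejected.

    is_valid_name("my_tool")  # True
    is_valid_name("class")    # False: Python keyword
    is_valid_name(123)        # False: not a str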
463
+
464
+ AGENT_GRADIO_APP_TEMPLATE = """import yaml
465
+ import os
466
+ from smolagents import GradioUI, {{ class_name }}, {{ agent_dict['model']['class'] }}
467
+
468
+ # Get current directory path
469
+ CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
470
+
471
+ {% for tool in tools.values() -%}
472
+ from {{managed_agent_relative_path}}tools.{{ tool.name }} import {{ tool.__class__.__name__ }} as {{ tool.name | camelcase }}
473
+ {% endfor %}
474
+ {% for managed_agent in managed_agents.values() -%}
475
+ from {{managed_agent_relative_path}}managed_agents.{{ managed_agent.name }}.app import agent_{{ managed_agent.name }}
476
+ {% endfor %}
477
+
478
+ model = {{ agent_dict['model']['class'] }}(
479
+ {% for key in agent_dict['model']['data'] if key not in ['class', 'last_input_token_count', 'last_output_token_count'] -%}
480
+ {{ key }}={{ agent_dict['model']['data'][key]|repr }},
481
+ {% endfor %})
482
+
483
+ {% for tool in tools.values() -%}
484
+ {{ tool.name }} = {{ tool.name | camelcase }}()
485
+ {% endfor %}
486
+
487
+ with open(os.path.join(CURRENT_DIR, "prompts.yaml"), 'r') as stream:
488
+ prompt_templates = yaml.safe_load(stream)
489
+
490
+ {{ agent_name }} = {{ class_name }}(
491
+ model=model,
492
+ tools=[{% for tool_name in tools.keys() if tool_name != "final_answer" %}{{ tool_name }}{% if not loop.last %}, {% endif %}{% endfor %}],
493
+ managed_agents=[{% for subagent_name in managed_agents.keys() %}agent_{{ subagent_name }}{% if not loop.last %}, {% endif %}{% endfor %}],
494
+ {% for attribute_name, value in agent_dict.items() if attribute_name not in ["model", "tools", "prompt_templates", "authorized_imports", "managed_agents", "requirements"] -%}
495
+ {{ attribute_name }}={{ value|repr }},
496
+ {% endfor %}prompt_templates=prompt_templates
497
+ )
498
+ if __name__ == "__main__":
499
+ GradioUI({{ agent_name }}).launch()
500
+ """.strip()
src/smolagents/vision_web_browser.py ADDED
@@ -0,0 +1,228 @@
1
+ import argparse
2
+ from io import BytesIO
3
+ from time import sleep
4
+
5
+ import helium
6
+ import PIL.Image
7
+ from dotenv import load_dotenv
8
+ from selenium import webdriver
9
+ from selenium.webdriver.common.by import By
10
+ from selenium.webdriver.common.keys import Keys
11
+
12
+ from smolagents import CodeAgent, WebSearchTool, tool
13
+ from smolagents.agents import ActionStep
14
+ from smolagents.cli import load_model
15
+
16
+
17
+ github_request = """
18
+ I'm trying to find how hard I have to work to get a repo in github.com/trending.
19
+ Can you navigate to the profile for the top author of the top trending repo, and give me their total number of commits over the last year?
20
+ """ # The agent is able to achieve this request only when powered by GPT-4o or Claude-3.5-sonnet.
21
+
22
+ search_request = """
23
+ Please navigate to https://en.wikipedia.org/wiki/Chicago and give me a sentence containing the word "1992" that mentions a construction accident.
24
+ """
25
+
26
+
27
+ def parse_arguments():
28
+ parser = argparse.ArgumentParser(description="Run a web browser automation script with a specified model.")
29
+ parser.add_argument(
30
+ "prompt",
31
+ type=str,
32
+ nargs="?", # Makes it optional
33
+ default=search_request,
34
+ help="The prompt to run with the agent",
35
+ )
36
+ parser.add_argument(
37
+ "--model-type",
38
+ type=str,
39
+ default="LiteLLMModel",
40
+ help="The model type to use (e.g., OpenAIServerModel, LiteLLMModel, TransformersModel, InferenceClientModel)",
41
+ )
42
+ parser.add_argument(
43
+ "--model-id",
44
+ type=str,
45
+ default="gpt-4o",
46
+ help="The model ID to use for the specified model type",
47
+ )
48
+ parser.add_argument(
49
+ "--provider",
50
+ type=str,
51
+ help="The inference provider to use for the model",
52
+ )
53
+ parser.add_argument(
54
+ "--api-base",
55
+ type=str,
56
+ help="The API base to use for the model",
57
+ )
58
+ parser.add_argument(
59
+ "--api-key",
60
+ type=str,
61
+ help="The API key to use for the model",
62
+ )
63
+ return parser.parse_args()
64
+
65
+
66
+ def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
67
+ sleep(1.0) # Let JavaScript animations happen before taking the screenshot
68
+ driver = helium.get_driver()
69
+ current_step = memory_step.step_number
70
+ if driver is not None:
71
+ for previous_memory_step in agent.memory.steps: # Remove previous screenshots from logs for lean processing
72
+ if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2:
73
+ previous_memory_step.observations_images = None
74
+ png_bytes = driver.get_screenshot_as_png()
75
+ image = PIL.Image.open(BytesIO(png_bytes))
76
+ print(f"Captured a browser screenshot: {image.size} pixels")
77
+ memory_step.observations_images = [image.copy()] # Create a copy to ensure it persists, important!
78
+
79
+ # Update observations with current URL
80
+ url_info = f"Current url: {driver.current_url}"
81
+ memory_step.observations = (
82
+ url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
83
+ )
84
+ return
85
+
86
+
87
+ @tool
88
+ def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
89
+ """
90
+ Searches for text on the current page via Ctrl + F and jumps to the nth occurrence.
91
+ Args:
92
+ text: The text to search for
93
+ nth_result: Which occurrence to jump to (default: 1)
94
+ """
95
+ elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
96
+ if nth_result > len(elements):
97
+ raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")
98
+ result = f"Found {len(elements)} matches for '{text}'."
99
+ elem = elements[nth_result - 1]
100
+ driver.execute_script("arguments[0].scrollIntoView(true);", elem)
101
+ result += f"Focused on element {nth_result} of {len(elements)}"
102
+ return result
103
+
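An assumed call from agent code (the match count depends on the live page):

    search_item_ctrl_f("Chicago", nth_result=2)
    # -> e.g. "Found 7 matches for 'Chicago'. Focused on element 2 of 7."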
104
+
105
+ @tool
106
+ def go_back() -> None:
107
+ """Goes back to previous page."""
108
+ driver.back()
109
+
110
+
111
+ @tool
112
+ def close_popups() -> str:
113
+ """
114
+ Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows! This does not work on cookie consent banners.
115
+ """
116
+ webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
+ return "Popups closed."  # return a confirmation string so the declared -> str annotation holds
117
+
118
+
119
+ def initialize_driver():
120
+ """Initialize the Selenium WebDriver."""
121
+ chrome_options = webdriver.ChromeOptions()
122
+ chrome_options.add_argument("--force-device-scale-factor=1")
123
+ chrome_options.add_argument("--window-size=1000,1350")
124
+ chrome_options.add_argument("--disable-pdf-viewer")
125
+ chrome_options.add_argument("--window-position=0,0")
126
+ return helium.start_chrome(headless=False, options=chrome_options)
127
+
128
+
129
+ def initialize_agent(model):
130
+ """Initialize the CodeAgent with the specified model."""
131
+ return CodeAgent(
132
+ tools=[WebSearchTool(), go_back, close_popups, search_item_ctrl_f],
133
+ model=model,
134
+ additional_authorized_imports=["helium"],
135
+ step_callbacks=[save_screenshot],
136
+ max_steps=20,
137
+ verbosity_level=2,
138
+ )
139
+
140
+
141
+ helium_instructions = """
142
+ Use your web_search tool when you want to get Google search results.
143
+ Then you can use helium to access websites. Don't use helium for Google search, only for navigating websites!
144
+ Don't worry about the helium driver, it's already managed.
145
+ We've already run "from helium import *".
146
+ Then you can go to pages!
147
+ <code>
148
+ go_to('github.com/trending')
149
+ </code>
150
+
151
+ You can directly click clickable elements by inputting the text that appears on them.
152
+ <code>
153
+ click("Top products")
154
+ </code>
155
+
156
+ If it's a link:
157
+ <code>
158
+ click(Link("Top products"))
159
+ </code>
160
+
161
+ If you try to interact with an element and it's not found, you'll get a LookupError.
162
+ In general, stop your action after each button click to see what happens in your screenshot.
163
+ Never try to log in to a page.
164
+
165
+ To scroll up or down, use scroll_down or scroll_up, passing the number of pixels to scroll as an argument.
166
+ <code>
167
+ scroll_down(num_pixels=1200) # This will scroll one viewport down
168
+ </code>
169
+
170
+ When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails).
171
+ Just use your built-in tool `close_popups` to close them:
172
+ <code>
173
+ close_popups()
174
+ </code>
175
+
176
+ You can use .exists() to check for the existence of an element. For example:
177
+ <code>
178
+ if Text('Accept cookies?').exists():
179
+ click('I accept')
180
+ </code>
181
+
182
+ Proceed in several steps rather than trying to solve the task in one shot.
183
+ And at the end, only when you have your answer, return your final answer.
184
+ <code>
185
+ final_answer("YOUR_ANSWER_HERE")
186
+ </code>
187
+
188
+ If pages seem stuck on loading, you might have to wait; for instance, `import time` and run `time.sleep(5.0)`. But don't overuse this!
189
+ To list elements on the page, DO NOT try code-based element searches like 'contributors = find_all(S("ol > li"))': just look at the latest screenshot you have and read it visually, or use your tool search_item_ctrl_f.
190
+ Of course, you can act on buttons like a user would do when navigating.
191
+ After each code blob you write, you will be automatically provided with an updated screenshot of the browser and the current browser url.
192
+ But beware that the screenshot will only be taken at the end of the whole action; it won't see intermediate states.
193
+ Don't kill the browser.
194
+ When you have modals or cookie banners on screen, you should get rid of them before you can click anything else.
195
+ """
196
+
197
+
198
+ def run_webagent(
199
+ prompt: str,
200
+ model_type: str,
201
+ model_id: str,
202
+ provider: str | None = None,
203
+ api_base: str | None = None,
204
+ api_key: str | None = None,
205
+ ) -> None:
206
+ # Load environment variables
207
+ load_dotenv()
208
+
209
+ # Initialize the model based on the provided arguments
210
+ model = load_model(model_type, model_id, provider=provider, api_base=api_base, api_key=api_key)
211
+
212
+ global driver
213
+ driver = initialize_driver()
214
+ agent = initialize_agent(model)
215
+
216
+ # Run the agent with the provided prompt
217
+ agent.python_executor("from helium import *")
218
+ agent.run(prompt + helium_instructions)
219
+
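A hedged programmatic invocation (the model id and key below are placeholders, not values this script prescribes):

    run_webagent(
        prompt=search_request,
        model_type="LiteLLMModel",
        model_id="gpt-4o",
        api_key="sk-...",  # placeholder credential
    )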
220
+
221
+ def main() -> None:
222
+ # Parse command line arguments
223
+ args = parse_arguments()
224
+ run_webagent(args.prompt, args.model_type, args.model_id, args.provider, args.api_base, args.api_key)
225
+
226
+
227
+ if __name__ == "__main__":
228
+ main()