Commit 763ec84 · github-actions[bot] committed · 1 Parent(s): 5ddb0f0

Sync with https://github.com/mozilla-ai/any-agent-demo

Files changed:
- .streamlit/config.toml +6 -0
- Dockerfile +3 -3
- README.md +7 -6
- app.py +150 -0
- components/__init__.py +0 -0
- components/agent_status.py +47 -0
- components/inputs.py +152 -0
- components/sidebar.py +9 -0
- constants.py +74 -0
- requirements.txt +5 -3
- services/__init__.py +0 -0
- services/agent.py +227 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,6 @@
+[theme]
+primaryColor="#00d230"
+backgroundColor="#FFFFFF"
+secondaryBackgroundColor="#F0F2F6"
+textColor="#161616"
+font="sans serif"
Dockerfile
CHANGED
@@ -1,4 +1,4 @@
-FROM python:3.
+FROM python:3.12-slim
 
 WORKDIR /app
 
@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 COPY requirements.txt ./
-COPY
+COPY . ./demo/
 
 RUN pip3 install -r requirements.txt
 
@@ -18,4 +18,4 @@ EXPOSE 8501
 
 HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
-ENTRYPOINT ["streamlit", "run", "
+ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md
CHANGED
@@ -1,19 +1,20 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Surf Spot Finder
+emoji: 🏄🏼‍♀️
+colorFrom: blue
+colorTo: indigo
 sdk: docker
 app_port: 8501
 tags:
 - streamlit
 pinned: false
-short_description:
+short_description: Find a surf spot near you
+license: apache-2.0
 ---
 
 # Welcome to Streamlit!
 
-Edit `/src/
+Edit `/src/app.py` to customize this app to your heart's desire. :heart:
 
 If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
 forums](https://discuss.streamlit.io).
app.py
ADDED
@@ -0,0 +1,150 @@
+from components.sidebar import ssf_sidebar
+from constants import DEFAULT_TOOLS
+import streamlit as st
+import asyncio
+import nest_asyncio
+from services.agent import (
+    configure_agent,
+    display_evaluation_results,
+    display_output,
+    evaluate_agent,
+    run_agent,
+)
+
+nest_asyncio.apply()
+
+# Set page config
+st.set_page_config(page_title="Surf Spot Finder", page_icon="🏄", layout="wide")
+
+# Allow a user to resize the sidebar to take up most of the screen to make editing eval cases easier
+st.markdown(
+    """
+    <style>
+    /* When sidebar is expanded, adjust main content */
+    section[data-testid="stSidebar"][aria-expanded="true"] {
+        max-width: 99% !important;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+
+with st.sidebar:
+    user_inputs = ssf_sidebar()
+    is_valid = user_inputs is not None
+    run_button = st.button("Run Agent 🤖", disabled=not is_valid, type="primary")
+
+
+# Main content
+async def main():
+    # Handle agent execution button click
+    if run_button:
+        agent, agent_config = await configure_agent(user_inputs)
+        agent_trace = await run_agent(agent, agent_config)
+
+        await display_output(agent_trace)
+
+        evaluation_result = await evaluate_agent(agent_config, agent_trace)
+
+        await display_evaluation_results(evaluation_result)
+    else:
+        st.title("🏄 Surf Spot Finder")
+        st.markdown(
+            "Find the best surfing spots based on your location and preferences! [Github Repo](https://github.com/mozilla-ai/surf-spot-finder)"
+        )
+        st.info(
+            "👈 Configure your search parameters in the sidebar and click Run to start!"
+        )
+
+        # Display tools in a more organized way
+        st.markdown("### 🛠️ Available Tools")
+
+        st.markdown("""
+        The AI Agent built for this project has a few tools available for use in order to find the perfect surf spot.
+        The agent is given the freedom to use (or not use) these tools in order to accomplish the task.
+        """)
+
+        weather_tools = [
+            tool
+            for tool in DEFAULT_TOOLS
+            if "forecast" in tool.__name__ or "weather" in tool.__name__
+        ]
+        for tool in weather_tools:
+            with st.expander(f"🌤️ {tool.__name__}"):
+                st.markdown(tool.__doc__ or "No description available")
+        location_tools = [
+            tool
+            for tool in DEFAULT_TOOLS
+            if "lat" in tool.__name__
+            or "lon" in tool.__name__
+            or "area" in tool.__name__
+        ]
+        for tool in location_tools:
+            with st.expander(f"📍 {tool.__name__}"):
+                st.markdown(tool.__doc__ or "No description available")
+
+        web_tools = [
+            tool
+            for tool in DEFAULT_TOOLS
+            if "web" in tool.__name__ or "search" in tool.__name__
+        ]
+        for tool in web_tools:
+            with st.expander(f"🌐 {tool.__name__}"):
+                st.markdown(tool.__doc__ or "No description available")
+
+        # add a check that all tools were listed
+        if len(weather_tools) + len(location_tools) + len(web_tools) != len(
+            DEFAULT_TOOLS
+        ):
+            st.warning(
+                "Some tools are not listed. Please check the code for more details."
+            )
+
+        # Add Custom Evaluation explanation section
+        st.markdown("### 📊 Custom Evaluation")
+        st.markdown("""
+        The Surf Spot Finder includes a powerful evaluation system that allows you to customize how the agent's performance is assessed.
+        You can find these settings in the sidebar under the "Custom Evaluation" expander.
+        """)
+
+        with st.expander("Learn more about Custom Evaluation"):
+            st.markdown("""
+            #### What is Custom Evaluation?
+            The Custom Evaluation feature uses an LLM-as-a-Judge approach to evaluate how well the agent performs its task.
+            An LLM will be given the complete agent trace (not just the final answer), and will assess the agent's performance based on the criteria you set.
+            You can customize:
+
+            - **Evaluation Model**: Choose which LLM should act as the judge
+            - **Evaluation Criteria**: Define specific checkpoints that the agent should meet
+            - **Scoring System**: Assign points to each criterion
+
+            #### How to Use Custom Evaluation
+
+            1. **Select an Evaluation Model**: Choose which LLM you want to use as the judge
+            2. **Edit Checkpoints**: Use the data editor to:
+               - Add new evaluation criteria
+               - Modify existing criteria
+               - Adjust point values
+               - Remove criteria you don't want to evaluate
+
+            #### Example Criteria
+            You can evaluate things like:
+            - Tool usage and success
+            - Order of operations
+            - Quality of final recommendations
+            - Response completeness
+            - Number of steps taken
+
+            #### Tips for Creating Good Evaluation Criteria
+            - Be specific about what you want to evaluate
+            - Use clear, unambiguous language
+            - Consider both process (how the agent works) and outcome (what it produces)
+            - Assign appropriate point values based on importance
+
+            The evaluation results will be displayed after each agent run, showing how well the agent met your custom criteria.
+            """)
+
+
+if __name__ == "__main__":
+    loop = asyncio.new_event_loop()
+    loop.run_until_complete(main())
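
Note: app.py applies nest_asyncio before creating its own event loop because Streamlit can already have a loop running in the script thread, and a plain run_until_complete() would otherwise fail. A minimal, self-contained sketch of the same pattern (the demo coroutine is illustrative, not part of this commit):

    import asyncio
    import nest_asyncio

    # Patch asyncio so run_until_complete() also works when another
    # event loop is already running (as it can be under Streamlit).
    nest_asyncio.apply()

    async def demo():
        await asyncio.sleep(0.1)
        return "done"

    loop = asyncio.new_event_loop()
    print(loop.run_until_complete(demo()))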
components/__init__.py
ADDED
File without changes
components/agent_status.py
ADDED
@@ -0,0 +1,47 @@
+from any_agent import AnyAgent
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Callable
+
+from opentelemetry.sdk.trace.export import (
+    SpanExporter,
+    SpanExportResult,
+)
+
+from any_agent import AgentFramework
+
+from any_agent.tracing import TracingProcessor
+from any_agent.tracing.trace import AgentSpan
+
+if TYPE_CHECKING:
+    from opentelemetry.sdk.trace import ReadableSpan
+
+
+class StreamlitExporter(SpanExporter):
+    """Build an `AgentTrace` and export to the different outputs."""
+
+    def __init__(  # noqa: D107
+        self, agent_framework: AgentFramework, callback: Callable
+    ):
+        self.agent_framework = agent_framework
+        self.processor: TracingProcessor | None = TracingProcessor.create(
+            agent_framework
+        )
+        self.callback = callback
+
+    def export(self, spans: Sequence["ReadableSpan"]) -> SpanExportResult:  # noqa: D102
+        if not self.processor:
+            return SpanExportResult.SUCCESS
+
+        for readable_span in spans:
+            # Check if this span belongs to our run
+            span = AgentSpan.from_readable_span(readable_span)
+            self.callback(span)
+
+        return SpanExportResult.SUCCESS
+
+
+def export_logs(agent: AnyAgent, callback: Callable) -> None:
+    exporter = StreamlitExporter(agent.framework, callback)
+    span_processor = SimpleSpanProcessor(exporter)
+    agent._tracer_provider.add_span_processor(span_processor)
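
Note: export_logs() attaches the StreamlitExporter to the agent's (private) _tracer_provider, so each finished span is pushed synchronously through the callback. A hypothetical wiring outside Streamlit, reusing only names introduced in this commit; the framework, model, and query are illustrative, and create_async() is called the same way as in services/agent.py:

    import asyncio

    from any_agent import AgentConfig, AgentFramework, AnyAgent, TracingConfig
    from any_agent.tracing.trace import AgentSpan
    from components.agent_status import export_logs
    from constants import DEFAULT_TOOLS

    def print_span(span: AgentSpan) -> None:
        # Invoked once per span by StreamlitExporter.export().
        print(f"step: {span.name}")

    async def demo() -> None:
        agent = await AnyAgent.create_async(
            agent_framework=AgentFramework.TINYAGENT,
            agent_config=AgentConfig(model_id="openai/gpt-4.1-nano", tools=DEFAULT_TOOLS),
            tracing=TracingConfig(console=False, cost_info=True),
        )
        export_logs(agent, print_span)
        trace = await agent.run_async("Find me a surf spot near Los Angeles")
        print(trace.final_output)

    asyncio.run(demo())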
components/inputs.py
ADDED
@@ -0,0 +1,152 @@
+from datetime import datetime, timedelta
+import json
+import requests
+import streamlit as st
+from any_agent import AgentFramework
+from any_agent.tracing.trace import _is_tracing_supported
+from any_agent.evaluation import EvaluationCase
+from any_agent.evaluation.schemas import CheckpointCriteria
+import pandas as pd
+from constants import DEFAULT_EVALUATION_CASE, MODEL_OPTIONS
+import copy
+
+from pydantic import BaseModel, ConfigDict
+
+
+class UserInputs(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+    model_id: str
+    location: str
+    max_driving_hours: int
+    date: datetime
+    framework: str
+    evaluation_case: EvaluationCase
+    run_evaluation: bool
+
+
+@st.cache_resource
+def get_area(area_name: str) -> dict:
+    """Get the area from Nominatim.
+
+    Uses the [Nominatim API](https://nominatim.org/release-docs/develop/api/Search/).
+
+    Args:
+        area_name (str): The name of the area.
+
+    Returns:
+        dict: The area found.
+    """
+    response = requests.get(
+        f"https://nominatim.openstreetmap.org/search?q={area_name}&format=json",
+        headers={"User-Agent": "Mozilla/5.0"},
+        timeout=5,
+    )
+    response.raise_for_status()
+    response_json = json.loads(response.content.decode())
+    return response_json
+
+
+def get_user_inputs() -> UserInputs:
+    default_val = "Los Angeles California, US"
+
+    location = st.text_input("Enter a location", value=default_val)
+    if location:
+        location_check = get_area(location)
+        if not location_check:
+            st.error("❌ Invalid location")
+
+    max_driving_hours = st.number_input(
+        "Enter the maximum driving hours", min_value=1, value=2
+    )
+
+    col_date, col_time = st.columns([2, 1])
+    with col_date:
+        date = st.date_input(
+            "Select a date in the future", value=datetime.now() + timedelta(days=1)
+        )
+    with col_time:
+        # default to 9am
+        time = st.selectbox(
+            "Select a time",
+            [datetime.strptime(f"{i:02d}:00", "%H:%M").time() for i in range(24)],
+            index=9,
+        )
+    date = datetime.combine(date, time)
+
+    supported_frameworks = [
+        framework for framework in AgentFramework if _is_tracing_supported(framework)
+    ]
+
+    framework = st.selectbox(
+        "Select the agent framework to use",
+        supported_frameworks,
+        index=2,
+        format_func=lambda x: x.name,
+    )
+
+    model_id = st.selectbox(
+        "Select the model to use",
+        MODEL_OPTIONS,
+        index=1,
+        format_func=lambda x: "/".join(x.split("/")[-3:]),
+    )
+
+    # Add evaluation case section
+    with st.expander("Custom Evaluation"):
+        evaluation_model_id = st.selectbox(
+            "Select the model to use for LLM-as-a-Judge evaluation",
+            MODEL_OPTIONS,
+            index=2,
+            format_func=lambda x: "/".join(x.split("/")[-3:]),
+        )
+        evaluation_case = copy.deepcopy(DEFAULT_EVALUATION_CASE)
+        evaluation_case.llm_judge = evaluation_model_id
+        # make this an editable json section
+        # convert the checkpoints to a df series so that it can be edited
+        checkpoints = evaluation_case.checkpoints
+        checkpoints_df = pd.DataFrame(
+            [checkpoint.model_dump() for checkpoint in checkpoints]
+        )
+        checkpoints_df = st.data_editor(
+            checkpoints_df,
+            column_config={
+                "points": st.column_config.NumberColumn(label="Points"),
+                "criteria": st.column_config.TextColumn(label="Criteria"),
+            },
+            hide_index=True,
+            num_rows="dynamic",
+        )
+        # for each checkpoint, convert it back to a CheckpointCriteria object
+        new_ckpts = []
+
+        # don't let a user add more than 20 checkpoints
+        if len(checkpoints_df) > 20:
+            st.error(
+                "You can only add up to 20 checkpoints for the purpose of this demo."
+            )
+            checkpoints_df = checkpoints_df[:20]
+
+        for _, row in checkpoints_df.iterrows():
+            if row["criteria"] == "":
+                continue
+            try:
+                # Don't let people write essays for criteria in this demo
+                if len(row["criteria"].split(" ")) > 100:
+                    raise ValueError("Criteria is too long")
+                new_crit = CheckpointCriteria(
+                    criteria=row["criteria"], points=row["points"]
+                )
+                new_ckpts.append(new_crit)
+            except Exception as e:
+                st.error(f"Error creating checkpoint: {e}")
+        evaluation_case.checkpoints = new_ckpts
+
+    return UserInputs(
+        model_id=model_id,
+        location=location,
+        max_driving_hours=max_driving_hours,
+        date=date,
+        framework=framework,
+        evaluation_case=evaluation_case,
+        run_evaluation=st.checkbox("Run Evaluation", value=True),
+    )
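
Note: get_area() doubles as the location-validity check above: Nominatim's search endpoint returns a JSON list of candidate places, and an empty list means the location was not found. A quick sketch of calling it directly (field names follow the Nominatim API docs; values vary):

    from components.inputs import get_area

    results = get_area("Los Angeles California, US")
    if results:
        # Each match includes, among other fields, "display_name", "lat", "lon".
        top = results[0]
        print(top["display_name"], top["lat"], top["lon"])
    else:
        print("Invalid location")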
components/sidebar.py
ADDED
@@ -0,0 +1,9 @@
+from components.inputs import UserInputs, get_user_inputs
+import streamlit as st
+
+
+def ssf_sidebar() -> UserInputs:
+    st.markdown("### Configuration")
+    st.markdown("Built using [Any-Agent](https://github.com/mozilla-ai/any-agent)")
+    user_inputs = get_user_inputs()
+    return user_inputs
constants.py
ADDED
@@ -0,0 +1,74 @@
+import os
+
+from any_agent.evaluation import EvaluationCase
+from surf_spot_finder.tools import (
+    get_area_lat_lon,
+    get_wave_forecast,
+    get_wind_forecast,
+)
+from any_agent.logging import logger
+from any_agent.tools.web_browsing import search_web, visit_webpage, search_tavily
+
+MODEL_OPTIONS = [
+    # "huggingface/novita/deepseek-ai/DeepSeek-V3",
+    # "huggingface/novita/meta-llama/Llama-3.3-70B-Instruct",
+    "openai/gpt-4.1-nano",
+    "openai/gpt-4.1-mini",
+    "openai/gpt-4o",
+    "gemini/gemini-2.0-flash-lite",
+    "gemini/gemini-2.0-flash",
+    # "huggingface/Qwen/Qwen3-32B",  # right now throwing an internal error, but novita qwen isn't supporting tool calling
+]
+
+# Novita was the only HF based provider that worked.
+
+# Huggingface API Provider Error:
+# Must alternate between assistant/user, which meant that the 'tool' role made it puke
+
+
+DEFAULT_EVALUATION_CASE = EvaluationCase(
+    llm_judge=MODEL_OPTIONS[0],
+    checkpoints=[
+        {
+            "criteria": "Check if the agent considered at least three surf spot options",
+            "points": 1,
+        },
+        {
+            "criteria": "Check if the agent gathered wind forecasts for each surf spot being evaluated.",
+            "points": 1,
+        },
+        {
+            "criteria": "Check if the agent gathered wave forecasts for each surf spot being evaluated.",
+            "points": 1,
+        },
+        {
+            "criteria": "Check if the agent used any web search tools to explore which surf spots should be considered",
+            "points": 1,
+        },
+        {
+            "criteria": "Check if the final answer contains any description about the weather (air temp, chance of rain, etc) at the chosen location",
+            "points": 1,
+        },
+        {
+            "criteria": "Check if the final answer includes one of the surf spots evaluated by tools",
+            "points": 1,
+        },
+        {
+            "criteria": "Check if the final answer includes information about some alternative surf spots if the user is not satisfied with the chosen one",
+            "points": 1,
+        },
+    ],
+)
+
+
+DEFAULT_TOOLS = [
+    get_wind_forecast,
+    get_wave_forecast,
+    get_area_lat_lon,
+    search_web,
+    visit_webpage,
+]
+if os.getenv("TAVILY_API_KEY"):
+    DEFAULT_TOOLS.append(search_tavily)
+else:
+    logger.warning("TAVILY_API_KEY not set, skipping Tavily search tool")
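
Note: checkpoints are plain criteria/points pairs, validated as CheckpointCriteria when edited in components/inputs.py, so extending the default case programmatically is just an append. A sketch with a made-up extra criterion:

    import copy

    from any_agent.evaluation.schemas import CheckpointCriteria
    from constants import DEFAULT_EVALUATION_CASE

    case = copy.deepcopy(DEFAULT_EVALUATION_CASE)
    # Hypothetical extra checkpoint, same shape as the defaults above.
    case.checkpoints.append(
        CheckpointCriteria(
            criteria="Check if the final answer mentions the expected wave height",
            points=1,
        )
    )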
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
-
-
-
+streamlit
+openai-agents>=0.0.14
+any-agent[all]==0.15.0
+surf-spot-finder @ git+https://github.com/mozilla-ai/surf-spot-finder@7953016f71e7a96870233524b7a75878bd38f214
+nest_asyncio
services/__init__.py
ADDED
File without changes
services/agent.py
ADDED
@@ -0,0 +1,227 @@
+import json
+from components.inputs import UserInputs
+from constants import DEFAULT_TOOLS
+from components.agent_status import export_logs
+import streamlit as st
+from surf_spot_finder.config import Config
+from any_agent import AgentConfig, AnyAgent, TracingConfig, AgentFramework
+from any_agent.tracing.trace import AgentTrace, AgentSpan
+from any_agent.tracing.otel_types import StatusCode
+from any_agent.evaluation import evaluate, TraceEvaluationResult
+
+
+async def display_evaluation_results(result: TraceEvaluationResult):
+    if result.ground_truth_result is not None:
+        all_results = [*result.checkpoint_results, result.ground_truth_result]
+    else:
+        all_results = result.checkpoint_results
+
+    # Create columns for better layout
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.markdown("#### Criteria Results")
+        for checkpoint in all_results:
+            if checkpoint.passed:
+                st.success(f"✅ {checkpoint.criteria}")
+            else:
+                st.error(f"❌ {checkpoint.criteria}")
+
+    with col2:
+        st.markdown("#### Overall Score")
+        total_points = sum([result.points for result in all_results])
+        if total_points == 0:
+            msg = "Total points is 0, cannot calculate score."
+            raise ValueError(msg)
+        passed_points = sum([result.points for result in all_results if result.passed])
+
+        # Create a nice score display
+        st.markdown(f"### {passed_points}/{total_points}")
+        percentage = (passed_points / total_points) * 100
+        st.progress(percentage / 100)
+        st.markdown(f"**{percentage:.1f}%**")
+
+
+async def evaluate_agent(
+    config: Config, agent_trace: AgentTrace
+) -> TraceEvaluationResult:
+    assert (
+        len(config.evaluation_cases) == 1
+    ), "Only one evaluation case is supported in the demo"
+    st.markdown("### 📊 Evaluation Results")
+
+    with st.spinner("Evaluating results..."):
+        case = config.evaluation_cases[0]
+        result: TraceEvaluationResult = evaluate(
+            evaluation_case=case,
+            trace=agent_trace,
+            agent_framework=config.framework,
+        )
+    return result
+
+
+async def configure_agent(user_inputs: UserInputs) -> tuple[AnyAgent, Config]:
+    if "huggingface" in user_inputs.model_id:
+        model_args = {
+            "extra_headers": {"X-HF-Bill-To": "mozilla-ai"},
+            "temperature": 0.0,
+        }
+    else:
+        model_args = {}
+
+    if user_inputs.framework == AgentFramework.AGNO:
+        agent_args = {"tool_call_limit": 20}
+    else:
+        agent_args = {}
+
+    agent_config = AgentConfig(
+        model_id=user_inputs.model_id,
+        model_args=model_args,
+        agent_args=agent_args,
+        tools=DEFAULT_TOOLS,
+    )
+
+    config = Config(
+        location=user_inputs.location,
+        max_driving_hours=user_inputs.max_driving_hours,
+        date=user_inputs.date,
+        framework=user_inputs.framework,
+        main_agent=agent_config,
+        managed_agents=[],
+        evaluation_cases=[user_inputs.evaluation_case],
+    )
+
+    agent = await AnyAgent.create_async(
+        agent_framework=config.framework,
+        agent_config=config.main_agent,
+        managed_agents=config.managed_agents,
+        tracing=TracingConfig(console=True, cost_info=True),
+    )
+    return agent, config
+
+
+async def display_output(agent_trace: AgentTrace):
+    # Display the agent trace in a more organized way
+    with st.expander("### 🧩 Agent Trace"):
+        for span in agent_trace.spans:
+            # Header with name and status
+            col1, col2 = st.columns([4, 1])
+            with col1:
+                st.markdown(f"**{span.name}**")
+                if span.attributes:
+                    # st.json(span.attributes, expanded=False)
+                    if "input.value" in span.attributes:
+                        try:
+                            input_value = json.loads(span.attributes["input.value"])
+                            if isinstance(input_value, list) and len(input_value) > 0:
+                                st.write(f"Input: {input_value[-1]}")
+                            else:
+                                st.write(f"Input: {input_value}")
+                        except Exception:  # noqa: E722
+                            st.write(f"Input: {span.attributes['input.value']}")
+                    if "output.value" in span.attributes:
+                        try:
+                            output_value = json.loads(span.attributes["output.value"])
+                            if isinstance(output_value, list) and len(output_value) > 0:
+                                st.write(f"Output: {output_value[-1]}")
+                            else:
+                                st.write(f"Output: {output_value}")
+                        except Exception:  # noqa: E722
+                            st.write(f"Output: {span.attributes['output.value']}")
+            with col2:
+                status_color = (
+                    "green" if span.status.status_code == StatusCode.OK else "red"
+                )
+                st.markdown(
+                    f"<span style='color: {status_color}'>● {span.status.status_code.name}</span>",
+                    unsafe_allow_html=True,
+                )
+
+    with st.expander("### 📊 Results", expanded=True):
+        time_col, cost_col, tokens_col = st.columns(3)
+        duration = agent_trace.duration.total_seconds()
+        with time_col:
+            st.info(f"⏱️ Execution Time: {duration:0.2f} seconds")
+        with cost_col:
+            st.info(f"💰 Estimated Cost: ${agent_trace.cost.total_cost:.6f}")
+        with tokens_col:
+            st.info(f"📦 Total Tokens: {agent_trace.usage.total_tokens:,}")
+        st.markdown("#### Final Output")
+        st.info(agent_trace.final_output)
+
+
+async def run_agent(agent, config) -> AgentTrace:
+    st.markdown("#### 🏄 Running Surf Spot Finder with query")
+
+    query = config.input_prompt_template.format(
+        LOCATION=config.location,
+        MAX_DRIVING_HOURS=config.max_driving_hours,
+        DATE=config.date,
+    )
+
+    st.code(query, language="text")
+    kwargs = {}
+    if (
+        config.framework == AgentFramework.OPENAI
+        or config.framework == AgentFramework.TINYAGENT
+    ):
+        kwargs["max_turns"] = 20
+    elif config.framework == AgentFramework.SMOLAGENTS:
+        kwargs["max_steps"] = 20
+    if config.framework == AgentFramework.LANGCHAIN:
+        from langchain_core.runnables import RunnableConfig
+
+        kwargs["config"] = RunnableConfig(recursion_limit=20)
+    elif config.framework == AgentFramework.GOOGLE:
+        from google.adk.agents.run_config import RunConfig
+
+        kwargs["run_config"] = RunConfig(max_llm_calls=20)
+
+    with st.status("Agent is running...", expanded=False, state="running") as status:
+
+        def update_span(span: AgentSpan):
+            # Process input value
+            input_value = span.attributes.get("input.value", "")
+            if input_value:
+                try:
+                    parsed_input = json.loads(input_value)
+                    if isinstance(parsed_input, list) and len(parsed_input) > 0:
+                        input_value = str(parsed_input[-1])
+                except Exception:
+                    pass
+
+            # Process output value
+            output_value = span.attributes.get("output.value", "")
+            if output_value:
+                try:
+                    parsed_output = json.loads(output_value)
+                    if isinstance(parsed_output, list) and len(parsed_output) > 0:
+                        output_value = str(parsed_output[-1])
+                except Exception:
+                    pass
+
+            # Truncate long values
+            max_length = 800
+            if len(input_value) > max_length:
+                input_value = f"[Truncated]...{input_value[-max_length:]}"
+            if len(output_value) > max_length:
+                output_value = f"[Truncated]...{output_value[-max_length:]}"
+
+            # Create a cleaner message format
+            if input_value or output_value:
+                message = f"Step: {span.name}\n"
+                if input_value:
+                    message += f"Input: {input_value}\n"
+                if output_value:
+                    message += f"Output: {output_value}"
+            else:
+                message = f"Step: {span.name}\n{span}"
+
+            status.update(label=message, expanded=False, state="running")
+
+        export_logs(agent, update_span)
+        agent_trace: AgentTrace = await agent.run_async(query, **kwargs)
+        status.update(label="Finished!", expanded=False, state="complete")
+
+    agent.exit()
+    return agent_trace
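
Note: the score shown by display_evaluation_results() is just passed points over total points across all checkpoint results, plus the ground-truth result when present. The same number as a standalone helper, for reference:

    from any_agent.evaluation import TraceEvaluationResult

    def overall_score(result: TraceEvaluationResult) -> float:
        # Mirrors display_evaluation_results(): passed points / total points.
        all_results = list(result.checkpoint_results)
        if result.ground_truth_result is not None:
            all_results.append(result.ground_truth_result)
        total = sum(r.points for r in all_results)
        if total == 0:
            raise ValueError("Total points is 0, cannot calculate score.")
        return sum(r.points for r in all_results if r.passed) / total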