Spaces:
Running
Running
File size: 4,530 Bytes
6fdc19a 62cf4ef 6fdc19a 62cf4ef 6fdc19a 62cf4ef 6fdc19a 62cf4ef 6fdc19a 62cf4ef 6fdc19a 62cf4ef 6fdc19a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
from datetime import datetime, timedelta
import json
import requests
import streamlit as st
from any_agent import AgentFramework
from any_agent.tracing.trace import _is_tracing_supported
from any_agent.evaluation import EvaluationCase
from constants import MODEL_OPTIONS
def create_evaluation_case() -> EvaluationCase:
    """Build the fixed EvaluationCase used for this app.

    The function takes no arguments: the judge model id and every checkpoint
    are hard-coded below. Each checkpoint is a dict with a ``criteria``
    sentence and a ``points`` value of 1. The criteria cover which tools the
    agent called (and in which order), what the final answer contains, and
    how many steps the agent needed.

    Returns:
        EvaluationCase: The evaluation case built from the hard-coded
            judge model id and checkpoints.
    """
    # One entry per checkpoint; every checkpoint is worth a single point.
    criteria_texts = (
        "Check if the agent used the get_surfing_spots tool and it succeeded, and that the tool was used before the get_wave_forecast and get_wind_forecast tools",
        "Check if the agent used the get_wave_forecast tool and it succeeded",
        "Check if the agent used the get_wind_forecast tool and it succeeded",
        "Check if the agent used the get_area_lat_lon tool and it succeeded",
        "Check if the agent used the driving_hours_to_meters tool to convert the driving hours to meters and it succeeded",
        "Check if the final answer contains any description about the weather at the chosen location",
        "Check if the final answer contains one of the surf spots found by a call of the get_surfing_spots tool",
        "Check that the agent completed in fewer than 10 steps",
    )
    checkpoints = [{"criteria": text, "points": 1} for text in criteria_texts]
    return EvaluationCase(
        llm_judge="openai/gpt-4.1-mini",
        checkpoints=checkpoints,
    )
@st.cache_resource
def get_area(area_name: str) -> list:
    """Search Nominatim for an area by name.

    Uses the [Nominatim API](https://nominatim.org/release-docs/develop/api/Search/).
    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
    result for a name that has already been looked up.

    Args:
        area_name (str): The free-form name of the area to search for.

    Returns:
        list: The decoded JSON body of the response. For ``format=json`` the
            search endpoint answers with a JSON array of matching places,
            which is empty when nothing matches.

    Raises:
        requests.HTTPError: If Nominatim answers with an error status code.
        requests.RequestException: On connection problems or when the
            5 second timeout expires.
    """
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        # Let requests build and escape the query string. Interpolating the
        # raw name into the URL breaks for names containing "&", "#", "+"
        # or "=", which would be parsed as query syntax instead of text.
        params={"q": area_name, "format": "json"},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=5,
    )
    response.raise_for_status()
    return response.json()
def get_user_inputs() -> dict:
    """Render the Streamlit input widgets and collect the user's choices.

    Widgets are rendered in this order: location text box (with a lookup
    status next to it), maximum driving hours, date and time pickers, agent
    framework selector, model selector, an expander showing the evaluation
    case, and a "Run Evaluation" checkbox.

    Returns:
        dict: The collected settings, with the keys

            * ``"location"``: the text entered in the location box.
            * ``"max_driving_hours"``: the value of the number input.
            * ``"date"``: a ``datetime`` combining the chosen date and time.
            * ``"framework"``: the selected ``AgentFramework`` member.
            * ``"model_id"``: the selected entry of ``MODEL_OPTIONS``.
            * ``"evaluation_case"``: the ``EvaluationCase`` when the
              "Run Evaluation" checkbox is ticked, otherwise ``None``.

    Raises:
        Any exception raised by ``get_area`` while checking the location
        propagates to the caller.
    """
    default_val = "Los Angeles California, US"

    col1, col2 = st.columns([3, 1])
    with col1:
        location = st.text_input("Enter a location", value=default_val)
    with col2:
        # Only query the geocoder when there is something to look up.
        if location:
            location_check = get_area(location)
            if not location_check:
                st.error("❌")
            else:
                st.success("✅")

    max_driving_hours = st.number_input(
        "Enter the maximum driving hours", min_value=1, value=2
    )

    col_date, col_time = st.columns([2, 1])
    with col_date:
        selected_date = st.date_input(
            "Select a date in the future", value=datetime.now() + timedelta(days=1)
        )
    with col_time:
        # Default to exactly 9am; seconds and microseconds are cleared so the
        # default does not carry over the current wall-clock fraction.
        selected_time = st.time_input(
            "Select a time",
            value=datetime.now()
            .time()
            .replace(hour=9, minute=0, second=0, microsecond=0),
        )
    date = datetime.combine(selected_date, selected_time)

    supported_frameworks = [
        candidate for candidate in AgentFramework if _is_tracing_supported(candidate)
    ]
    # The preferred default is the third entry. Clamp it so the selectbox does
    # not reject the index when fewer than three frameworks are available.
    default_framework_index = max(0, min(2, len(supported_frameworks) - 1))

    framework = st.selectbox(
        "Select the agent framework to use",
        supported_frameworks,
        index=default_framework_index,
        format_func=lambda x: x.name,
    )

    model_id = st.selectbox(
        "Select the model to use",
        MODEL_OPTIONS,
        index=0,
        # Show only the last three "/"-separated segments of the model id.
        format_func=lambda x: "/".join(x.split("/")[-3:]),
    )

    # Add evaluation case section
    with st.expander("Evaluation Case"):
        evaluation_case = create_evaluation_case()
        # st.json takes ``expanded`` directly; st.write does not use extra
        # keyword arguments.
        st.json(evaluation_case.model_dump(), expanded=True)

    run_evaluation = st.checkbox("Run Evaluation", value=True)

    return {
        "location": location,
        "max_driving_hours": max_driving_hours,
        "date": date,
        "framework": framework,
        "model_id": model_id,
        "evaluation_case": evaluation_case if run_evaluation else None,
    }
|