Spaces:
Running
Running
Nathan Brake
committed on
indexed yaml (this is going to support the eval config framework) (#29)
Browse files
- examples/langchain_single_agent.yaml +11 -9
- examples/langchain_single_agent_vertical.yaml +16 -14
- examples/openai_multi_agent.yaml +7 -5
- examples/openai_single_agent.yaml +11 -9
- examples/openai_single_agent_vertical.yaml +17 -15
- examples/smolagents_single_agent.yaml +9 -7
- examples/smolagents_single_agent_mcp.yaml +12 -11
- examples/smolagents_single_agent_vertical.yaml +19 -17
- src/surf_spot_finder/cli.py +1 -3
- src/surf_spot_finder/config.py +28 -0
- src/surf_spot_finder/evaluation/evaluate.py +6 -5
- src/surf_spot_finder/evaluation/test_case.py +5 -1
- src/surf_spot_finder/evaluation/test_cases/alpha.yaml +8 -26
examples/langchain_single_agent.yaml
CHANGED
@@ -1,9 +1,11 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
# input_prompt_template:
|
6 |
+
agent:
|
7 |
+
model_id: o3-mini
|
8 |
+
agent_type: langchain
|
9 |
+
tools:
|
10 |
+
- "surf_spot_finder.tools.search_web"
|
11 |
+
- "surf_spot_finder.tools.visit_webpage"
|
examples/langchain_single_agent_vertical.yaml
CHANGED
@@ -1,14 +1,16 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
# input_prompt_template:
|
6 |
+
agent:
|
7 |
+
model_id: o3-mini
|
8 |
+
agent_type: langchain
|
9 |
+
tools:
|
10 |
+
- "surf_spot_finder.tools.driving_hours_to_meters"
|
11 |
+
- "surf_spot_finder.tools.get_area_lat_lon"
|
12 |
+
- "surf_spot_finder.tools.get_surfing_spots"
|
13 |
+
- "surf_spot_finder.tools.get_wave_forecast"
|
14 |
+
- "surf_spot_finder.tools.get_wind_forecast"
|
15 |
+
- "surf_spot_finder.tools.search_web"
|
16 |
+
- "surf_spot_finder.tools.visit_webpage"
|
examples/openai_multi_agent.yaml
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
|
|
|
|
6 |
# input_prompt_template:
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
agent:
|
6 |
+
model_id: o3-mini
|
7 |
+
agent_type: openai_multi_agent
|
8 |
# input_prompt_template:
|
examples/openai_single_agent.yaml
CHANGED
@@ -1,9 +1,11 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
# input_prompt_template:
|
6 |
+
agent:
|
7 |
+
model_id: o3-mini
|
8 |
+
agent_type: openai
|
9 |
+
tools:
|
10 |
+
- "surf_spot_finder.tools.search_web"
|
11 |
+
- "surf_spot_finder.tools.visit_webpage"
|
examples/openai_single_agent_vertical.yaml
CHANGED
@@ -1,15 +1,17 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-26 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
# input_prompt_template:
|
6 |
+
agent:
|
7 |
+
model_id: o3-mini
|
8 |
+
agent_type: openai
|
9 |
+
tools:
|
10 |
+
- "surf_spot_finder.tools.driving_hours_to_meters"
|
11 |
+
- "surf_spot_finder.tools.get_area_lat_lon"
|
12 |
+
- "surf_spot_finder.tools.get_surfing_spots"
|
13 |
+
- "surf_spot_finder.tools.get_wave_forecast"
|
14 |
+
- "surf_spot_finder.tools.get_wind_forecast"
|
15 |
+
- "surf_spot_finder.tools.search_web"
|
16 |
+
- "surf_spot_finder.tools.show_plan"
|
17 |
+
- "surf_spot_finder.tools.visit_webpage"
|
examples/smolagents_single_agent.yaml
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
|
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
# input_prompt_template:
|
6 |
+
agent:
|
7 |
+
model_id: openai/o3-mini
|
8 |
+
api_key_var: OPENAI_API_KEY
|
9 |
+
agent_type: smolagents
|
examples/smolagents_single_agent_mcp.yaml
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
# input_prompt_template:
|
6 |
+
agent:
|
7 |
+
model_id: openai/gpt-3.5-turbo
|
8 |
+
api_key_var: OPENAI_API_KEY
|
9 |
+
agent_type: smolagents
|
10 |
+
tools:
|
11 |
+
- "smolagents.DuckDuckGoSearchTool"
|
12 |
+
- "mcp/fetch"
|
examples/smolagents_single_agent_vertical.yaml
CHANGED
@@ -1,17 +1,19 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
1 |
+
input:
|
2 |
+
location: Pontevedra
|
3 |
+
date: 2025-03-27 12:00
|
4 |
+
max_driving_hours: 2
|
5 |
+
# input_prompt_template:
|
6 |
+
agent:
|
7 |
+
model_id: openai/o1
|
8 |
+
api_key_var: OPENAI_API_KEY
|
9 |
+
agent_type: smolagents
|
10 |
+
tools:
|
11 |
+
- "surf_spot_finder.tools.driving_hours_to_meters"
|
12 |
+
- "surf_spot_finder.tools.get_area_lat_lon"
|
13 |
+
- "surf_spot_finder.tools.get_surfing_spots"
|
14 |
+
- "surf_spot_finder.tools.get_wave_forecast"
|
15 |
+
- "surf_spot_finder.tools.get_wind_forecast"
|
16 |
+
- "surf_spot_finder.tools.search_web"
|
17 |
+
- "surf_spot_finder.tools.visit_webpage"
|
18 |
+
- "smolagents.PythonInterpreterTool"
|
19 |
+
- "smolagents.FinalAnswerTool"
|
src/surf_spot_finder/cli.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
-
from pathlib import Path
|
2 |
from typing import Optional
|
3 |
|
4 |
-
import yaml
|
5 |
from fire import Fire
|
6 |
from loguru import logger
|
7 |
|
@@ -55,7 +53,7 @@ def find_surf_spot(
|
|
55 |
"""
|
56 |
if from_config:
|
57 |
logger.info(f"Loading {from_config}")
|
58 |
-
config = Config.
|
59 |
else:
|
60 |
config = Config(
|
61 |
location=location,
|
|
|
|
|
1 |
from typing import Optional
|
2 |
|
|
|
3 |
from fire import Fire
|
4 |
from loguru import logger
|
5 |
|
|
|
53 |
"""
|
54 |
if from_config:
|
55 |
logger.info(f"Loading {from_config}")
|
56 |
+
config = Config.from_yaml(from_config)
|
57 |
else:
|
58 |
config = Config(
|
59 |
location=location,
|
src/surf_spot_finder/config.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from typing import Annotated, Optional
|
2 |
from pydantic import AfterValidator, BaseModel, FutureDatetime, PositiveInt
|
|
|
3 |
|
4 |
from surf_spot_finder.prompts.shared import INPUT_PROMPT
|
5 |
|
@@ -31,3 +32,30 @@ class Config(BaseModel):
|
|
31 |
json_tracer: bool = True
|
32 |
api_base: Optional[str] = None
|
33 |
tools: Optional[list[str]] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from typing import Annotated, Optional
|
2 |
from pydantic import AfterValidator, BaseModel, FutureDatetime, PositiveInt
|
3 |
+
import yaml
|
4 |
|
5 |
from surf_spot_finder.prompts.shared import INPUT_PROMPT
|
6 |
|
|
|
32 |
json_tracer: bool = True
|
33 |
api_base: Optional[str] = None
|
34 |
tools: Optional[list[str]] = None
|
35 |
+
|
36 |
+
@classmethod
def from_yaml(cls, yaml_path: str) -> "Config":
    """
    Create a Config instance from a YAML file.

    The file is read as a mapping with two optional top-level sections,
    ``input`` and ``agent``. The keys of both sections are flattened into
    a single set of keyword arguments for the constructor; when the same
    key appears in both sections, the ``agent`` value wins because it is
    applied last. Any other top-level keys are ignored.

    Args:
        yaml_path: Path to the YAML configuration file

    Returns:
        Config: A new Config instance populated with values from the YAML file

    Raises:
        ValueError: If the document, or its ``input``/``agent`` section,
            is present but is not a mapping.
    """
    # Explicit encoding so the file is decoded the same way on every platform.
    with open(yaml_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; treat that as "no keys".
        data = yaml.safe_load(f) or {}

    if not isinstance(data, dict):
        raise ValueError(
            f"Expected a mapping at the top level of {yaml_path}, "
            f"got {type(data).__name__}"
        )

    # Extract and flatten the nested structure: input first, then agent.
    config_dict = {}
    for section in ("input", "agent"):
        # A section written as a bare key (e.g. "input:") loads as None.
        values = data.get(section) or {}
        if not isinstance(values, dict):
            raise ValueError(
                f"Expected '{section}' in {yaml_path} to be a mapping, "
                f"got {type(values).__name__}"
            )
        config_dict.update(values)

    # Create instance from the flattened dictionary
    return cls(**config_dict)
|
src/surf_spot_finder/evaluation/evaluate.py
CHANGED
@@ -24,18 +24,19 @@ logger.add(sys.stdout, colorize=True, format="{message}")
|
|
24 |
|
25 |
def run_agent(test_case: TestCase) -> str:
|
26 |
input_data = test_case.input
|
|
|
27 |
logger.info("Loading config")
|
28 |
config = Config(
|
29 |
location=input_data.location,
|
30 |
date=input_data.date,
|
31 |
max_driving_hours=input_data.max_driving_hours,
|
32 |
-
model_id=
|
33 |
-
api_key_var=
|
34 |
prompt=INPUT_PROMPT,
|
35 |
json_tracer=input_data.json_tracer,
|
36 |
-
api_base=
|
37 |
-
agent_type=
|
38 |
-
tools=
|
39 |
)
|
40 |
return find_surf_spot(
|
41 |
location=config.location,
|
|
|
24 |
|
25 |
def run_agent(test_case: TestCase) -> str:
|
26 |
input_data = test_case.input
|
27 |
+
agent_config = test_case.agent
|
28 |
logger.info("Loading config")
|
29 |
config = Config(
|
30 |
location=input_data.location,
|
31 |
date=input_data.date,
|
32 |
max_driving_hours=input_data.max_driving_hours,
|
33 |
+
model_id=agent_config.model_id,
|
34 |
+
api_key_var=agent_config.api_key_var,
|
35 |
prompt=INPUT_PROMPT,
|
36 |
json_tracer=input_data.json_tracer,
|
37 |
+
api_base=agent_config.api_base,
|
38 |
+
agent_type=agent_config.agent_type,
|
39 |
+
tools=agent_config.tools,
|
40 |
)
|
41 |
return find_surf_spot(
|
42 |
location=config.location,
|
src/surf_spot_finder/evaluation/test_case.py
CHANGED
@@ -10,9 +10,12 @@ class InputModel(BaseModel):
|
|
10 |
location: str
|
11 |
date: str
|
12 |
max_driving_hours: int
|
|
|
|
|
|
|
|
|
13 |
model_id: str
|
14 |
api_key_var: str
|
15 |
-
json_tracer: bool
|
16 |
api_base: Optional[str] = None
|
17 |
agent_type: str
|
18 |
tools: Optional[List[str]] = None
|
@@ -29,6 +32,7 @@ class CheckpointCriteria(BaseModel):
|
|
29 |
class TestCase(BaseModel):
|
30 |
model_config = ConfigDict(extra="forbid")
|
31 |
input: InputModel
|
|
|
32 |
ground_truth: List[Dict[str, Any]] = Field(default_factory=list)
|
33 |
checkpoints: List[CheckpointCriteria] = Field(default_factory=list)
|
34 |
final_answer_criteria: List[CheckpointCriteria] = Field(default_factory=list)
|
|
|
10 |
location: str
|
11 |
date: str
|
12 |
max_driving_hours: int
|
13 |
+
json_tracer: bool
|
14 |
+
|
15 |
+
|
16 |
+
class AgentModel(BaseModel):
|
17 |
model_id: str
|
18 |
api_key_var: str
|
|
|
19 |
api_base: Optional[str] = None
|
20 |
agent_type: str
|
21 |
tools: Optional[List[str]] = None
|
|
|
32 |
class TestCase(BaseModel):
|
33 |
model_config = ConfigDict(extra="forbid")
|
34 |
input: InputModel
|
35 |
+
agent: AgentModel
|
36 |
ground_truth: List[Dict[str, Any]] = Field(default_factory=list)
|
37 |
checkpoints: List[CheckpointCriteria] = Field(default_factory=list)
|
38 |
final_answer_criteria: List[CheckpointCriteria] = Field(default_factory=list)
|
src/surf_spot_finder/evaluation/test_cases/alpha.yaml
CHANGED
@@ -6,23 +6,12 @@ input:
|
|
6 |
location: "Vigo"
|
7 |
date: "2025-03-27 22:00"
|
8 |
max_driving_hours: 3
|
9 |
-
api_key_var: "OPENAI_API_KEY"
|
10 |
json_tracer: true
|
|
|
|
|
11 |
api_base: null
|
12 |
-
|
13 |
-
|
14 |
-
# tools:
|
15 |
-
# - "surf_spot_finder.tools.driving_hours_to_meters"
|
16 |
-
# - "surf_spot_finder.tools.get_area_lat_lon"
|
17 |
-
# - "surf_spot_finder.tools.get_surfing_spots"
|
18 |
-
# - "surf_spot_finder.tools.get_wave_forecast"
|
19 |
-
# - "surf_spot_finder.tools.get_wind_forecast"
|
20 |
-
# - "surf_spot_finder.tools.search_web"
|
21 |
-
# - "surf_spot_finder.tools.visit_webpage"
|
22 |
-
# - "smolagents.PythonInterpreterTool"
|
23 |
-
# - "smolagents.FinalAnswerTool"
|
24 |
-
agent_type: langchain
|
25 |
-
model_id: o1
|
26 |
tools:
|
27 |
- "surf_spot_finder.tools.driving_hours_to_meters"
|
28 |
- "surf_spot_finder.tools.get_area_lat_lon"
|
@@ -31,17 +20,10 @@ input:
|
|
31 |
- "surf_spot_finder.tools.get_wind_forecast"
|
32 |
- "surf_spot_finder.tools.search_web"
|
33 |
- "surf_spot_finder.tools.visit_webpage"
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
# - "surf_spot_finder.tools.get_area_lat_lon"
|
39 |
-
# - "surf_spot_finder.tools.get_surfing_spots"
|
40 |
-
# - "surf_spot_finder.tools.get_wave_forecast"
|
41 |
-
# - "surf_spot_finder.tools.get_wind_forecast"
|
42 |
-
# - "surf_spot_finder.tools.search_web"
|
43 |
-
# - "surf_spot_finder.tools.show_plan"
|
44 |
-
# - "surf_spot_finder.tools.visit_webpage"
|
45 |
ground_truth:
|
46 |
- name: "Surf location"
|
47 |
points: 5
|
|
|
6 |
location: "Vigo"
|
7 |
date: "2025-03-27 22:00"
|
8 |
max_driving_hours: 3
|
|
|
9 |
json_tracer: true
|
10 |
+
agent:
|
11 |
+
api_key_var: "OPENAI_API_KEY"
|
12 |
api_base: null
|
13 |
+
model_id: "openai/o1"
|
14 |
+
agent_type: "smolagents"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
tools:
|
16 |
- "surf_spot_finder.tools.driving_hours_to_meters"
|
17 |
- "surf_spot_finder.tools.get_area_lat_lon"
|
|
|
20 |
- "surf_spot_finder.tools.get_wind_forecast"
|
21 |
- "surf_spot_finder.tools.search_web"
|
22 |
- "surf_spot_finder.tools.visit_webpage"
|
23 |
+
- "smolagents.PythonInterpreterTool"
|
24 |
+
- "smolagents.FinalAnswerTool"
|
25 |
+
|
26 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
ground_truth:
|
28 |
- name: "Surf location"
|
29 |
points: 5
|