Nathan Brake committed on
Commit
ffb4e87
·
unverified ·
1 Parent(s): 7c69831

indexed yaml (this is going to support the eval config framework) (#29)

Browse files
examples/langchain_single_agent.yaml CHANGED
@@ -1,9 +1,11 @@
1
- location: Pontevedra
2
- date: 2025-03-22 12:00
3
- max_driving_hours: 2
4
- model_id: o3-mini
5
- agent_type: langchain
6
- tools:
7
- - "surf_spot_finder.tools.search_web"
8
- - "surf_spot_finder.tools.visit_webpage"
9
- # input_prompt_template:
 
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ # input_prompt_template:
6
+ agent:
7
+ model_id: o3-mini
8
+ agent_type: langchain
9
+ tools:
10
+ - "surf_spot_finder.tools.search_web"
11
+ - "surf_spot_finder.tools.visit_webpage"
examples/langchain_single_agent_vertical.yaml CHANGED
@@ -1,14 +1,16 @@
1
- location: Pontevedra
2
- date: 2025-03-22 12:00
3
- max_driving_hours: 2
4
- model_id: o3-mini
5
- agent_type: langchain
6
- tools:
7
- - "surf_spot_finder.tools.driving_hours_to_meters"
8
- - "surf_spot_finder.tools.get_area_lat_lon"
9
- - "surf_spot_finder.tools.get_surfing_spots"
10
- - "surf_spot_finder.tools.get_wave_forecast"
11
- - "surf_spot_finder.tools.get_wind_forecast"
12
- - "surf_spot_finder.tools.search_web"
13
- - "surf_spot_finder.tools.visit_webpage"
14
- # input_prompt_template:
 
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ # input_prompt_template:
6
+ agent:
7
+ model_id: o3-mini
8
+ agent_type: langchain
9
+ tools:
10
+ - "surf_spot_finder.tools.driving_hours_to_meters"
11
+ - "surf_spot_finder.tools.get_area_lat_lon"
12
+ - "surf_spot_finder.tools.get_surfing_spots"
13
+ - "surf_spot_finder.tools.get_wave_forecast"
14
+ - "surf_spot_finder.tools.get_wind_forecast"
15
+ - "surf_spot_finder.tools.search_web"
16
+ - "surf_spot_finder.tools.visit_webpage"
examples/openai_multi_agent.yaml CHANGED
@@ -1,6 +1,8 @@
1
- location: Pontevedra
2
- date: 2025-03-22 12:00
3
- max_driving_hours: 2
4
- model_id: o3-mini
5
- agent_type: openai_multi_agent
 
 
6
  # input_prompt_template:
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ agent:
6
+ model_id: o3-mini
7
+ agent_type: openai_multi_agent
8
  # input_prompt_template:
examples/openai_single_agent.yaml CHANGED
@@ -1,9 +1,11 @@
1
- location: Pontevedra
2
- date: 2025-03-22 12:00
3
- max_driving_hours: 2
4
- model_id: o3-mini
5
- agent_type: openai
6
- tools:
7
- - "surf_spot_finder.tools.search_web"
8
- - "surf_spot_finder.tools.visit_webpage"
9
- # input_prompt_template:
 
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ # input_prompt_template:
6
+ agent:
7
+ model_id: o3-mini
8
+ agent_type: openai
9
+ tools:
10
+ - "surf_spot_finder.tools.search_web"
11
+ - "surf_spot_finder.tools.visit_webpage"
examples/openai_single_agent_vertical.yaml CHANGED
@@ -1,15 +1,17 @@
1
- location: Pontevedra
2
- date: 2025-03-22 12:00
3
- max_driving_hours: 2
4
- model_id: o3-mini
5
- agent_type: openai
6
- tools:
7
- - "surf_spot_finder.tools.driving_hours_to_meters"
8
- - "surf_spot_finder.tools.get_area_lat_lon"
9
- - "surf_spot_finder.tools.get_surfing_spots"
10
- - "surf_spot_finder.tools.get_wave_forecast"
11
- - "surf_spot_finder.tools.get_wind_forecast"
12
- - "surf_spot_finder.tools.search_web"
13
- - "surf_spot_finder.tools.show_plan"
14
- - "surf_spot_finder.tools.visit_webpage"
15
- # input_prompt_template:
 
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-26 12:00
4
+ max_driving_hours: 2
5
+ # input_prompt_template:
6
+ agent:
7
+ model_id: o3-mini
8
+ agent_type: openai
9
+ tools:
10
+ - "surf_spot_finder.tools.driving_hours_to_meters"
11
+ - "surf_spot_finder.tools.get_area_lat_lon"
12
+ - "surf_spot_finder.tools.get_surfing_spots"
13
+ - "surf_spot_finder.tools.get_wave_forecast"
14
+ - "surf_spot_finder.tools.get_wind_forecast"
15
+ - "surf_spot_finder.tools.search_web"
16
+ - "surf_spot_finder.tools.show_plan"
17
+ - "surf_spot_finder.tools.visit_webpage"
examples/smolagents_single_agent.yaml CHANGED
@@ -1,7 +1,9 @@
1
- location: Pontevedra
2
- date: 2025-03-22 12:00
3
- max_driving_hours: 2
4
- model_id: openai/o3-mini
5
- api_key_var: OPENAI_API_KEY
6
- agent_type: smolagents
7
- # input_prompt_template:
 
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ # input_prompt_template:
6
+ agent:
7
+ model_id: openai/o3-mini
8
+ api_key_var: OPENAI_API_KEY
9
+ agent_type: smolagents
examples/smolagents_single_agent_mcp.yaml CHANGED
@@ -1,11 +1,12 @@
1
- location: Pontevedra
2
- date: 2025-03-22 12:00
3
- max_driving_hours: 2
4
- model_id: openai/gpt-3.5-turbo
5
- api_key_var: OPENAI_API_KEY
6
- agent_type: smolagents
7
- tools:
8
- - "smolagents.DuckDuckGoSearchTool"
9
- - "mcp/fetch"
10
-
11
- # input_prompt_template:
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ # input_prompt_template:
6
+ agent:
7
+ model_id: openai/gpt-3.5-turbo
8
+ api_key_var: OPENAI_API_KEY
9
+ agent_type: smolagents
10
+ tools:
11
+ - "smolagents.DuckDuckGoSearchTool"
12
+ - "mcp/fetch"
examples/smolagents_single_agent_vertical.yaml CHANGED
@@ -1,17 +1,19 @@
1
- location: Pontevedra
2
- date: 2025-03-22 12:00
3
- max_driving_hours: 2
4
- model_id: openai/o3-mini
5
- api_key_var: OPENAI_API_KEY
6
- agent_type: smolagents
7
- tools:
8
- - "surf_spot_finder.tools.driving_hours_to_meters"
9
- - "surf_spot_finder.tools.get_area_lat_lon"
10
- - "surf_spot_finder.tools.get_surfing_spots"
11
- - "surf_spot_finder.tools.get_wave_forecast"
12
- - "surf_spot_finder.tools.get_wind_forecast"
13
- - "surf_spot_finder.tools.search_web"
14
- - "surf_spot_finder.tools.visit_webpage"
15
- - "smolagents.PythonInterpreterTool"
16
- - "smolagents.FinalAnswerTool"
17
- # input_prompt_template:
 
 
 
1
+ input:
2
+ location: Pontevedra
3
+ date: 2025-03-27 12:00
4
+ max_driving_hours: 2
5
+ # input_prompt_template:
6
+ agent:
7
+ model_id: openai/o1
8
+ api_key_var: OPENAI_API_KEY
9
+ agent_type: smolagents
10
+ tools:
11
+ - "surf_spot_finder.tools.driving_hours_to_meters"
12
+ - "surf_spot_finder.tools.get_area_lat_lon"
13
+ - "surf_spot_finder.tools.get_surfing_spots"
14
+ - "surf_spot_finder.tools.get_wave_forecast"
15
+ - "surf_spot_finder.tools.get_wind_forecast"
16
+ - "surf_spot_finder.tools.search_web"
17
+ - "surf_spot_finder.tools.visit_webpage"
18
+ - "smolagents.PythonInterpreterTool"
19
+ - "smolagents.FinalAnswerTool"
src/surf_spot_finder/cli.py CHANGED
@@ -1,7 +1,5 @@
1
- from pathlib import Path
2
  from typing import Optional
3
 
4
- import yaml
5
  from fire import Fire
6
  from loguru import logger
7
 
@@ -55,7 +53,7 @@ def find_surf_spot(
55
  """
56
  if from_config:
57
  logger.info(f"Loading {from_config}")
58
- config = Config.model_validate(yaml.safe_load(Path(from_config).read_text()))
59
  else:
60
  config = Config(
61
  location=location,
 
 
1
  from typing import Optional
2
 
 
3
  from fire import Fire
4
  from loguru import logger
5
 
 
53
  """
54
  if from_config:
55
  logger.info(f"Loading {from_config}")
56
+ config = Config.from_yaml(from_config)
57
  else:
58
  config = Config(
59
  location=location,
src/surf_spot_finder/config.py CHANGED
@@ -1,5 +1,6 @@
1
  from typing import Annotated, Optional
2
  from pydantic import AfterValidator, BaseModel, FutureDatetime, PositiveInt
 
3
 
4
  from surf_spot_finder.prompts.shared import INPUT_PROMPT
5
 
@@ -31,3 +32,30 @@ class Config(BaseModel):
31
  json_tracer: bool = True
32
  api_base: Optional[str] = None
33
  tools: Optional[list[str]] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import Annotated, Optional
2
  from pydantic import AfterValidator, BaseModel, FutureDatetime, PositiveInt
3
+ import yaml
4
 
5
  from surf_spot_finder.prompts.shared import INPUT_PROMPT
6
 
 
32
  json_tracer: bool = True
33
  api_base: Optional[str] = None
34
  tools: Optional[list[str]] = None
35
+
36
+ @classmethod
37
+ def from_yaml(cls, yaml_path: str) -> "Config":
38
+ """
39
+ Create a Config instance from a YAML file.
40
+
41
+ Args:
42
+ yaml_path: Path to the YAML configuration file
43
+
44
+ Returns:
45
+ Config: A new Config instance populated with values from the YAML file
46
+ """
47
+ with open(yaml_path, "r") as f:
48
+ data = yaml.safe_load(f)
49
+
50
+ # Extract and flatten the nested structure
51
+ config_dict = {}
52
+
53
+ # Add input parameters
54
+ if "input" in data:
55
+ config_dict.update(data["input"])
56
+
57
+ # Add agent parameters
58
+ if "agent" in data:
59
+ config_dict.update(data["agent"])
60
+ # Create instance from the flattened dictionary
61
+ return cls(**config_dict)
src/surf_spot_finder/evaluation/evaluate.py CHANGED
@@ -24,18 +24,19 @@ logger.add(sys.stdout, colorize=True, format="{message}")
24
 
25
  def run_agent(test_case: TestCase) -> str:
26
  input_data = test_case.input
 
27
  logger.info("Loading config")
28
  config = Config(
29
  location=input_data.location,
30
  date=input_data.date,
31
  max_driving_hours=input_data.max_driving_hours,
32
- model_id=input_data.model_id,
33
- api_key_var=input_data.api_key_var,
34
  prompt=INPUT_PROMPT,
35
  json_tracer=input_data.json_tracer,
36
- api_base=input_data.api_base,
37
- agent_type=input_data.agent_type,
38
- tools=input_data.tools,
39
  )
40
  return find_surf_spot(
41
  location=config.location,
 
24
 
25
  def run_agent(test_case: TestCase) -> str:
26
  input_data = test_case.input
27
+ agent_config = test_case.agent
28
  logger.info("Loading config")
29
  config = Config(
30
  location=input_data.location,
31
  date=input_data.date,
32
  max_driving_hours=input_data.max_driving_hours,
33
+ model_id=agent_config.model_id,
34
+ api_key_var=agent_config.api_key_var,
35
  prompt=INPUT_PROMPT,
36
  json_tracer=input_data.json_tracer,
37
+ api_base=agent_config.api_base,
38
+ agent_type=agent_config.agent_type,
39
+ tools=agent_config.tools,
40
  )
41
  return find_surf_spot(
42
  location=config.location,
src/surf_spot_finder/evaluation/test_case.py CHANGED
@@ -10,9 +10,12 @@ class InputModel(BaseModel):
10
  location: str
11
  date: str
12
  max_driving_hours: int
 
 
 
 
13
  model_id: str
14
  api_key_var: str
15
- json_tracer: bool
16
  api_base: Optional[str] = None
17
  agent_type: str
18
  tools: Optional[List[str]] = None
@@ -29,6 +32,7 @@ class CheckpointCriteria(BaseModel):
29
  class TestCase(BaseModel):
30
  model_config = ConfigDict(extra="forbid")
31
  input: InputModel
 
32
  ground_truth: List[Dict[str, Any]] = Field(default_factory=list)
33
  checkpoints: List[CheckpointCriteria] = Field(default_factory=list)
34
  final_answer_criteria: List[CheckpointCriteria] = Field(default_factory=list)
 
10
  location: str
11
  date: str
12
  max_driving_hours: int
13
+ json_tracer: bool
14
+
15
+
16
+ class AgentModel(BaseModel):
17
  model_id: str
18
  api_key_var: str
 
19
  api_base: Optional[str] = None
20
  agent_type: str
21
  tools: Optional[List[str]] = None
 
32
  class TestCase(BaseModel):
33
  model_config = ConfigDict(extra="forbid")
34
  input: InputModel
35
+ agent: AgentModel
36
  ground_truth: List[Dict[str, Any]] = Field(default_factory=list)
37
  checkpoints: List[CheckpointCriteria] = Field(default_factory=list)
38
  final_answer_criteria: List[CheckpointCriteria] = Field(default_factory=list)
src/surf_spot_finder/evaluation/test_cases/alpha.yaml CHANGED
@@ -6,23 +6,12 @@ input:
6
  location: "Vigo"
7
  date: "2025-03-27 22:00"
8
  max_driving_hours: 3
9
- api_key_var: "OPENAI_API_KEY"
10
  json_tracer: true
 
 
11
  api_base: null
12
- # model_id: "openai/o1"
13
- # agent_type: "smolagents"
14
- # tools:
15
- # - "surf_spot_finder.tools.driving_hours_to_meters"
16
- # - "surf_spot_finder.tools.get_area_lat_lon"
17
- # - "surf_spot_finder.tools.get_surfing_spots"
18
- # - "surf_spot_finder.tools.get_wave_forecast"
19
- # - "surf_spot_finder.tools.get_wind_forecast"
20
- # - "surf_spot_finder.tools.search_web"
21
- # - "surf_spot_finder.tools.visit_webpage"
22
- # - "smolagents.PythonInterpreterTool"
23
- # - "smolagents.FinalAnswerTool"
24
- agent_type: langchain
25
- model_id: o1
26
  tools:
27
  - "surf_spot_finder.tools.driving_hours_to_meters"
28
  - "surf_spot_finder.tools.get_area_lat_lon"
@@ -31,17 +20,10 @@ input:
31
  - "surf_spot_finder.tools.get_wind_forecast"
32
  - "surf_spot_finder.tools.search_web"
33
  - "surf_spot_finder.tools.visit_webpage"
34
- # model_id: o3-mini
35
- # agent_type: openai
36
- # tools:
37
- # - "surf_spot_finder.tools.driving_hours_to_meters"
38
- # - "surf_spot_finder.tools.get_area_lat_lon"
39
- # - "surf_spot_finder.tools.get_surfing_spots"
40
- # - "surf_spot_finder.tools.get_wave_forecast"
41
- # - "surf_spot_finder.tools.get_wind_forecast"
42
- # - "surf_spot_finder.tools.search_web"
43
- # - "surf_spot_finder.tools.show_plan"
44
- # - "surf_spot_finder.tools.visit_webpage"
45
  ground_truth:
46
  - name: "Surf location"
47
  points: 5
 
6
  location: "Vigo"
7
  date: "2025-03-27 22:00"
8
  max_driving_hours: 3
 
9
  json_tracer: true
10
+ agent:
11
+ api_key_var: "OPENAI_API_KEY"
12
  api_base: null
13
+ model_id: "openai/o1"
14
+ agent_type: "smolagents"
 
 
 
 
 
 
 
 
 
 
 
 
15
  tools:
16
  - "surf_spot_finder.tools.driving_hours_to_meters"
17
  - "surf_spot_finder.tools.get_area_lat_lon"
 
20
  - "surf_spot_finder.tools.get_wind_forecast"
21
  - "surf_spot_finder.tools.search_web"
22
  - "surf_spot_finder.tools.visit_webpage"
23
+ - "smolagents.PythonInterpreterTool"
24
+ - "smolagents.FinalAnswerTool"
25
+
26
+
 
 
 
 
 
 
 
27
  ground_truth:
28
  - name: "Surf location"
29
  points: 5