Spaces:
Running
Running
hammaad-swe
committed on
Commit
·
fd5668b
1
Parent(s):
11534de
feat: gemini-model support
Browse files
- .gradio/certificate.pem +31 -0
- agent.py +60 -0
- app.py +7 -5
- gaia_agent.py +0 -33
- logic.py +6 -5
- requirements.txt +4 -1
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
-----BEGIN CERTIFICATE-----
|
2 |
+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
3 |
+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
4 |
+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
5 |
+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
6 |
+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
7 |
+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
8 |
+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
9 |
+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
10 |
+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
11 |
+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
12 |
+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
13 |
+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
14 |
+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
15 |
+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
16 |
+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
17 |
+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
18 |
+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
19 |
+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
20 |
+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
21 |
+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
22 |
+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
23 |
+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
24 |
+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
25 |
+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
26 |
+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
27 |
+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
28 |
+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
29 |
+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
30 |
+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
31 |
+
-----END CERTIFICATE-----
|
agent.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from smolagents import (
|
2 |
+
CodeAgent,
|
3 |
+
DuckDuckGoSearchTool,
|
4 |
+
FinalAnswerTool,
|
5 |
+
PythonInterpreterTool,
|
6 |
+
VisitWebpageTool,
|
7 |
+
WikipediaSearchTool,
|
8 |
+
tool,
|
9 |
+
)
|
10 |
+
from smolagents.models import Model
|
11 |
+
import google.generativeai as genai
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
import os
|
14 |
+
|
15 |
+
load_dotenv()
|
16 |
+
|
17 |
+
class GeminiModel(Model):
    """smolagents model backend that delegates text generation to Google Gemini.

    Configuration is read from environment variables (``load_dotenv()`` is
    called at module import, so a local ``.env`` file is honoured):

    * ``GEMINI_API_KEY`` -- required; API key handed to ``genai.configure``.
    * ``GEMINI_MODEL``   -- optional; model name, defaults to ``"gemini-pro"``.

    Raises:
        ValueError: at construction time when ``GEMINI_API_KEY`` is unset or
            empty.
    """

    # The Gemini API accepts at most this many stop sequences per request.
    _MAX_STOP_SEQUENCES = 5

    def __init__(self):
        # Initialise the smolagents base class first so the bookkeeping
        # attributes it defines exist before any Gemini-specific state is added.
        super().__init__()

        self.api_key = os.getenv("GEMINI_API_KEY")
        self.model_name = os.getenv("GEMINI_MODEL", "gemini-pro")

        if not self.api_key:
            raise ValueError("GEMINI_API_KEY not found in .env")

        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel(self.model_name)

    @staticmethod
    def _flatten_messages(messages) -> str:
        """Render a chat history as a single plain-text prompt.

        Each message may be a mapping or an object exposing ``role`` and
        ``content``. ``content`` may be a plain string or a list of parts, of
        which only ``{"type": "text", "text": ...}`` parts are kept. Messages
        without any text are skipped.
        """
        rendered = []
        for message in messages:
            if isinstance(message, dict):
                role = message.get("role")
                content = message.get("content")
            else:
                role = getattr(message, "role", None)
                content = getattr(message, "content", None)

            if isinstance(content, list):
                content = "\n".join(
                    part.get("text", "")
                    for part in content
                    if isinstance(part, dict) and part.get("type") == "text"
                )
            if not content:
                continue

            # Roles may be plain strings or enum members; show the bare value.
            role = getattr(role, "value", role)
            rendered.append(f"{role}: {content}" if role else str(content))
        return "\n\n".join(rendered)

    def generate(self, messages, stop_sequences=None, **kwargs):
        """Answer a chat history; this is the entry point agents drive.

        Args:
            messages: Chat history (list of role/content messages), or an
                already-flattened prompt string.
            stop_sequences: Optional strings at which Gemini should stop
                generating. Only the first ``_MAX_STOP_SEQUENCES`` are sent.
            **kwargs: Accepted for interface compatibility and ignored (for
                example tool-calling or response-format options).

        Returns:
            A smolagents ``ChatMessage`` with role ``"assistant"`` whose
            ``content`` is the generated text.

        Raises:
            Exception: whatever the Gemini client raises. Errors are
                deliberately not converted into text here, so the agent never
                mistakes an error message for model output.
        """
        # Function-scope import keeps the module's import block untouched.
        from smolagents.models import ChatMessage

        if isinstance(messages, str):
            prompt = messages
        else:
            prompt = self._flatten_messages(messages)

        generation_config = None
        if stop_sequences:
            generation_config = {
                "stop_sequences": list(stop_sequences)[: self._MAX_STOP_SEQUENCES]
            }

        response = self.model.generate_content(
            prompt, generation_config=generation_config
        )
        return ChatMessage(role="assistant", content=response.text)

    def __call__(self, messages, stop_sequences=None, **kwargs):
        """Alias for :meth:`generate`, for smolagents versions that call the model directly."""
        return self.generate(messages, stop_sequences=stop_sequences, **kwargs)

    def complete(self, prompt: str) -> str:
        """Generate text for a single prompt string.

        Kept for existing callers. Failures are reported in-band: any exception
        raised during generation is returned as an
        ``"Error generating content: ..."`` string instead of propagating.

        Args:
            prompt: Text sent to the Gemini model as-is.

        Returns:
            The stripped response text, ``str(response)`` when the response has
            no ``text`` attribute, or the error string described above.
        """
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip() if hasattr(response, "text") else str(response)
        except Exception as e:
            return f"Error generating content: {e}"
|
34 |
+
|
35 |
+
class GaiaAgent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    The wrapped agent is driven by a ``GeminiModel`` and equipped with web
    search, web page retrieval, Wikipedia search, a Python interpreter and a
    final-answer tool.
    """

    def __init__(self):
        """Create the Gemini model and the tool set, then assemble the agent."""
        print("GaiaAgent initialized with tools.")

        # The model is built before the tools, so a missing API key aborts
        # construction before any tool is instantiated.
        llm = GeminiModel()

        tool_classes = (
            DuckDuckGoSearchTool,
            VisitWebpageTool,
            WikipediaSearchTool,
            PythonInterpreterTool,
            FinalAnswerTool,
        )
        toolbox = [tool_cls() for tool_cls in tool_classes]

        self.agent = CodeAgent(model=llm, tools=toolbox)

    def __call__(self, task_id: str, question: str) -> str:
        """Run the agent on one question and return whatever it produces.

        Args:
            task_id: Identifier of the task; only used in the log line.
            question: Full question text passed to the agent.

        Returns:
            The value returned by ``self.agent.run(question)``.
        """
        print(f"Agent received {task_id=}\n{question[:50]=}...")
        result = self.agent.run(question)
        print(f"Agent returning answer: {result}")
        return result
|
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
|
3 |
-
import
|
4 |
import gradio as gr
|
5 |
import logic
|
6 |
import pandas as pd
|
@@ -9,7 +9,9 @@ from dotenv import load_dotenv
|
|
9 |
load_dotenv()
|
10 |
|
11 |
|
12 |
-
def run_and_submit_all(
|
|
|
|
|
13 |
"""Fetches all questions, runs the BasicAgent on them, submits all answers,
|
14 |
and displays the results.
|
15 |
|
@@ -39,7 +41,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
39 |
|
40 |
# 1. Instantiate Agent
|
41 |
try:
|
42 |
-
|
43 |
except Exception as e:
|
44 |
print(f"Error instantiating agent: {e}")
|
45 |
return f"Error initializing agent: {e}", None
|
@@ -51,7 +53,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
51 |
return str(e), None
|
52 |
|
53 |
# 3. Run the Agent
|
54 |
-
results_log, answers_payload = logic.run_agent(
|
55 |
if not answers_payload:
|
56 |
print("Agent did not produce any answers to submit.")
|
57 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
@@ -136,4 +138,4 @@ if __name__ == "__main__":
|
|
136 |
print("-" * (60 + len(" App Starting ")) + "\n")
|
137 |
|
138 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
139 |
-
gaia_ui.launch(debug=True, share=
|
|
|
1 |
import os
|
2 |
|
3 |
+
import agent
|
4 |
import gradio as gr
|
5 |
import logic
|
6 |
import pandas as pd
|
|
|
9 |
load_dotenv()
|
10 |
|
11 |
|
12 |
+
def run_and_submit_all(
|
13 |
+
profile: gr.OAuthProfile | None,
|
14 |
+
) -> tuple[str, pd.DataFrame | None]:
|
15 |
"""Fetches all questions, runs the BasicAgent on them, submits all answers,
|
16 |
and displays the results.
|
17 |
|
|
|
41 |
|
42 |
# 1. Instantiate Agent
|
43 |
try:
|
44 |
+
gaia_agent = agent.GaiaAgent()
|
45 |
except Exception as e:
|
46 |
print(f"Error instantiating agent: {e}")
|
47 |
return f"Error initializing agent: {e}", None
|
|
|
53 |
return str(e), None
|
54 |
|
55 |
# 3. Run the Agent
|
56 |
+
results_log, answers_payload = logic.run_agent(gaia_agent, questions_data)
|
57 |
if not answers_payload:
|
58 |
print("Agent did not produce any answers to submit.")
|
59 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
|
|
138 |
print("-" * (60 + len(" App Starting ")) + "\n")
|
139 |
|
140 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
141 |
+
gaia_ui.launch(debug=True, share=True)
|
gaia_agent.py
DELETED
@@ -1,33 +0,0 @@
|
|
1 |
-
class GaiaAgent:
    """Baseline agent that replies to every question with one canned answer.

    Intended as a placeholder for testing and demonstrations: it performs no
    reasoning and no information retrieval.
    """

    def __init__(self):
        """Announce construction on stdout; no other setup is performed."""
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Log a preview of the question and return the canned answer.

        Args:
            question: The question text; only its first 50 characters are
                echoed to stdout.

        Returns:
            The fixed string ``"This is a default answer."``.
        """
        preview = question[:50]
        print(f"Agent received question (first 50 chars): {preview}...")

        reply = "This is a default answer."
        print(f"Agent returning fixed answer: {reply}")
        return reply
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
logic.py
CHANGED
@@ -2,7 +2,7 @@ from typing import Dict, List, Tuple
|
|
2 |
|
3 |
import pandas as pd
|
4 |
import requests
|
5 |
-
from
|
6 |
from pandas import DataFrame
|
7 |
|
8 |
# --- Constants ---
|
@@ -113,8 +113,9 @@ def submit_answers(submission_data: dict, results_log: list) -> Tuple[str, DataF
|
|
113 |
return status_message, results_df
|
114 |
|
115 |
|
116 |
-
def run_agent(
|
117 |
-
|
|
|
118 |
"""Runs the agent on a list of questions and returns the results and answers.
|
119 |
|
120 |
This function iterates through a list of questions, runs the provided agent on each
|
@@ -122,7 +123,7 @@ def run_agent(agent: GaiaAgent,
|
|
122 |
agent execution and returns the results log and the answers payload.
|
123 |
|
124 |
Args:
|
125 |
-
|
126 |
generating answers to the questions.
|
127 |
questions_data (List[Dict]): A list of dictionaries, where each dictionary
|
128 |
represents a question and contains at least the 'task_id' and 'question' keys.
|
@@ -145,7 +146,7 @@ def run_agent(agent: GaiaAgent,
|
|
145 |
print(f"⚠️ Skipping invalid item (missing task_id or question): {item}")
|
146 |
continue
|
147 |
try:
|
148 |
-
submitted_answer =
|
149 |
answers_payload.append(
|
150 |
{"task_id": task_id, "submitted_answer": submitted_answer}
|
151 |
)
|
|
|
2 |
|
3 |
import pandas as pd
|
4 |
import requests
|
5 |
+
from agent import GaiaAgent
|
6 |
from pandas import DataFrame
|
7 |
|
8 |
# --- Constants ---
|
|
|
113 |
return status_message, results_df
|
114 |
|
115 |
|
116 |
+
def run_agent(
|
117 |
+
gaia_agent: GaiaAgent, questions_data: List[Dict]
|
118 |
+
) -> Tuple[List[Dict], List[Dict]]:
|
119 |
"""Runs the agent on a list of questions and returns the results and answers.
|
120 |
|
121 |
This function iterates through a list of questions, runs the provided agent on each
|
|
|
123 |
agent execution and returns the results log and the answers payload.
|
124 |
|
125 |
Args:
|
126 |
+
gaia_agent (GaiaAgent): An instance of the GaiaAgent class, which is responsible for
|
127 |
generating answers to the questions.
|
128 |
questions_data (List[Dict]): A list of dictionaries, where each dictionary
|
129 |
represents a question and contains at least the 'task_id' and 'question' keys.
|
|
|
146 |
print(f"⚠️ Skipping invalid item (missing task_id or question): {item}")
|
147 |
continue
|
148 |
try:
|
149 |
+
submitted_answer = gaia_agent(task_id, question_text)
|
150 |
answers_payload.append(
|
151 |
{"task_id": task_id, "submitted_answer": submitted_answer}
|
152 |
)
|
requirements.txt
CHANGED
@@ -2,4 +2,7 @@ gradio
|
|
2 |
gradio[oauth]
|
3 |
requests
|
4 |
python-dotenv
|
5 |
-
pandas
|
|
|
|
|
|
|
|
2 |
gradio[oauth]
|
3 |
requests
|
4 |
python-dotenv
|
5 |
+
pandas
|
6 |
+
smolagents
|
7 |
+
wikipedia-api
|
8 |
+
google-generativeai
|