# Captured from a Hugging Face Space page (Space status at capture time: Sleeping).
import os

import gradio as gr
import requests
from smolagents import Agent, Tool

from audio_transcriber import AudioTranscriptionTool
from image_analyzer import ImageAnalysisTool
from wikipedia_searcher import WikipediaSearcher
# Hugging Face API setup
# The access token comes from the environment; it is None when unset.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
# The hosted Inference API is served from the huggingface.co domain.
HF_CHAT_MODEL_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
HEADERS = {"Content-Type": "application/json"}
if HF_API_TOKEN:
    # Attach credentials only when a token is configured; formatting the
    # header unconditionally would send the literal value "Bearer None".
    HEADERS["Authorization"] = f"Bearer {HF_API_TOKEN}"
# Static system prompt
# Sent to the model ahead of every question. The literal must contain only the
# instructions themselves: any stray trailing characters on a line would become
# part of the prompt text.
SYSTEM_PROMPT = """You are an agent solving the GAIA benchmark and you are required to provide exact answers.
Rules to follow:
1. Return only the exact requested answer: no explanation and no reasoning.
2. For yes/no questions, return exactly "Yes" or "No".
3. For dates, use the exact format requested.
4. For numbers, use the exact number, no other format.
5. For names, use the exact name as found in sources.
6. If the question has an associated file, process it accordingly.
Examples of good responses:
- "42"
- "Yes"
- "October 5, 2001"
- "Buenos Aires"
Never include phrases like "the answer is..." or "Based on my research".
Only return the exact answer."""
# Agent tools
# The audio and image tools are module-level singletons; the request handler
# calls their .forward() methods directly.
audio_tool = AudioTranscriptionTool()
image_tool = ImageAnalysisTool()

# Wikipedia lookup wrapped as a tool. A new WikipediaSearcher is constructed
# on every call, so no searcher instance is shared between queries.
wiki_tool = Tool.from_function(
    name="wikipedia_search",
    description="Search for facts using Wikipedia.",
    input_schema={
        "query": {"type": "string", "description": "Search query"},
    },
    output_type="string",
    forward=lambda query: WikipediaSearcher().search(query),
)

tools = [audio_tool, image_tool, wiki_tool]

# NOTE(review): confirm that the installed smolagents release exposes `Agent`
# and `Tool.from_function` with these keyword arguments -- TODO verify.
agent = Agent(tools=tools, system_prompt=SYSTEM_PROMPT)
def query_hf_model(prompt: str, *, timeout: float = 60.0) -> str:
    """Send *prompt* to the hosted model and return the generated text.

    This function does not raise: every failure is reported as a readable
    string so the caller can show it to the user as the answer.

    Args:
        prompt: Complete prompt text to submit.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The generated text with surrounding whitespace stripped, or a message
        starting with "HF API Error:" (the service reported an error) or
        "Error querying Hugging Face model:" (anything else went wrong).
    """
    payload = {
        # The prompt is sent as a plain string: the "return_full_text"
        # parameter and the list-of-{"generated_text": ...} response parsed
        # below both belong to the text-generation task, not the
        # conversational one.
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 256,
            "return_full_text": False,
        },
    }
    try:
        response = requests.post(
            HF_CHAT_MODEL_URL,
            headers=HEADERS,
            json=payload,
            # Without a timeout a stalled connection blocks the UI forever.
            timeout=timeout,
        )
        result = response.json()
        if isinstance(result, dict) and "error" in result:
            return f"HF API Error: {result['error']}"
        if not isinstance(result, list) or not result:
            # Name the problem instead of surfacing a bare KeyError/IndexError.
            return f"Error querying Hugging Face model: unexpected response {result!r}"
        return result[0]["generated_text"].strip()
    except Exception as e:  # UI boundary: report the failure, never crash
        return f"Error querying Hugging Face model: {e}"
def run_and_submit_all(question, file):
    """Answer *question*, first folding in the optional uploaded *file*.

    Args:
        question: Question text entered by the user.
        file: Falsy when nothing was uploaded. Otherwise either a filesystem
            path (``str`` / ``os.PathLike``) or an object whose ``.name``
            attribute holds the path.

    Returns:
        The answer string. Image uploads are answered by the image tool
        alone; audio transcripts and Python sources are appended to the
        question before it is sent to the chat model. Unsupported or
        unreadable files yield an explanatory message instead.
    """
    if file:
        # Accept both shapes an upload can arrive in: a bare path, or a
        # file-like wrapper carrying the path in `.name`.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        # Compare extensions case-insensitively so "CLIP.MP3" or "photo.JPG"
        # are not rejected as unsupported.
        lowered = file_path.lower()
        if lowered.endswith((".mp3", ".wav")):
            transcript = audio_tool.forward(file_path)
            question = f"{question}\n\nTranscription of audio: {transcript}"
        elif lowered.endswith((".png", ".jpg", ".jpeg")):
            # The image tool receives the question itself; the chat model is
            # skipped for this branch.
            return image_tool.forward(file_path, question)
        elif lowered.endswith(".py"):
            try:
                # Explicit encoding: the platform default is not guaranteed
                # to be UTF-8.
                with open(file_path, "r", encoding="utf-8") as f:
                    code = f.read()
            except (OSError, UnicodeDecodeError) as e:
                return f"Error reading code file: {e}"
            question = f"{question}\n\nPython code:\n{code}"
        else:
            return "Unsupported file type."

    full_prompt = f"{SYSTEM_PROMPT}\nQUESTION:\n{question}"
    return query_hf_model(full_prompt)
# Gradio front end: one question box plus an optional upload, wired to
# run_and_submit_all.
# NOTE(review): the original indentation was lost; the two inputs are assumed
# to sit inside the Row and the button/output below it -- confirm the layout.
with gr.Blocks(title="GAIA Agent with HF API") as demo:
    gr.Markdown("### GAIA Evaluation Agent (Hugging Face-based)")

    with gr.Row():
        question_input = gr.Textbox(
            label="Question",
            placeholder="Enter your question here...",
            lines=3,
        )
        file_input = gr.File(
            label="Optional File (Audio, Image, or Python)",
            file_types=[".mp3", ".wav", ".jpg", ".jpeg", ".png", ".py"],
        )

    submit_button = gr.Button("Run Agent")
    output_box = gr.Textbox(label="Answer")

    # The handler's return value is written into the answer box.
    submit_button.click(
        fn=run_and_submit_all,
        inputs=[question_input, file_input],
        outputs=output_box,
    )


if __name__ == "__main__":
    demo.launch()