dlaima committed
Commit 03f0224 · verified · 1 Parent(s): 5c5f32d

Update app.py

Files changed (1):
  1. app.py (+71 -74)
app.py CHANGED
@@ -1,85 +1,82 @@
 import os
-import gradio as gr
 import requests
-import pandas as pd
-
-from smolagents import CodeAgent, DuckDuckGoSearchTool
-from smolagents.models import OpenAIServerModel
-
-from smolagents import Tool
-from wikipedia_searcher import WikipediaSearcher
-
+from smolagents import Agent
 from audio_transcriber import AudioTranscriptionTool
-
 from image_analyzer import ImageAnalysisTool
-
-
-class WikipediaSearchTool(Tool):
-    name = "wikipedia_search"
-    description = "Search Wikipedia for a given query."
-    inputs = {
-        "query": {
-            "type": "string",
-            "description": "The search query string"
-        }
-    }
-    output_type = "string"
-
+from wikipedia_searcher import WikipediaSearcher
+from openai import OpenAI
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Tools
+audio_tool = AudioTranscriptionTool()
+image_tool = ImageAnalysisTool()
+wiki_tool = WikipediaSearcher()
+
+# Static system prompt
+def build_prompt(question: str) -> str:
+    return f"""You are an agent solving the GAIA benchmark and you are required to provide exact answers.
+Rules to follow:
+1. Return only the exact requested answer: no explanation and no reasoning.
+2. For yes/no questions, return exactly "Yes" or "No".
+3. For dates, use the exact format requested.
+4. For numbers, use the exact number, no other format.
+5. For names, use the exact name as found in sources.
+6. If the question has an associated file, download the file first using the task ID.
+Examples of good responses:
+- "42"
+- "Yes"
+- "October 5, 2001"
+- "Buenos Aires"
+Never include phrases like "the answer is..." or "Based on my research".
+Only return the exact answer.
+
+QUESTION:
+{question}
+"""
+
+# Main agent function
+class GAIAAgent:
     def __init__(self):
-        super().__init__()
-        self.searcher = WikipediaSearcher()
+        self.llm = OpenAI(model="gpt-4-turbo", temperature=0)
 
-    def forward(self, query: str) -> str:
-        return self.searcher.search(query)
-
-wikipedia_search_tool = WikipediaSearchTool()
-
-
-
-# Define the system prompt
-SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question.
-Report your thoughts, and finish your answer with the following template:
-FINAL ANSWER: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list
-of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
-
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+    def __call__(self, task: dict) -> str:
+        question = task.get("question", "")
+        attachment_url = task.get("attachment", "")
+
+        # Handle audio
+        if attachment_url.endswith((".mp3", ".wav")):
+            transcript = audio_tool.forward(attachment_url)
+            question += f"\n\nTranscript of attached audio:\n{transcript}"
+
+        # Handle image
+        elif attachment_url.endswith((".jpg", ".jpeg", ".png")):
+            return image_tool.forward(attachment_url, question)
+
+        # Handle Python file
+        elif attachment_url.endswith(".py"):
+            try:
+                code_text = requests.get(attachment_url).text
+                question += f"\n\nAttached Python file content:\n{code_text}"
+            except Exception as e:
+                return f"Error retrieving Python file: {e}"
+
+        # Wikipedia queries (if task type or instruction indicates)
+        if "wikipedia" in question.lower():
+            return wiki_tool.search(question)
+
+        # Build prompt
+        prompt = build_prompt(question)
+
+        # Run model
+        response = self.llm.chat.completions.create(
+            messages=[{"role": "system", "content": prompt}],
+            model="gpt-4-turbo"
+        )
 
-# Patched model to prepend system prompt correctly
-class PatchedOpenAIServerModel(OpenAIServerModel):
-    def generate(self, messages, stop_sequences=None, **kwargs):
-        if isinstance(messages, list):
-            if not any(m["role"] == "system" for m in messages):
-                messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages
-        else:
-            raise TypeError("Expected 'messages' to be a list of message dicts")
-        return super().generate(messages=messages, stop_sequences=stop_sequences, **kwargs)
+        return response.choices[0].message.content.strip()
 
-class MyAgent:
-    def __init__(self):
-        self.model = PatchedOpenAIServerModel(model_id="gpt-4-turbo") #gpt-4-turbo
-        self.agent = CodeAgent(tools=[
-            DuckDuckGoSearchTool(),
-            wikipedia_search_tool,
-            AudioTranscriptionTool(),
-            ImageAnalysisTool()
-        ], model=self.model)
-
-    def __call__(self, task: dict) -> str:
-        question_text = task.get("question", "")
-
-        # Merge any code or attachment content if available
-        if "code" in task:
-            question_text += f"\n\nAttached code:\n{task['code']}"
-        elif "attachment" in task:
-            question_text += f"\n\nAttached content:\n{task['attachment']}"
-        #Consider audio video
-        #if "L1vXCYZAYYM" in question or "https://www.youtube.com/watch?v=L1vXCYZAYYM" in question:
-            #return "FINAL ANSWER: 11" # Replace with correct known number
-        if "L1vXCYZAYYM" in question_text or "https://www.youtube.com/watch?v=L1vXCYZAYYM" in question_text:
-            return "FINAL ANSWER: 11"
-
-        return self.agent.run(question_text)
 
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
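
For reference, a minimal sketch of the prompt-plus-chat-completions pattern that the new GAIAAgent.__call__ relies on, runnable on its own under stated assumptions: OPENAI_API_KEY is available in the environment (for example via the .env file that load_dotenv() reads), the ask() helper and the sample question are illustrative and not part of the commit, the prompt below is a trimmed version of the committed build_prompt(), and the model name is passed to chat.completions.create rather than to the OpenAI() constructor.

# Illustrative sketch, not part of the commit.
# Assumptions: OPENAI_API_KEY is set (e.g. via .env); ask() and the sample
# question are hypothetical helpers; the prompt is a trimmed build_prompt().
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()  # pick up OPENAI_API_KEY from a local .env file, if present


def build_prompt(question: str) -> str:
    # Trimmed version of the committed build_prompt(), same overall shape.
    return f"""You are an agent solving the GAIA benchmark and you are required to provide exact answers.
Only return the exact answer.

QUESTION:
{question}
"""


def ask(question: str) -> str:
    client = OpenAI()  # the client reads OPENAI_API_KEY from the environment
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "system", "content": build_prompt(question)}],
        temperature=0,
    )
    return response.choices[0].message.content.strip()


if __name__ == "__main__":
    # The expected answer matches the "Buenos Aires" example in the committed prompt.
    print(ask("What is the capital of Argentina?"))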