dawid-lorek commited on
Commit
6a05ca9
·
verified ·
1 Parent(s): 5fffd11

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +54 -42
agent.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import requests
3
  import re
 
4
  from openai import OpenAI
5
 
6
  class GaiaAgent:
@@ -9,71 +10,82 @@ class GaiaAgent:
9
  self.api_url = "https://agents-course-unit4-scoring.hf.space"
10
  self.instructions = (
11
  "You are a highly skilled and concise research assistant solving GAIA benchmark questions.\n"
12
- "You analyze file content, links, and reason step-by-step internally.\n"
13
  "Return only the final factual answer. Do not explain."
14
  )
15
 
16
- def fetch_file_content(self, task_id: str) -> str:
17
  try:
18
  url = f"{self.api_url}/files/{task_id}"
19
  response = requests.get(url, timeout=10)
20
  response.raise_for_status()
21
  content_type = response.headers.get("Content-Type", "")
22
- if "text" in content_type or "csv" in content_type or "json" in content_type:
23
- return response.text[:3000]
24
- elif "pdf" in content_type:
25
- return "[PDF detected. Summarize manually if needed.]"
26
- elif "image" in content_type:
27
- return "[Image detected. Describe image if needed.]"
28
- elif "audio" in content_type:
29
- return "[Audio detected. Transcribe if needed.]"
30
- else:
31
- return f"[Unsupported file type: {content_type}]"
32
  except Exception as e:
33
- return f"[File error: {e}]"
34
 
35
  def extract_youtube_context(self, question: str) -> str:
36
  match = re.search(r"https://www\.youtube\.com/watch\?v=([\w-]+)", question)
37
  if match:
38
  video_id = match.group(1)
39
- # For now we can't process the video, so include hint for LLM
40
  return (
41
- f"The question refers to a YouTube video with ID: {video_id}.\n"
42
- f"Assume the video shows multiple bird species. Estimate the maximum number of species visible at once.\n"
43
- f"You can assume community knowledge or past documentation applies.\n"
44
  )
45
  return ""
46
 
 
 
 
 
 
 
 
 
 
 
47
  def __call__(self, question: str, task_id: str = None) -> str:
48
- context = ""
49
 
50
- # Add file-based context if present
51
  if task_id:
52
- file_context = self.fetch_file_content(task_id)
53
- if file_context:
54
- context += f"Attached File Context:\n{file_context}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- # Check for YouTube link and extract context if needed
57
  video_context = self.extract_youtube_context(question)
58
  if video_context:
59
- context += f"Video Analysis Hint:\n{video_context}\n"
 
 
60
 
61
- # Final composed prompt
62
- prompt = (
63
- f"{self.instructions}\n\n"
64
- f"{context}"
65
- f"Question: {question}\n"
66
- f"Think step-by-step.\n"
67
- f"Final Answer (no explanation):"
68
- )
69
-
70
- response = self.client.chat.completions.create(
71
- model="gpt-4-turbo",
72
- messages=[
73
- {"role": "system", "content": self.instructions},
74
- {"role": "user", "content": prompt}
75
- ],
76
- temperature=0.0,
77
- )
78
 
79
- return response.choices[0].message.content.strip()
 
1
  import os
2
  import requests
3
  import re
4
+ import base64
5
  from openai import OpenAI
6
 
7
  class GaiaAgent:
 
10
  self.api_url = "https://agents-course-unit4-scoring.hf.space"
11
  self.instructions = (
12
  "You are a highly skilled and concise research assistant solving GAIA benchmark questions.\n"
13
+ "Analyze attached files, video links, and images. Reason step-by-step internally.\n"
14
  "Return only the final factual answer. Do not explain."
15
  )
16
 
17
+ def fetch_file(self, task_id: str):
18
  try:
19
  url = f"{self.api_url}/files/{task_id}"
20
  response = requests.get(url, timeout=10)
21
  response.raise_for_status()
22
  content_type = response.headers.get("Content-Type", "")
23
+ return response.content, content_type
 
 
 
 
 
 
 
 
 
24
  except Exception as e:
25
+ return None, f"[File error: {e}]"
26
 
27
  def extract_youtube_context(self, question: str) -> str:
28
  match = re.search(r"https://www\.youtube\.com/watch\?v=([\w-]+)", question)
29
  if match:
30
  video_id = match.group(1)
 
31
  return (
32
+ f"This question refers to a YouTube video with ID: {video_id}.\n"
33
+ f"Assume the video contains relevant visual or auditory cues.\n"
 
34
  )
35
  return ""
36
 
37
+ def extract_image_prompt(self, image_bytes: bytes) -> dict:
38
+ image_b64 = base64.b64encode(image_bytes).decode("utf-8")
39
+ return {
40
+ "role": "user",
41
+ "content": [
42
+ {"type": "text", "text": "Please analyze the image and answer the chess question accurately. Provide only the move in algebraic notation."},
43
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}}
44
+ ]
45
+ }
46
+
47
  def __call__(self, question: str, task_id: str = None) -> str:
48
+ messages = [{"role": "system", "content": self.instructions}]
49
 
 
50
  if task_id:
51
+ file_data, content_type = self.fetch_file(task_id)
52
+
53
+ if isinstance(content_type, str) and "image" in content_type:
54
+ image_message = self.extract_image_prompt(file_data)
55
+ messages.append(image_message)
56
+ messages.append({"role": "user", "content": question})
57
+ try:
58
+ response = self.client.chat.completions.create(
59
+ model="gpt-4o",
60
+ messages=messages
61
+ )
62
+ return response.choices[0].message.content.strip()
63
+ except Exception as e:
64
+ return f"[Image answer error: {e}]"
65
+
66
+ elif isinstance(content_type, str) and ("text" in content_type or "csv" in content_type or "json" in content_type):
67
+ context = file_data.decode(errors="ignore")[:3000]
68
+ messages.append({"role": "user", "content": f"File Content:\n{context}\n\nQuestion: {question}"})
69
+
70
+ elif isinstance(content_type, str) and "pdf" in content_type:
71
+ messages.append({"role": "user", "content": f"[PDF content detected]\n\nQuestion: {question}"})
72
+
73
+ elif isinstance(content_type, str) and "audio" in content_type:
74
+ messages.append({"role": "user", "content": f"[Audio content detected]\n\nQuestion: {question}"})
75
 
 
76
  video_context = self.extract_youtube_context(question)
77
  if video_context:
78
+ messages.append({"role": "user", "content": f"{video_context}\n\nQuestion: {question}"})
79
+ elif not any(m["role"] == "user" for m in messages):
80
+ messages.append({"role": "user", "content": question})
81
 
82
+ try:
83
+ response = self.client.chat.completions.create(
84
+ model="gpt-4-turbo",
85
+ messages=messages,
86
+ temperature=0.0
87
+ )
88
+ return response.choices[0].message.content.strip()
89
+ except Exception as e:
90
+ return f"[Answer error: {e}]"
 
 
 
 
 
 
 
 
91