dawid-lorek committed on
Commit
e70ca94
·
verified ·
1 Parent(s): 703ec74

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +109 -44
agent.py CHANGED
@@ -1,72 +1,137 @@
1
- from smolagents import LiteLLMModel
2
- from smolagents import CodeAgent, DuckDuckGoSearchTool, VisitWebpageTool, tool, PythonInterpreterTool
3
- from youtube_transcript_api import YouTubeTranscriptApi
4
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  @tool
7
- def reverse_sentence_tool(reverse_sentence: str) -> str:
8
  """
9
- Receives a sentence where both the word order and the characters in each word are reversed.
10
- Returns the sentence with words and order corrected.
11
  Args:
12
- reverse_sentence: A sentence with reversed words and reversed word order.
13
  Returns:
14
- A sentence in natural reading order.
15
  """
16
- inverted_words = reverse_sentence.split(" ")[::-1]
17
- correct_words = [word[::-1] for word in inverted_words]
18
- return " ".join(correct_words)
19
 
20
  @tool
21
- def get_youtube_transcript(video_url: str) -> str:
22
  """
23
- Fetches the transcript from a YouTube video if available.
24
  Args:
25
- video_url: Full URL to the YouTube video.
26
  Returns:
27
- Transcript text.
28
  """
29
- video_id = video_url.split("v=")[-1]
30
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
31
- full_text = " ".join([entry['text'] for entry in transcript])
32
- return full_text
 
 
 
 
 
 
 
 
33
 
34
  @tool
35
- def check_answer(answer: str) -> str:
36
  """
37
- Reviews the answer to check that it meets the requirements specified by the user and modifies it if necessary.
38
  Args:
39
- answer (str): The answer of the Agent.
40
  Returns:
41
- str: The final answer.
42
  """
43
- if answer and answer[-1] == '.':
44
- answer = answer[:-1]
45
- if "St." in answer:
46
- answer = answer.replace("St.", "Saint")
47
- return answer
 
 
 
 
 
48
 
49
class BasicAgent:
    """Callable agent wrapping a ``CodeAgent`` that runs on an OpenAI model through LiteLLM."""

    def __init__(self):
        # Read the OpenAI key from the environment.
        self.api_key = os.getenv("OPENAI_API_KEY")
        # Preferred model, e.g. GPT-4o or another OpenAI model.
        self.model = LiteLLMModel(model_id="gpt-4o", api_key=self.api_key)

        # Built-in search/interpreter/browsing tools followed by this file's own tools.
        toolbox = [
            DuckDuckGoSearchTool(),
            PythonInterpreterTool(),
            VisitWebpageTool(),
            reverse_sentence_tool,
            get_youtube_transcript,
            check_answer,
        ]
        self.agent = CodeAgent(tools=toolbox, model=self.model)
        print("BasicAgent initialized (OpenAI).")

    def __call__(self, question: str) -> str:
        """Log a short preview of the question and return whatever the agent's run produces."""
        preview = question[:50]
        print(f"Agent received question: {preview}...")
        return self.agent.run(question)
 
 
 
 
 
 
 
 
 
1
+ # agent.py
2
+
 
3
  import os
4
+ import requests
5
+ from smolagents import LiteLLMModel, CodeAgent, tool, DuckDuckGoSearchTool, SpeechToTextTool, VisitWebpageTool
6
+ import speech_recognition as sr
7
+ from pydub import AudioSegment
8
+ from PIL import Image
9
+
10
+ # Set the API endpoint (adjust it if yours differs)
11
+ api_url = "https://agents-course-unit4-scoring.hf.space"
12
+
13
+ # ==== Custom tools to plug into the agent ====
14
+
15
@tool
def download_question_file(task_id: str, file_name: str = "", save_dir: str = ".") -> str:
    """
    Downloads the file associated with a given task ID and saves it to disk.

    The file name is chosen in this order: the name advertised in the
    response's Content-Disposition header, then ``file_name``, then
    ``<task_id>.dat``. Only the final path component of any candidate is
    used, so the file is always written directly inside ``save_dir``.

    Args:
        task_id (str): Unique question/task identifier.
        file_name (str): Optional file name.
        save_dir (str): Directory to save.
    Returns:
        str: Path to the saved file, or error.
    """
    url = f"{api_url}/files/{task_id}"
    try:
        resp = requests.get(url, timeout=15)
        resp.raise_for_status()
    except requests.exceptions.HTTPError as e:
        return f"HTTP error: {e.response.status_code}"
    except requests.exceptions.RequestException as e:
        return f"Network error: {e}"

    candidates = (
        _filename_from_content_disposition(resp.headers.get("Content-Disposition", "")),
        os.path.basename(file_name),
        os.path.basename(f"{task_id}.dat"),
    )
    # "." and ".." survive basename() but would point at a directory.
    usable = [name for name in candidates if name and name not in (".", "..")]
    filename = usable[0] if usable else "download.dat"

    file_path = os.path.join(save_dir, filename)
    try:
        os.makedirs(save_dir, exist_ok=True)
        with open(file_path, "wb") as f:
            f.write(resp.content)
    except OSError as e:
        # The documented contract is "path or error", so report instead of raising.
        return f"File error: {e}"
    return file_path


def _filename_from_content_disposition(header: str) -> str:
    """Return the bare file name from a Content-Disposition header, or "" if there is none."""
    # Local import keeps this block self-contained (stdlib only).
    from email.message import Message

    if not header:
        return ""
    message = Message()
    message["Content-Disposition"] = header
    # get_filename() understands quoted, unquoted and RFC 2231 encoded values.
    advertised = message.get_filename() or ""
    # The header is server-controlled: drop any directory part, whichever separator it uses.
    return os.path.basename(advertised.replace("\\", "/"))
45
 
46
  @tool
47
+ def read_image(image_path: str) -> Image:
48
  """
49
+ Loads image from disk.
 
50
  Args:
51
+ image_path (str): Path to the image file.
52
  Returns:
53
+ The image.
54
  """
55
+ return Image.open(image_path)
 
 
56
 
57
  @tool
58
+ def audio_to_text(audio_path: str) -> str:
59
  """
60
+ Converts audio (mp3/wav) to text using Google Speech Recognition.
61
  Args:
62
+ audio_path (str): Path to the audio file.
63
  Returns:
64
+ str: Recognized text.
65
  """
66
+ if audio_path.endswith(".mp3"):
67
+ source_file = audio_path.replace(".mp3", ".wav")
68
+ sound = AudioSegment.from_mp3(audio_path)
69
+ sound.export(source_file, format="wav")
70
+ else:
71
+ source_file = audio_path
72
+ r = sr.Recognizer()
73
+ audio_file = sr.AudioFile(source_file)
74
+ with audio_file as source:
75
+ audio = r.record(source)
76
+ text = r.recognize_google(audio)
77
+ return text
78
 
79
  @tool
80
+ def extract_text_from_image(image_path: str) -> str:
81
  """
82
+ Extract text from image using pytesseract (OCR).
83
  Args:
84
+ image_path: Path to the image file.
85
  Returns:
86
+ Extracted text or error message.
87
  """
88
+ try:
89
+ import pytesseract
90
+ from PIL import Image
91
+ image = Image.open(image_path)
92
+ text = pytesseract.image_to_string(image)
93
+ return text
94
+ except ImportError:
95
+ return "Error: pytesseract is not installed."
96
+ except Exception as e:
97
+ return f"Error extracting text from image: {str(e)}"
98
 
99
+ # ==== AGENT ====
 
 
 
 
 
100
 
101
class GaiaAgent:
    """Callable wrapper around a ``CodeAgent`` equipped with this file's file, image and audio tools."""

    def __init__(self, model=None, max_steps=8):
        """
        Build the underlying ``CodeAgent``.

        Args:
            model: Model object handed to ``CodeAgent``. When ``None``, a
                ``LiteLLMModel`` for "gpt-4o" is created using the
                ``OPENAI_API_KEY`` environment variable.
            max_steps: Forwarded to ``CodeAgent`` as ``max_steps``.
        """
        # If no model was passed in, default to OpenAI GPT-4o.
        if model is None:
            api_key = os.getenv("OPENAI_API_KEY", "")
            model = LiteLLMModel(
                model_id="gpt-4o",  # change to your own model if needed
                api_key=api_key,
            )
        self.gaia_agent = CodeAgent(
            model=model,
            tools=[
                DuckDuckGoSearchTool(),
                download_question_file,
                read_image,
                audio_to_text,
                extract_text_from_image,
                VisitWebpageTool(),
                SpeechToTextTool(),
            ],
            additional_authorized_imports=["pandas", "numpy", "math", "statistics", "scipy"],
            max_steps=max_steps,
        )
        # Additional prompt configuration can be added here if desired.

    def __call__(self, question: str) -> str:
        """
        Run the agent on ``question`` and return its answer as text.

        Args:
            question: The question to answer.
        Returns:
            The agent's answer converted to ``str``, or a fixed error message
            when the agent is missing or the run raises.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        if not self.gaia_agent:
            return "The agent is not properly initialized. Please check your API keys and configuration."

        try:
            answer = self.gaia_agent.run(question)
        except Exception as e:
            print(f"Error processing question: {e}")
            return "An error occurred while processing your question. Please check the agent logs for details."

        # The run may produce a non-string value (e.g. a number). Slicing it
        # for the preview used to raise and discard the answer, so convert first.
        text = answer if isinstance(answer, str) else str(answer)
        preview = f"{text[:50]}..." if len(text) > 50 else text
        print(f"Agent generated answer: {preview}")
        return text