Alexandre Gazola committed on
Commit
0ff39df
·
1 Parent(s): 0f6881a
analyse_chess_position_tool.py CHANGED
@@ -1,7 +1,7 @@
1
  from langchain_core.tools import tool
2
  import requests
3
 
4
- @tool
5
  def get_chess_best_move(fen: str) -> str:
6
  """
7
  Given the description of a chess board using FEN notation, returns the next best move.
 
1
  from langchain_core.tools import tool
2
  import requests
3
 
4
+ @tool(return_direct=True)
5
  def get_chess_best_move(fen: str) -> str:
6
  """
7
  Given the description of a chess board using FEN notation, returns the next best move.
app.py CHANGED
@@ -9,6 +9,7 @@ from langchain_agent import LangChainAgent
9
  from analyse_chess_position_tool import get_chess_best_move
10
  from utils import get_bytes, get_text_file_contents, get_base64
11
  from audio_to_text_tool import audio_to_text
 
12
 
13
  # (Keep Constants as is) ok!!!
14
  # --- Constants ---
@@ -68,7 +69,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
68
  # 3. Run your Agent
69
  results_log = []
70
  answers_payload = []
71
- print(f"Running agent on {len(questions_data)} questions...")
72
  for item in questions_data:
73
  task_id = item.get("task_id")
74
  question_text = item.get("question")
@@ -76,8 +77,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
76
  print(f"Skipping item with missing task_id or question: {item}")
77
  continue
78
 
79
- #if "making a pie" not in question_text:
80
- # continue
81
 
82
  try:
83
  file_name = item.get("file_name")
@@ -91,10 +92,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
91
  file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
92
  question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
93
 
94
- #print('testing whisper')
95
- #whisper_return = audio_to_text(file_path)
96
- #print(whisper_return)
97
- #return None
98
  else:
99
  file_path = os.path.join(BASE_DIR, 'files', file_name)
100
  plain_txt_file = get_text_file_contents(file_path)
 
9
  from analyse_chess_position_tool import get_chess_best_move
10
  from utils import get_bytes, get_text_file_contents, get_base64
11
  from audio_to_text_tool import audio_to_text
12
+ from audio_to_text_tool import audio_to_text_from_youtube
13
 
14
  # (Keep Constants as is) ok!!!
15
  # --- Constants ---
 
69
  # 3. Run your Agent
70
  results_log = []
71
  answers_payload = []
72
+ print(f"Running agent on {len(questions_data)} questions....")
73
  for item in questions_data:
74
  task_id = item.get("task_id")
75
  question_text = item.get("question")
 
77
  print(f"Skipping item with missing task_id or question: {item}")
78
  continue
79
 
80
+ if "in response to the question" not in question_text:
81
+ continue
82
 
83
  try:
84
  file_name = item.get("file_name")
 
92
  file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
93
  question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
94
 
95
+ print('testing whisper with youtube video')
96
+ whisper_return = audio_to_text_from_youtube('https://www.youtube.com/watch?v=1htKBjuUWec')
97
+ print(whisper_return)
98
+ return None
99
  else:
100
  file_path = os.path.join(BASE_DIR, 'files', file_name)
101
  plain_txt_file = get_text_file_contents(file_path)
audio_to_text_tool.py CHANGED
@@ -3,6 +3,7 @@ import tempfile
3
  from openai import OpenAI
4
  from langchain.tools import tool
5
  from constants import OPENAI_KEY
 
6
 
7
  # Initialize OpenAI client (uses OPENAI_API_KEY from environment or explicitly)
8
  client = OpenAI(api_key=OPENAI_KEY)
@@ -44,6 +45,42 @@ def audio_to_text(base64_audio_path: str) -> str:
44
  except Exception as e:
45
  return f"An error occurred during transcription: {str(e)}"
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  if __name__ == "__main__":
48
  # Example: path to a text file that contains base64-encoded audio (e.g., base64_audio.txt)
49
  base64_audio_file_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"
 
3
  from openai import OpenAI
4
  from langchain.tools import tool
5
  from constants import OPENAI_KEY
6
+ from pytube import YouTube
7
 
8
  # Initialize OpenAI client (uses OPENAI_API_KEY from environment or explicitly)
9
  client = OpenAI(api_key=OPENAI_KEY)
 
45
  except Exception as e:
46
  return f"An error occurred during transcription: {str(e)}"
47
 
48
+ @tool
49
+ def audio_to_text_from_youtube(youtube_url: str) -> str:
50
+ """
51
+ Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.
52
+
53
+ Args:
54
+ youtube_url (str): URL of the YouTube video.
55
+
56
+ Returns:
57
+ str: Transcribed text.
58
+ """
59
+ try:
60
+ # Download audio stream
61
+ yt = YouTube(youtube_url)
62
+ audio_stream = yt.streams.filter(only_audio=True).first()
63
+
64
+ if not audio_stream:
65
+ return "No audio stream found in the YouTube video."
66
+
67
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
68
+ audio_stream.download(output_path=None, filename=temp_audio_file.name)
69
+ temp_audio_path = temp_audio_file.name
70
+
71
+ # Transcribe using OpenAI Whisper
72
+ with open(temp_audio_path, "rb") as audio_file:
73
+ transcript = client.audio.transcriptions.create(
74
+ model="whisper-1",
75
+ file=audio_file,
76
+ response_format="text"
77
+ )
78
+
79
+ return transcript.strip()
80
+
81
+ except Exception as e:
82
+ return f"An error occurred during YouTube transcription: {str(e)}"
83
+
84
  if __name__ == "__main__":
85
  # Example: path to a text file that contains base64-encoded audio (e.g., base64_audio.txt)
86
  base64_audio_file_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"
chess_image_to_fen_tool.py CHANGED
@@ -6,7 +6,7 @@ from utils import get_base64
6
  import requests
7
  import json
8
 
9
- @tool
10
  def chess_image_to_fen(image_path_in_base64:str, current_player: Literal["black", "white"]) -> Dict[str,str]:
11
  """
12
  Convert chess image to FEN (Forsyth-Edwards Notation) notation.
 
6
  import requests
7
  import json
8
 
9
+ @tool(return_direct=True)
10
  def chess_image_to_fen(image_path_in_base64:str, current_player: Literal["black", "white"]) -> Dict[str,str]:
11
  """
12
  Convert chess image to FEN (Forsyth-Edwards Notation) notation.