AGAZO_Final_Assignment

Sleeping

App Files Files Community

Alexandre Gazola committed on Jun 3

Commit

0ff39df

1 Parent(s): 0f6881a

fixfix

Browse files

Files changed (4) hide show

analyse_chess_position_tool.py +1 -1
app.py +8 -7
audio_to_text_tool.py +37 -0
chess_image_to_fen_tool.py +1 -1

analyse_chess_position_tool.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from langchain_core.tools import tool
 import requests
-@tool
 def get_chess_best_move(fen: str) -> str:
     """
     Given the description of a chess board using FEN notation, returns the next best move.

 from langchain_core.tools import tool
 import requests
+@tool(return_direct=True)
 def get_chess_best_move(fen: str) -> str:
     """
     Given the description of a chess board using FEN notation, returns the next best move.

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from langchain_agent import LangChainAgent
 from analyse_chess_position_tool import get_chess_best_move
 from utils import get_bytes, get_text_file_contents, get_base64
 from audio_to_text_tool import audio_to_text
 # (Keep Constants as is) ok!!!
 # --- Constants ---
@@ -68,7 +69,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 3. Run your Agent
     results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
@@ -76,8 +77,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
-        #if "making a pie" not in question_text:
-        #    continue
         try:
             file_name = item.get("file_name")
@@ -91,10 +92,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
                     file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
                     question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
-                    #print('testing whisper')
-                    #whisper_return = audio_to_text(file_path)
-                    #print(whisper_return)
-                    #return None
                 else:
                     file_path = os.path.join(BASE_DIR, 'files', file_name)
                     plain_txt_file = get_text_file_contents(file_path)

 from analyse_chess_position_tool import get_chess_best_move
 from utils import get_bytes, get_text_file_contents, get_base64
 from audio_to_text_tool import audio_to_text
+from audio_to_text_tool import audio_to_text_from_youtube
 # (Keep Constants as is) ok!!!
 # --- Constants ---
     # 3. Run your Agent
     results_log = []
     answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions....")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
             print(f"Skipping item with missing task_id or question: {item}")
             continue
+        if "in response to the question" not in question_text:
+            continue
         try:
             file_name = item.get("file_name")
                     file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
                     question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
+                    print('testing whisper with youtube video')
+                    whisper_return = audio_to_text_from_youtube('https://www.youtube.com/watch?v=1htKBjuUWec')
+                    print(whisper_return)
+                    return None
                 else:
                     file_path = os.path.join(BASE_DIR, 'files', file_name)
                     plain_txt_file = get_text_file_contents(file_path)

audio_to_text_tool.py CHANGED Viewed

@@ -3,6 +3,7 @@ import tempfile
 from openai import OpenAI
 from langchain.tools import tool
 from constants import OPENAI_KEY
 # Initialize OpenAI client (uses OPENAI_API_KEY from environment or explicitly)
 client = OpenAI(api_key=OPENAI_KEY)
@@ -44,6 +45,42 @@ def audio_to_text(base64_audio_path: str) -> str:
     except Exception as e:
         return f"An error occurred during transcription: {str(e)}"
 if __name__ == "__main__":
     # Example: path to a text file that contains base64-encoded audio (e.g., base64_audio.txt)
     base64_audio_file_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"

 from openai import OpenAI
 from langchain.tools import tool
 from constants import OPENAI_KEY
+from pytube import YouTube
 # Initialize OpenAI client (uses OPENAI_API_KEY from environment or explicitly)
 client = OpenAI(api_key=OPENAI_KEY)
     except Exception as e:
         return f"An error occurred during transcription: {str(e)}"
+@tool
+def audio_to_text_from_youtube(youtube_url: str) -> str:
+    """
+    Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.
+    Args:
+        youtube_url (str): URL of the YouTube video.
+    Returns:
+        str: Transcribed text.
+    """
+    try:
+        # Download audio stream
+        yt = YouTube(youtube_url)
+        audio_stream = yt.streams.filter(only_audio=True).first()
+        if not audio_stream:
+            return "No audio stream found in the YouTube video."
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
+            audio_stream.download(output_path=None, filename=temp_audio_file.name)
+            temp_audio_path = temp_audio_file.name
+        # Transcribe using OpenAI Whisper
+        with open(temp_audio_path, "rb") as audio_file:
+            transcript = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=audio_file,
+                response_format="text"
+            )
+        return transcript.strip()
+    except Exception as e:
+        return f"An error occurred during YouTube transcription: {str(e)}"
 if __name__ == "__main__":
     # Example: path to a text file that contains base64-encoded audio (e.g., base64_audio.txt)
     base64_audio_file_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"

chess_image_to_fen_tool.py CHANGED Viewed

@@ -6,7 +6,7 @@ from utils import get_base64
 import requests
 import json
-@tool
 def chess_image_to_fen(image_path_in_base64:str, current_player: Literal["black", "white"]) -> Dict[str,str]:
     """
         Convert chess image to FEN (Forsyth-Edwards Notation) notation.

 import requests
 import json
+@tool(return_direct=True)
 def chess_image_to_fen(image_path_in_base64:str, current_player: Literal["black", "white"]) -> Dict[str,str]:
     """
         Convert chess image to FEN (Forsyth-Edwards Notation) notation.