Alexandre Gazola committed on
Commit
e4749b5
·
1 Parent(s): e514a96

teste whisper

Browse files
Files changed (2) hide show
  1. app.py +6 -2
  2. audio_to_text_tool.py +46 -51
app.py CHANGED
@@ -8,6 +8,7 @@ import time
8
  from langchain_agent import LangChainAgent
9
  from analyse_chess_position_tool import get_chess_best_move
10
  from utils import get_bytes, get_text_file_contents, get_base64
 
11
 
12
  # (Keep Constants as is) ok!!!
13
  # --- Constants ---
@@ -75,8 +76,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
75
  print(f"Skipping item with missing task_id or question: {item}")
76
  continue
77
 
78
- #if "chess" not in question_text:
79
- # continue
80
 
81
  try:
82
  file_name = item.get("file_name")
@@ -89,6 +90,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
89
  if file_name.endswith(('.mp3', '.xlsx', '.png')):
90
  file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
91
  question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
 
 
 
92
  else:
93
  file_path = os.path.join(BASE_DIR, 'files', file_name)
94
  plain_txt_file = get_text_file_contents(file_path)
 
8
  from langchain_agent import LangChainAgent
9
  from analyse_chess_position_tool import get_chess_best_move
10
  from utils import get_bytes, get_text_file_contents, get_base64
11
+ from audio_to_text_tool import audio_to_text
12
 
13
  # (Keep Constants as is) ok!!!
14
  # --- Constants ---
 
76
  print(f"Skipping item with missing task_id or question: {item}")
77
  continue
78
 
79
+ if "making a pie" not in question_text:
80
+ continue
81
 
82
  try:
83
  file_name = item.get("file_name")
 
90
  if file_name.endswith(('.mp3', '.xlsx', '.png')):
91
  file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
92
  question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
93
+
94
+ print('testing whisper')
95
+ audio_to_text(file_path)
96
  else:
97
  file_path = os.path.join(BASE_DIR, 'files', file_name)
98
  plain_txt_file = get_text_file_contents(file_path)
audio_to_text_tool.py CHANGED
@@ -1,61 +1,56 @@
1
- import whisper
2
- from langchain_core.tools import tool
3
-
4
- #@tool
5
- import whisper
6
- import os
7
-
8
- import os
9
- import whisper
10
- import subprocess
11
- import tempfile
12
-
13
- import os
14
- import whisper
15
- import subprocess
16
  import tempfile
 
 
 
17
 
18
- def audio_to_text(file_path: str) -> str:
 
 
 
 
19
  """
20
- Converts an MP3 file to WAV and transcribes it using Whisper.
21
-
22
  Args:
23
- file_path (str): Path to the MP3 file.
24
-
25
  Returns:
26
- str: Transcribed text.
27
  """
28
- if not os.path.isfile(file_path):
29
- raise FileNotFoundError(f"File not found: {file_path}")
 
 
30
 
31
- # Convert MP3 to temporary WAV file
32
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
33
- tmp_wav_path = tmp_wav.name
34
 
35
- try:
36
- # Convert to WAV using ffmpeg
37
- subprocess.run(
38
- ["ffmpeg", "-y", "-i", file_path, tmp_wav_path],
39
- stdout=subprocess.DEVNULL,
40
- stderr=subprocess.DEVNULL,
41
- check=True
42
- )
43
-
44
- model = whisper.load_model("base")
45
- result = model.transcribe(tmp_wav_path)
46
-
47
- if result is None or "text" not in result:
48
- raise ValueError("Transcription failed or result is invalid.")
49
-
50
- return result["text"]
51
-
52
- finally:
53
- # Clean up temporary WAV file
54
- if os.path.exists(tmp_wav_path):
55
- os.remove(tmp_wav_path)
56
 
57
- if __name__ == "__main__":
58
- try:
59
- print(audio_to_text("C:\\tmp\\ibm\\audio.mp3"))
60
  except Exception as e:
61
- print(f"Error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import tempfile
3
+ from openai import OpenAI
4
+ from langchain.tools import tool
5
+ from constants import OPENAI_KEY
6
 
7
+ # Initialize OpenAI client (uses OPENAI_API_KEY from environment or explicitly)
8
+ client = OpenAI(api_key=OPENAI_KEY)
9
+
10
+ #@tool
11
+ def audio_to_text(base64_audio_path: str) -> str:
12
  """
13
+ Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.
14
+
15
  Args:
16
+ base64_audio_path (str): Path to a file containing base64-encoded audio as text.
17
+
18
  Returns:
19
+ str: The transcribed text.
20
  """
21
+ try:
22
+ # Read base64 string
23
+ with open(base64_audio_path, "r") as f:
24
+ base64_str = f.read()
25
 
26
+ # Decode base64 to bytes
27
+ audio_bytes = base64.b64decode(base64_str)
 
28
 
29
+ # Save audio bytes to temp file (must be supported format: mp3, m4a, wav, etc.)
30
+ with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
31
+ temp_audio.write(audio_bytes)
32
+ temp_audio_path = temp_audio.name
33
+
34
+ # Transcribe using OpenAI Whisper API
35
+ with open(temp_audio_path, "rb") as audio_file:
36
+ transcript = client.audio.transcriptions.create(
37
+ model="whisper-1",
38
+ file=audio_file,
39
+ response_format="text"
40
+ )
41
+
42
+ return transcript.strip()
 
 
 
 
 
 
 
43
 
 
 
 
44
  except Exception as e:
45
+ return f"An error occurred during transcription: {str(e)}"
46
+
47
if __name__ == "__main__":
    # Manual smoke test: transcribe a sample file holding base64-encoded
    # audio (the .b64 artifact produced by the app's download pipeline).
    sample_b64_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"
    print("Transcription result:")
    print(audio_to_text(sample_b64_path))