Alexandre Gazola committed on
Commit
e4749b5
·
1 Parent(s): e514a96

teste whisper

Browse files
Files changed (2) hide show
  1. app.py +6 -2
  2. audio_to_text_tool.py +46 -51
app.py CHANGED
@@ -8,6 +8,7 @@ import time
8
  from langchain_agent import LangChainAgent
9
  from analyse_chess_position_tool import get_chess_best_move
10
  from utils import get_bytes, get_text_file_contents, get_base64
 
11
 
12
  # (Keep Constants as is) ok!!!
13
  # --- Constants ---
@@ -75,8 +76,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
75
  print(f"Skipping item with missing task_id or question: {item}")
76
  continue
77
 
78
- #if "chess" not in question_text:
79
- # continue
80
 
81
  try:
82
  file_name = item.get("file_name")
@@ -89,6 +90,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
89
  if file_name.endswith(('.mp3', '.xlsx', '.png')):
90
  file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
91
  question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
 
 
 
92
  else:
93
  file_path = os.path.join(BASE_DIR, 'files', file_name)
94
  plain_txt_file = get_text_file_contents(file_path)
 
8
  from langchain_agent import LangChainAgent
9
  from analyse_chess_position_tool import get_chess_best_move
10
  from utils import get_bytes, get_text_file_contents, get_base64
11
+ from audio_to_text_tool import audio_to_text
12
 
13
  # (Keep Constants as is) ok!!!
14
  # --- Constants ---
 
76
  print(f"Skipping item with missing task_id or question: {item}")
77
  continue
78
 
79
+ if "making a pie" not in question_text:
80
+ continue
81
 
82
  try:
83
  file_name = item.get("file_name")
 
90
  if file_name.endswith(('.mp3', '.xlsx', '.png')):
91
  file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
92
  question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
93
+
94
+ print('testing whisper')
95
+ audio_to_text(file_path)
96
  else:
97
  file_path = os.path.join(BASE_DIR, 'files', file_name)
98
  plain_txt_file = get_text_file_contents(file_path)
audio_to_text_tool.py CHANGED
@@ -1,61 +1,56 @@
1
- import whisper
2
- from langchain_core.tools import tool
3
-
4
- #@tool
5
- import whisper
6
- import os
7
-
8
- import os
9
- import whisper
10
- import subprocess
11
- import tempfile
12
-
13
- import os
14
- import whisper
15
- import subprocess
16
  import tempfile
 
 
 
17
 
18
- def audio_to_text(file_path: str) -> str:
 
 
 
 
19
  """
20
- Converts an MP3 file to WAV and transcribes it using Whisper.
21
-
22
  Args:
23
- file_path (str): Path to the MP3 file.
24
-
25
  Returns:
26
- str: Transcribed text.
27
  """
28
- if not os.path.isfile(file_path):
29
- raise FileNotFoundError(f"File not found: {file_path}")
 
 
30
 
31
- # Convert MP3 to temporary WAV file
32
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
33
- tmp_wav_path = tmp_wav.name
34
 
35
- try:
36
- # Convert to WAV using ffmpeg
37
- subprocess.run(
38
- ["ffmpeg", "-y", "-i", file_path, tmp_wav_path],
39
- stdout=subprocess.DEVNULL,
40
- stderr=subprocess.DEVNULL,
41
- check=True
42
- )
43
-
44
- model = whisper.load_model("base")
45
- result = model.transcribe(tmp_wav_path)
46
-
47
- if result is None or "text" not in result:
48
- raise ValueError("Transcription failed or result is invalid.")
49
-
50
- return result["text"]
51
-
52
- finally:
53
- # Clean up temporary WAV file
54
- if os.path.exists(tmp_wav_path):
55
- os.remove(tmp_wav_path)
56
 
57
- if __name__ == "__main__":
58
- try:
59
- print(audio_to_text("C:\\tmp\\ibm\\audio.mp3"))
60
  except Exception as e:
61
- print(f"Error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import tempfile
3
+ from openai import OpenAI
4
+ from langchain.tools import tool
5
+ from constants import OPENAI_KEY
6
 
7
+ # Initialize OpenAI client (uses OPENAI_API_KEY from environment or explicitly)
8
+ client = OpenAI(api_key=OPENAI_KEY)
9
+
10
+ #@tool
11
+ def audio_to_text(base64_audio_path: str) -> str:
12
  """
13
+ Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.
14
+
15
  Args:
16
+ base64_audio_path (str): Path to a file containing base64-encoded audio as text.
17
+
18
  Returns:
19
+ str: The transcribed text.
20
  """
21
+ try:
22
+ # Read base64 string
23
+ with open(base64_audio_path, "r") as f:
24
+ base64_str = f.read()
25
 
26
+ # Decode base64 to bytes
27
+ audio_bytes = base64.b64decode(base64_str)
 
28
 
29
+ # Save audio bytes to temp file (must be supported format: mp3, m4a, wav, etc.)
30
+ with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
31
+ temp_audio.write(audio_bytes)
32
+ temp_audio_path = temp_audio.name
33
+
34
+ # Transcribe using OpenAI Whisper API
35
+ with open(temp_audio_path, "rb") as audio_file:
36
+ transcript = client.audio.transcriptions.create(
37
+ model="whisper-1",
38
+ file=audio_file,
39
+ response_format="text"
40
+ )
41
+
42
+ return transcript.strip()
 
 
 
 
 
 
 
43
 
 
 
 
44
  except Exception as e:
45
+ return f"An error occurred during transcription: {str(e)}"
46
+
47
if __name__ == "__main__":
    # Manual smoke test: transcribe a sample file holding base64-encoded
    # audio (the .b64 artifact produced by the app's download pipeline).
    sample_b64_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"
    print("Transcription result:")
    print(audio_to_text(sample_b64_path))