Spaces:
Sleeping
Sleeping
Alexandre Gazola
committed on
Commit
·
e4749b5
1
Parent(s):
e514a96
teste whisper
Browse files- app.py +6 -2
- audio_to_text_tool.py +46 -51
app.py
CHANGED
@@ -8,6 +8,7 @@ import time
|
|
8 |
from langchain_agent import LangChainAgent
|
9 |
from analyse_chess_position_tool import get_chess_best_move
|
10 |
from utils import get_bytes, get_text_file_contents, get_base64
|
|
|
11 |
|
12 |
# (Keep Constants as is) ok!!!
|
13 |
# --- Constants ---
|
@@ -75,8 +76,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
75 |
print(f"Skipping item with missing task_id or question: {item}")
|
76 |
continue
|
77 |
|
78 |
-
|
79 |
-
|
80 |
|
81 |
try:
|
82 |
file_name = item.get("file_name")
|
@@ -89,6 +90,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
89 |
if file_name.endswith(('.mp3', '.xlsx', '.png')):
|
90 |
file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
|
91 |
question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
|
|
|
|
|
|
|
92 |
else:
|
93 |
file_path = os.path.join(BASE_DIR, 'files', file_name)
|
94 |
plain_txt_file = get_text_file_contents(file_path)
|
|
|
8 |
from langchain_agent import LangChainAgent
|
9 |
from analyse_chess_position_tool import get_chess_best_move
|
10 |
from utils import get_bytes, get_text_file_contents, get_base64
|
11 |
+
from audio_to_text_tool import audio_to_text
|
12 |
|
13 |
# (Keep Constants as is) ok!!!
|
14 |
# --- Constants ---
|
|
|
76 |
print(f"Skipping item with missing task_id or question: {item}")
|
77 |
continue
|
78 |
|
79 |
+
if "making a pie" not in question_text:
|
80 |
+
continue
|
81 |
|
82 |
try:
|
83 |
file_name = item.get("file_name")
|
|
|
90 |
if file_name.endswith(('.mp3', '.xlsx', '.png')):
|
91 |
file_path = os.path.join(BASE_DIR, 'files', f'{file_name}.b64')
|
92 |
question_text_for_agent += f'. The path to the base64 contents of the attatched file mentioned in the question is the following: {file_path}'
|
93 |
+
|
94 |
+
print('testing whisper')
|
95 |
+
audio_to_text(file_path)
|
96 |
else:
|
97 |
file_path = os.path.join(BASE_DIR, 'files', file_name)
|
98 |
plain_txt_file = get_text_file_contents(file_path)
|
audio_to_text_tool.py
CHANGED
@@ -1,61 +1,56 @@
|
|
1 |
-
import
|
2 |
-
from langchain_core.tools import tool
|
3 |
-
|
4 |
-
#@tool
|
5 |
-
import whisper
|
6 |
-
import os
|
7 |
-
|
8 |
-
import os
|
9 |
-
import whisper
|
10 |
-
import subprocess
|
11 |
-
import tempfile
|
12 |
-
|
13 |
-
import os
|
14 |
-
import whisper
|
15 |
-
import subprocess
|
16 |
import tempfile
|
|
|
|
|
|
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
|
19 |
"""
|
20 |
-
|
21 |
-
|
22 |
Args:
|
23 |
-
|
24 |
-
|
25 |
Returns:
|
26 |
-
str:
|
27 |
"""
|
28 |
-
|
29 |
-
|
|
|
|
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
tmp_wav_path = tmp_wav.name
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
return result["text"]
|
51 |
-
|
52 |
-
finally:
|
53 |
-
# Clean up temporary WAV file
|
54 |
-
if os.path.exists(tmp_wav_path):
|
55 |
-
os.remove(tmp_wav_path)
|
56 |
|
57 |
-
if __name__ == "__main__":
|
58 |
-
try:
|
59 |
-
print(audio_to_text("C:\\tmp\\ibm\\audio.mp3"))
|
60 |
except Exception as e:
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import tempfile
|
3 |
+
from openai import OpenAI
|
4 |
+
from langchain.tools import tool
|
5 |
+
from constants import OPENAI_KEY
|
6 |
|
7 |
+
# Initialize the OpenAI client, passing the API key explicitly from constants.OPENAI_KEY
|
8 |
+
client = OpenAI(api_key=OPENAI_KEY)
|
9 |
+
|
10 |
+
#@tool
|
11 |
+
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribe base64-encoded audio stored in a text file using OpenAI's Whisper API.

    The file at ``base64_audio_path`` is read as text, base64-decoded, written
    to a temporary ``.mp3`` file and sent to the ``whisper-1`` transcription
    endpoint. The temporary file is always removed before returning.

    Args:
        base64_audio_path (str): Path to a file containing base64-encoded audio as text.

    Returns:
        str: The transcribed text with surrounding whitespace stripped. If any
            step fails, a message starting with
            "An error occurred during transcription:" is returned instead of
            an exception being raised.
    """
    import os  # local import: this module's import block does not bring in os

    temp_audio_path = None
    try:
        # Read base64 string
        with open(base64_audio_path, "r") as f:
            base64_str = f.read()

        # Decode base64 to bytes
        audio_bytes = base64.b64decode(base64_str)

        # delete=False keeps the file on disk after this `with` closes it so it
        # can be reopened by name below; it is removed in the `finally` clause.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            # Record the path first so cleanup still happens if the write fails.
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_bytes)

        # Transcribe using OpenAI Whisper API
        with open(temp_audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )

        return transcript.strip()

    except Exception as e:
        # Deliberate best-effort contract: report the failure as text.
        return f"An error occurred during transcription: {str(e)}"

    finally:
        # Previously the temporary file was never deleted, leaking one file per call.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                # Cleanup is best-effort; it must not mask the function's result.
                pass
|
46 |
+
|
47 |
+
if __name__ == "__main__":
    # Manual smoke run: transcribe a sample file holding base64-encoded audio
    # and show the outcome on stdout.
    sample_b64_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"
    result_text = audio_to_text(sample_b64_path)

    # Header and transcription each go on their own line.
    print("Transcription result:", result_text, sep="\n")
|