Alexandre Gazola committed on
Commit
7d34100
·
1 Parent(s): 36ef534
Files changed (2) hide show
  1. app.py +5 -5
  2. audio_to_text_tool.py +35 -23
app.py CHANGED
@@ -77,12 +77,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
77
  print(f"Skipping item with missing task_id or question: {item}")
78
  continue
79
 
80
- if "in response to the question" not in question_text:
81
- continue
82
 
83
- print('testing whisper with youtube video...')
84
- whisper_return = audio_to_text_from_youtube('https://www.youtube.com/watch?v=1htKBjuUWec')
85
- print(whisper_return)
86
 
87
  try:
88
  file_name = item.get("file_name")
 
77
  print(f"Skipping item with missing task_id or question: {item}")
78
  continue
79
 
80
+ #if "in response to the question" not in question_text:
81
+ # continue
82
 
83
+ #print('testing whisper with youtube video...')
84
+ #whisper_return = audio_to_text_from_youtube('https://www.youtube.com/watch?v=1htKBjuUWec')
85
+ #print(whisper_return)
86
 
87
  try:
88
  file_name = item.get("file_name")
audio_to_text_tool.py CHANGED
@@ -3,7 +3,12 @@ import tempfile
3
  from openai import OpenAI
4
  from langchain.tools import tool
5
  from constants import OPENAI_KEY
6
- from pytube import YouTube
 
 
 
 
 
7
 
8
  # Initialize OpenAI client (uses OPENAI_API_KEY from environment or explicitly)
9
  client = OpenAI(api_key=OPENAI_KEY)
@@ -45,7 +50,7 @@ def audio_to_text(base64_audio_path: str) -> str:
45
  except Exception as e:
46
  return f"An error occurred during transcription: {str(e)}"
47
 
48
- #@tool
49
  def audio_to_text_from_youtube(youtube_url: str) -> str:
50
  """
51
  Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.
@@ -56,27 +61,34 @@ def audio_to_text_from_youtube(youtube_url: str) -> str:
56
  Returns:
57
  str: Transcribed text.
58
  """
59
- try:
60
- # Download audio stream
61
- yt = YouTube(youtube_url)
62
- audio_stream = yt.streams.filter(only_audio=True).first()
63
-
64
- if not audio_stream:
65
- return "No audio stream found in the YouTube video."
66
-
67
- with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
68
- audio_stream.download(output_path=None, filename=temp_audio_file.name)
69
- temp_audio_path = temp_audio_file.name
70
-
71
- # Transcribe using OpenAI Whisper
72
- with open(temp_audio_path, "rb") as audio_file:
73
- transcript = client.audio.transcriptions.create(
74
- model="whisper-1",
75
- file=audio_file,
76
- response_format="text"
77
- )
78
-
79
- return transcript.strip()
 
 
 
 
 
 
 
80
 
81
  except Exception as e:
82
  return f"An error occurred during YouTube transcription: {str(e)}"
 
3
  from openai import OpenAI
4
  from langchain.tools import tool
5
  from constants import OPENAI_KEY
6
+ import tempfile
7
+ import os
8
+ import openai
9
+ from openai import OpenAI
10
+ from langchain.tools import tool
11
+ import yt_dlp
12
 
13
  # Initialize OpenAI client (uses OPENAI_API_KEY from environment or explicitly)
14
  client = OpenAI(api_key=OPENAI_KEY)
 
50
  except Exception as e:
51
  return f"An error occurred during transcription: {str(e)}"
52
 
53
+ @tool
54
  def audio_to_text_from_youtube(youtube_url: str) -> str:
55
  """
56
  Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.
 
61
  Returns:
62
  str: Transcribed text.
63
  """
64
+ try:
65
+ with tempfile.TemporaryDirectory() as tmpdir:
66
+ audio_output_path = os.path.join(tmpdir, "audio.mp3")
67
+
68
+ # Download best audio using yt-dlp
69
+ ydl_opts = {
70
+ "format": "bestaudio/best",
71
+ "outtmpl": audio_output_path,
72
+ "quiet": True,
73
+ "postprocessors": [{
74
+ "key": "FFmpegExtractAudio",
75
+ "preferredcodec": "mp3",
76
+ "preferredquality": "192",
77
+ }],
78
+ }
79
+
80
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
81
+ ydl.download([youtube_url])
82
+
83
+ # Transcribe with OpenAI Whisper
84
+ with open(audio_output_path, "rb") as audio_file:
85
+ transcript = client.audio.transcriptions.create(
86
+ model="whisper-1",
87
+ file=audio_file,
88
+ response_format="text"
89
+ )
90
+
91
+ return transcript.strip()
92
 
93
  except Exception as e:
94
  return f"An error occurred during YouTube transcription: {str(e)}"