Spaces:
Paused
Paused
WIP
Browse files
app.py
CHANGED
@@ -9,10 +9,9 @@ import os
|
|
9 |
import time
|
10 |
import subprocess
|
11 |
from loguru import logger
|
12 |
-
import browser_cookie3 # Add this import for browser cookies
|
13 |
|
14 |
MODEL_NAME = "muhtasham/whisper-tg"
|
15 |
-
BATCH_SIZE =
|
16 |
FILE_LIMIT_MB = 1000
|
17 |
YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
|
18 |
|
@@ -37,12 +36,21 @@ pipe = pipeline(
|
|
37 |
)
|
38 |
|
39 |
@spaces.GPU
|
40 |
-
def transcribe(inputs
|
41 |
if inputs is None:
|
42 |
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
|
43 |
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
def _return_yt_html_embed(yt_url):
|
48 |
try:
|
@@ -56,33 +64,10 @@ def _return_yt_html_embed(yt_url):
|
|
56 |
logger.error(f"Error creating embed HTML: {str(e)}")
|
57 |
raise gr.Error("Invalid YouTube URL format")
|
58 |
|
59 |
-
def get_youtube_cookies():
|
60 |
-
"""Get YouTube cookies from the browser"""
|
61 |
-
try:
|
62 |
-
# Try Chrome first
|
63 |
-
cookies = browser_cookie3.chrome(domain_name='.youtube.com')
|
64 |
-
except:
|
65 |
-
try:
|
66 |
-
# Try Firefox if Chrome fails
|
67 |
-
cookies = browser_cookie3.firefox(domain_name='.youtube.com')
|
68 |
-
except:
|
69 |
-
try:
|
70 |
-
# Try Safari if Firefox fails
|
71 |
-
cookies = browser_cookie3.safari(domain_name='.youtube.com')
|
72 |
-
except Exception as e:
|
73 |
-
logger.warning(f"Could not get browser cookies: {str(e)}")
|
74 |
-
return None
|
75 |
-
|
76 |
-
# Convert cookies to the format yt-dlp expects
|
77 |
-
return {cookie.name: cookie.value for cookie in cookies}
|
78 |
-
|
79 |
def download_yt_audio(yt_url, filename):
|
80 |
logger.info(f"Starting download for URL: {yt_url}")
|
81 |
|
82 |
-
#
|
83 |
-
cookies = get_youtube_cookies()
|
84 |
-
|
85 |
-
# Configure yt-dlp options
|
86 |
ydl_opts = {
|
87 |
"format": "bestaudio/best",
|
88 |
"postprocessors": [{
|
@@ -99,13 +84,25 @@ def download_yt_audio(yt_url, filename):
|
|
99 |
"ignoreerrors": False,
|
100 |
"logtostderr": False,
|
101 |
"verbose": False,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
}
|
103 |
|
104 |
-
# Add cookies if available
|
105 |
-
if cookies:
|
106 |
-
ydl_opts["cookiesfrombrowser"] = ("chrome",) # or "firefox" or "safari"
|
107 |
-
logger.info("Using browser cookies for YouTube authentication")
|
108 |
-
|
109 |
try:
|
110 |
# First, get video info without downloading
|
111 |
with youtube_dl.YoutubeDL({"quiet": True}) as ydl:
|
@@ -151,7 +148,7 @@ def download_yt_audio(yt_url, filename):
|
|
151 |
raise gr.Error(f"An unexpected error occurred: {str(e)}")
|
152 |
|
153 |
@spaces.GPU
|
154 |
-
def yt_transcribe(yt_url
|
155 |
html_embed_str = _return_yt_html_embed(yt_url)
|
156 |
|
157 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
@@ -163,9 +160,17 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
|
|
163 |
inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
|
164 |
inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
demo = gr.Blocks(theme=gr.themes.Ocean())
|
171 |
|
@@ -174,7 +179,10 @@ mf_transcribe = gr.Interface(
|
|
174 |
inputs=[
|
175 |
gr.Audio(sources="microphone", type="filepath"),
|
176 |
],
|
177 |
-
outputs=
|
|
|
|
|
|
|
178 |
title="Whisper Large V3 Turbo: Transcribe Audio",
|
179 |
description=(
|
180 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
@@ -189,7 +197,10 @@ file_transcribe = gr.Interface(
|
|
189 |
inputs=[
|
190 |
gr.Audio(sources="upload", type="filepath", label="Audio file"),
|
191 |
],
|
192 |
-
outputs=
|
|
|
|
|
|
|
193 |
title="Whisper Large V3: Transcribe Audio",
|
194 |
description=(
|
195 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
@@ -204,7 +215,11 @@ yt_transcribe = gr.Interface(
|
|
204 |
inputs=[
|
205 |
gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
|
206 |
],
|
207 |
-
outputs=[
|
|
|
|
|
|
|
|
|
208 |
title="Whisper Large V3: Transcribe YouTube",
|
209 |
description=(
|
210 |
"Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
|
|
|
9 |
import time
|
10 |
import subprocess
|
11 |
from loguru import logger
|
|
|
12 |
|
13 |
MODEL_NAME = "muhtasham/whisper-tg"
|
14 |
+
BATCH_SIZE = 32
|
15 |
FILE_LIMIT_MB = 1000
|
16 |
YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
|
17 |
|
|
|
36 |
)
|
37 |
|
38 |
@spaces.GPU
def transcribe(inputs):
    """Transcribe submitted audio and return the text plus a timestamped listing.

    Args:
        inputs: The audio to transcribe, handed unchanged to ``pipe``.
            ``None`` is treated as "nothing was submitted".

    Returns:
        A ``(text, timestamps)`` tuple: the full transcription taken from
        ``result["text"]`` and a newline-joined string with one
        ``[start - end] text`` line per entry of ``result["chunks"]``.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)

    def _format_seconds(seconds):
        # A chunk boundary may be None (typically the end of the last chunk
        # when no closing timestamp was predicted). Applying ":.2f" to None
        # raises TypeError and would discard the whole transcription, so a
        # missing boundary is rendered as "?" instead.
        return "?" if seconds is None else f"{seconds:.2f}s"

    # One "[start - end] text" line per chunk; identical to the previous
    # output whenever both boundaries are numbers.
    timestamps = []
    for chunk in result["chunks"]:
        start_time, end_time = chunk["timestamp"]
        text = chunk["text"].strip()
        timestamps.append(f"[{_format_seconds(start_time)} - {_format_seconds(end_time)}] {text}")

    return result["text"], "\n".join(timestamps)
|
54 |
|
55 |
def _return_yt_html_embed(yt_url):
|
56 |
try:
|
|
|
64 |
logger.error(f"Error creating embed HTML: {str(e)}")
|
65 |
raise gr.Error("Invalid YouTube URL format")
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
def download_yt_audio(yt_url, filename):
|
68 |
logger.info(f"Starting download for URL: {yt_url}")
|
69 |
|
70 |
+
# Configure yt-dlp options with anti-bot detection measures
|
|
|
|
|
|
|
71 |
ydl_opts = {
|
72 |
"format": "bestaudio/best",
|
73 |
"postprocessors": [{
|
|
|
84 |
"ignoreerrors": False,
|
85 |
"logtostderr": False,
|
86 |
"verbose": False,
|
87 |
+
# Anti-bot detection options
|
88 |
+
"cookiesfrombrowser": ("chrome",),
|
89 |
+
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
90 |
+
"http_headers": {
|
91 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
92 |
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
93 |
+
"Accept-Language": "en-us,en;q=0.5",
|
94 |
+
"Sec-Fetch-Mode": "navigate",
|
95 |
+
},
|
96 |
+
"socket_timeout": 30,
|
97 |
+
"retries": 10,
|
98 |
+
"fragment_retries": 10,
|
99 |
+
"file_access_retries": 10,
|
100 |
+
"extractor_retries": 10,
|
101 |
+
"ignoreerrors": False,
|
102 |
+
"no_warnings": True,
|
103 |
+
"quiet": True,
|
104 |
}
|
105 |
|
|
|
|
|
|
|
|
|
|
|
106 |
try:
|
107 |
# First, get video info without downloading
|
108 |
with youtube_dl.YoutubeDL({"quiet": True}) as ydl:
|
|
|
148 |
raise gr.Error(f"An unexpected error occurred: {str(e)}")
|
149 |
|
150 |
@spaces.GPU
|
151 |
+
def yt_transcribe(yt_url):
|
152 |
html_embed_str = _return_yt_html_embed(yt_url)
|
153 |
|
154 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
|
|
160 |
inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
|
161 |
inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
|
162 |
|
163 |
+
result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)
|
164 |
+
|
165 |
+
# Format timestamps with text
|
166 |
+
timestamps = []
|
167 |
+
for chunk in result["chunks"]:
|
168 |
+
start_time = chunk["timestamp"][0]
|
169 |
+
end_time = chunk["timestamp"][1]
|
170 |
+
text = chunk["text"].strip()
|
171 |
+
timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text}")
|
172 |
+
|
173 |
+
return html_embed_str, result["text"], "\n".join(timestamps)
|
174 |
|
175 |
demo = gr.Blocks(theme=gr.themes.Ocean())
|
176 |
|
|
|
179 |
inputs=[
|
180 |
gr.Audio(sources="microphone", type="filepath"),
|
181 |
],
|
182 |
+
outputs=[
|
183 |
+
gr.Textbox(label="Transcription", lines=10),
|
184 |
+
gr.Textbox(label="Timestamps", lines=10),
|
185 |
+
],
|
186 |
title="Whisper Large V3 Turbo: Transcribe Audio",
|
187 |
description=(
|
188 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
|
|
197 |
inputs=[
|
198 |
gr.Audio(sources="upload", type="filepath", label="Audio file"),
|
199 |
],
|
200 |
+
outputs=[
|
201 |
+
gr.Textbox(label="Transcription", lines=10),
|
202 |
+
gr.Textbox(label="Timestamps", lines=10),
|
203 |
+
],
|
204 |
title="Whisper Large V3: Transcribe Audio",
|
205 |
description=(
|
206 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
|
|
215 |
inputs=[
|
216 |
gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
|
217 |
],
|
218 |
+
outputs=[
|
219 |
+
gr.HTML(label="Video"),
|
220 |
+
gr.Textbox(label="Transcription", lines=10),
|
221 |
+
gr.Textbox(label="Timestamps", lines=10),
|
222 |
+
],
|
223 |
title="Whisper Large V3: Transcribe YouTube",
|
224 |
description=(
|
225 |
"Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
|