muhtasham committed on
Commit
5476911
·
1 Parent(s): c2dcdf8
Files changed (1) hide show
  1. app.py +109 -210
app.py CHANGED
@@ -41,63 +41,21 @@ def format_time(seconds):
41
  milliseconds = td.microseconds // 1000
42
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
43
 
44
- def generate_srt(chunks):
45
- """Generate SRT format subtitles from transcription chunks.
46
-
47
- Args:
48
- chunks (list): List of dictionaries containing transcription chunks.
49
- Each chunk must have:
50
- - "timestamp": List of [start_time, end_time] in seconds
51
- - "text": The transcribed text for that time segment
52
-
53
- Returns:
54
- str: SRT formatted subtitles string with format:
55
- ```
56
- 1
57
- HH:MM:SS,mmm --> HH:MM:SS,mmm
58
- Text content
59
-
60
- 2
61
- HH:MM:SS,mmm --> HH:MM:SS,mmm
62
- Text content
63
- ...
64
- ```
65
-
66
- Example:
67
- >>> chunks = [
68
- ... {"timestamp": [0.0, 1.5], "text": "Hello"},
69
- ... {"timestamp": [1.5, 3.0], "text": "World"}
70
- ... ]
71
- >>> generate_srt(chunks)
72
- '1\\n00:00:00,000 --> 00:00:01,500\\nHello\\n\\n2\\n00:00:01,500 --> 00:00:03,000\\nWorld\\n\\n'
73
- """
74
  srt_content = []
75
- for i, chunk in enumerate(chunks, 1):
76
- start_time = format_time(chunk["timestamp"][0])
77
- end_time = format_time(chunk["timestamp"][1])
78
- text = chunk.get("text", "").strip()
79
  srt_content.append(f"{i}\n{start_time} --> {end_time}\n{text}\n\n")
80
  return "".join(srt_content)
81
 
82
  def save_srt_to_file(srt_content):
83
- """Save SRT content to a temporary file.
84
-
85
- Args:
86
- srt_content (str): The SRT formatted subtitles content to save.
87
-
88
- Returns:
89
- str or None: Path to the temporary file if content was saved,
90
- None if srt_content was empty.
91
-
92
- Note:
93
- The temporary file is created with delete=False to allow it to be
94
- used after the function returns. The file should be deleted by the
95
- caller when no longer needed.
96
- """
97
  if not srt_content:
98
  return None
99
 
100
- # Create a temporary file with .srt extension
101
  temp_file = tempfile.NamedTemporaryFile(suffix='.srt', delete=False)
102
  temp_file.write(srt_content.encode('utf-8'))
103
  temp_file.close()
@@ -116,17 +74,7 @@ def check_ffmpeg():
116
  check_ffmpeg()
117
 
118
  def download_youtube_audio(url):
119
- """Download audio from YouTube using Sieve API.
120
-
121
- Args:
122
- url (str): YouTube video URL
123
-
124
- Returns:
125
- str: Path to downloaded audio file
126
-
127
- Raises:
128
- gr.Error: If download fails or API key is not set
129
- """
130
  logger.info(f"Starting YouTube audio download process for URL: {url}")
131
 
132
  if not SIEVE_API_KEY:
@@ -134,13 +82,11 @@ def download_youtube_audio(url):
134
  raise gr.Error("SIEVE_API_KEY environment variable is not set")
135
 
136
  try:
137
- # Create a temporary file for the audio
138
  temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
139
  temp_file.close()
140
  output_path = temp_file.name
141
  logger.info(f"Created temporary file at: {output_path}")
142
 
143
- # Prepare the request to Sieve API with exact parameters
144
  payload = {
145
  "function": "sieve/youtube-downloader",
146
  "inputs": {
@@ -158,53 +104,42 @@ def download_youtube_audio(url):
158
  "audio_format": "mp3"
159
  }
160
  }
161
- logger.debug(f"Prepared Sieve API payload: {payload}")
162
 
163
  # Send request to Sieve API with retries
164
  max_retries = 3
165
- retry_delay = 5 # seconds
166
 
167
  for attempt in range(max_retries):
168
  try:
169
- logger.info(f"Sending request to Sieve API (attempt {attempt + 1}/{max_retries})...")
170
  response = requests.post(
171
  f"{SIEVE_API_URL}/push",
172
  headers={"X-API-Key": SIEVE_API_KEY, "Content-Type": "application/json"},
173
  json=payload,
174
- timeout=1800 # Add timeout
175
  )
176
  response.raise_for_status()
177
  response_data = response.json()
178
- logger.debug(f"Sieve API response: {response_data}")
179
 
180
  job_id = response_data.get("id")
181
  if not job_id:
182
- logger.error("No job ID received from Sieve API")
183
  if attempt < max_retries - 1:
184
- logger.warning(f"Retrying in {retry_delay} seconds...")
185
  time.sleep(retry_delay)
186
  continue
187
  raise gr.Error("Failed to get job ID from Sieve API")
188
  break
189
 
190
  except requests.exceptions.RequestException as e:
191
- logger.warning(f"Request failed (attempt {attempt + 1}/{max_retries}): {str(e)}")
192
  if attempt < max_retries - 1:
193
- logger.info(f"Retrying in {retry_delay} seconds...")
194
  time.sleep(retry_delay)
195
  continue
196
  raise
197
 
198
- logger.info(f"Received job ID: {job_id}")
199
-
200
  # Poll for job completion
201
  poll_count = 0
202
  max_polls = 1800
203
- last_status = None
204
 
205
  while True:
206
  poll_count += 1
207
- logger.info(f"Polling job status (attempt {poll_count}/{max_polls})...")
208
 
209
  try:
210
  job_response = requests.get(
@@ -216,212 +151,170 @@ def download_youtube_audio(url):
216
  job_data = job_response.json()
217
 
218
  status = job_data.get("status")
219
- if status != last_status:
220
- logger.info(f"Job status changed: {status}")
221
- last_status = status
222
 
223
  if status == "completed" or status == "finished":
224
- logger.debug(f"Job status response: {job_data}")
225
- logger.info("Job completed successfully")
226
- # Get the output data
227
  output_data = job_data.get("outputs", [])
228
  if not output_data:
229
- logger.error(f"No output data found in completed job response. Full response: {job_data}")
230
  raise gr.Error("No output data in job response")
231
 
232
- # Get the first output which should contain the audio URL
233
  first_output = output_data[0]
234
  if not isinstance(first_output, dict):
235
- logger.error(f"Unexpected output format: {first_output}")
236
  raise gr.Error("Unexpected output format from job response")
237
 
238
- # Get the data field which contains the URL
239
  output_data = first_output.get("data", {})
240
  if not isinstance(output_data, dict):
241
- logger.error(f"Unexpected data format: {output_data}")
242
  raise gr.Error("Unexpected data format from job response")
243
 
244
- # Get the audio URL from the data
245
  audio_url = output_data.get("url")
246
  if not audio_url:
247
- logger.error(f"No audio URL found in output data. Output data: {output_data}")
248
  raise gr.Error("No audio URL in output data")
249
 
250
- logger.info(f"Received audio URL from Sieve: {audio_url}")
251
-
252
- # Download the audio file
253
- logger.info("Downloading audio file from Sieve storage...")
254
  audio_response = requests.get(audio_url, timeout=1800)
255
  audio_response.raise_for_status()
256
 
257
- file_size = len(audio_response.content)
258
- logger.info(f"Downloaded audio file size: {file_size/1024/1024:.2f} MB")
259
-
260
- # Save the file
261
  with open(output_path, "wb") as f:
262
  f.write(audio_response.content)
263
- logger.info(f"Successfully saved audio to: {output_path}")
264
 
265
- # Break out of the polling loop after successful download
266
  break
267
 
268
  elif status == "failed":
269
  error_msg = job_data.get("error", "Unknown error")
270
- logger.error(f"Job failed with error: {error_msg}")
271
  raise gr.Error(f"Job failed: {error_msg}")
272
 
273
  if poll_count >= max_polls:
274
- logger.error("Maximum polling attempts reached")
275
  raise gr.Error("Download took too long. Please try again or check if the video is accessible.")
276
 
277
- logger.info("Job still processing, waiting 2 seconds before next poll...")
278
  time.sleep(2)
279
 
280
  except requests.exceptions.RequestException as e:
281
- logger.warning(f"Poll request failed: {str(e)}")
282
  if poll_count >= max_polls:
283
  raise gr.Error("Failed to check job status. Please try again.")
284
  time.sleep(2)
285
 
286
- except requests.exceptions.RequestException as e:
287
- logger.exception(f"Network error during YouTube download: {str(e)}")
288
- raise gr.Error(f"Failed to download YouTube audio: Network error - {str(e)}")
289
  except Exception as e:
290
- logger.exception(f"Unexpected error during YouTube download: {str(e)}")
291
  raise gr.Error(f"Failed to download YouTube audio: {str(e)}")
292
 
293
  return output_path
294
 
295
- def transcribe_youtube(url, return_timestamps, generate_subs):
296
- """Transcribe audio from YouTube video.
297
-
298
- Args:
299
- url (str): YouTube video URL
300
- return_timestamps (bool): Whether to include timestamps in output
301
- generate_subs (bool): Whether to generate SRT subtitles
302
-
303
- Returns:
304
- tuple: (formatted_result, srt_file, correction_text)
305
- """
306
- logger.info(f"Starting YouTube transcription process for URL: {url}")
307
- logger.info(f"Options - Timestamps: {return_timestamps}, Generate subtitles: {generate_subs}")
308
-
309
  try:
310
- # Download audio from YouTube
311
- logger.info("Step 1: Downloading audio from YouTube...")
312
- audio_path = download_youtube_audio(url)
313
- logger.info(f"Successfully downloaded audio to: {audio_path}")
314
 
315
- # Transcribe the downloaded audio
316
- logger.info("Step 2: Transcribing downloaded audio...")
317
- result = transcribe(audio_path, return_timestamps, generate_subs)
318
- logger.info("Successfully completed transcription")
 
 
 
319
 
320
- # Clean up the temporary file
321
- logger.info("Step 3: Cleaning up temporary files...")
322
- try:
323
- os.unlink(audio_path)
324
- logger.info(f"Successfully deleted temporary file: {audio_path}")
325
- except Exception as e:
326
- logger.warning(f"Failed to delete temporary file: {str(e)}")
327
 
328
- return result
329
 
330
- except Exception as e:
331
- logger.exception(f"Error in YouTube transcription: {str(e)}")
332
- raise gr.Error(f"Failed to transcribe YouTube video: {str(e)}")
333
 
334
- def transcribe(inputs, return_timestamps, generate_subs):
335
- """Transcribe audio input using Whisper model via Hugging Face Inference API.
336
-
337
- Args:
338
- inputs (str): Path to audio file to transcribe.
339
- return_timestamps (bool): Whether to include timestamps in output.
340
- generate_subs (bool): Whether to generate SRT subtitles.
341
-
342
- Returns:
343
- tuple: (formatted_result, srt_file, correction_text)
344
- - formatted_result (dict): Transcription results
345
- - srt_file (str): Path to SRT file if generated, None otherwise
346
- - correction_text (str): Empty string for corrections
347
-
348
- Raises:
349
- gr.Error: If no audio file is provided or transcription fails.
350
- """
351
- logger.info(f"Starting transcription process for file: {inputs}")
352
- logger.info(f"Options - Timestamps: {return_timestamps}, Generate subtitles: {generate_subs}")
353
 
354
- if inputs is None:
355
- logger.warning("No audio file submitted")
356
- raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
357
-
358
  try:
359
- # Read the audio file
360
- logger.info("Step 1: Reading audio file...")
361
- with open(inputs, "rb") as f:
362
- data = f.read()
363
- file_size = len(data)
364
- logger.info(f"Successfully read audio file, size: {file_size/1024/1024:.2f} MB")
365
 
366
  # Prepare request parameters
367
- params = {"return_timestamps": "true"} if return_timestamps else {}
 
 
 
 
 
 
368
 
369
  # Send request to API
370
- logger.info("Step 2: Sending request to Whisper API...")
371
  response = requests.post(
372
- API_URL,
373
- headers=headers,
374
- data=data,
375
  params=params
376
  )
377
  response.raise_for_status()
378
  result = response.json()
379
- logger.debug(f"API response: {result}")
380
- logger.info("Successfully received response from API")
381
 
382
- # Format response as JSON
383
- logger.info("Step 3: Processing API response...")
384
  formatted_result = {
385
- "text": result.get("text", "")
 
386
  }
387
- logger.info(f"Transcribed text length: {len(formatted_result['text'])} characters")
388
-
389
- chunks = []
390
- if return_timestamps and "chunks" in result:
391
- logger.info(f"Processing {len(result['chunks'])} chunks for timestamps")
392
- for i, chunk in enumerate(result["chunks"]):
393
- logger.debug(f"Processing chunk {i}: {chunk}")
394
- try:
395
- start_time = chunk.get("timestamp", [None, None])[0]
396
- end_time = chunk.get("timestamp", [None, None])[1]
397
- text = chunk.get("text", "").strip()
398
-
399
- if start_time is not None and end_time is not None:
400
- chunk_data = {
401
- "text": text,
402
- "timestamp": [start_time, end_time]
403
- }
404
- chunks.append(chunk_data)
405
- else:
406
- logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
407
- except Exception as chunk_error:
408
- logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
409
- continue
410
-
411
- formatted_result["chunks"] = chunks
412
- logger.info(f"Successfully processed {len(chunks)} chunks with timestamps")
413
 
414
  # Generate subtitles if requested
415
  srt_file = None
416
- if generate_subs and chunks:
417
- logger.info("Step 4: Generating SRT subtitles...")
418
- srt_content = generate_srt(chunks)
419
  srt_file = save_srt_to_file(srt_content)
420
- logger.info(f"Successfully generated SRT file: {srt_file}")
421
 
422
- logger.info("Transcription process completed successfully")
423
- return formatted_result, srt_file, "" # Return empty string for correction textbox
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  except requests.exceptions.RequestException as e:
426
  logger.exception(f"API request failed: {str(e)}")
427
  raise gr.Error(f"Failed to transcribe audio: API request failed - {str(e)}")
@@ -439,6 +332,8 @@ youtube_transcribe = gr.Interface(
439
  gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."),
440
  gr.Checkbox(label="Include timestamps", value=True),
441
  gr.Checkbox(label="Generate subtitles", value=True),
 
 
442
  ],
443
  outputs=[
444
  gr.JSON(label="Transcription", open=True),
@@ -461,6 +356,8 @@ mf_transcribe = gr.Interface(
461
  gr.Audio(sources="microphone", type="filepath"),
462
  gr.Checkbox(label="Include timestamps", value=True),
463
  gr.Checkbox(label="Generate subtitles", value=True),
 
 
464
  ],
465
  outputs=[
466
  gr.JSON(label="Transcription", open=True),
@@ -480,6 +377,8 @@ file_transcribe = gr.Interface(
480
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
481
  gr.Checkbox(label="Include timestamps", value=True),
482
  gr.Checkbox(label="Generate subtitles", value=True),
 
 
483
  ],
484
  outputs=[
485
  gr.JSON(label="Transcription", open=True),
 
41
  milliseconds = td.microseconds // 1000
42
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
43
 
44
+ def generate_srt(segments):
45
+ """Generate SRT format subtitles from transcription segments."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  srt_content = []
47
+ for i, segment in enumerate(segments, 1):
48
+ start_time = format_time(segment["start"])
49
+ end_time = format_time(segment["end"])
50
+ text = segment.get("text", "").strip()
51
  srt_content.append(f"{i}\n{start_time} --> {end_time}\n{text}\n\n")
52
  return "".join(srt_content)
53
 
54
  def save_srt_to_file(srt_content):
55
+ """Save SRT content to a temporary file."""
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  if not srt_content:
57
  return None
58
 
 
59
  temp_file = tempfile.NamedTemporaryFile(suffix='.srt', delete=False)
60
  temp_file.write(srt_content.encode('utf-8'))
61
  temp_file.close()
 
74
  check_ffmpeg()
75
 
76
  def download_youtube_audio(url):
77
+ """Download audio from YouTube using Sieve API."""
 
 
 
 
 
 
 
 
 
 
78
  logger.info(f"Starting YouTube audio download process for URL: {url}")
79
 
80
  if not SIEVE_API_KEY:
 
82
  raise gr.Error("SIEVE_API_KEY environment variable is not set")
83
 
84
  try:
 
85
  temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
86
  temp_file.close()
87
  output_path = temp_file.name
88
  logger.info(f"Created temporary file at: {output_path}")
89
 
 
90
  payload = {
91
  "function": "sieve/youtube-downloader",
92
  "inputs": {
 
104
  "audio_format": "mp3"
105
  }
106
  }
 
107
 
108
  # Send request to Sieve API with retries
109
  max_retries = 3
110
+ retry_delay = 5
111
 
112
  for attempt in range(max_retries):
113
  try:
 
114
  response = requests.post(
115
  f"{SIEVE_API_URL}/push",
116
  headers={"X-API-Key": SIEVE_API_KEY, "Content-Type": "application/json"},
117
  json=payload,
118
+ timeout=1800
119
  )
120
  response.raise_for_status()
121
  response_data = response.json()
 
122
 
123
  job_id = response_data.get("id")
124
  if not job_id:
 
125
  if attempt < max_retries - 1:
 
126
  time.sleep(retry_delay)
127
  continue
128
  raise gr.Error("Failed to get job ID from Sieve API")
129
  break
130
 
131
  except requests.exceptions.RequestException as e:
 
132
  if attempt < max_retries - 1:
 
133
  time.sleep(retry_delay)
134
  continue
135
  raise
136
 
 
 
137
  # Poll for job completion
138
  poll_count = 0
139
  max_polls = 1800
 
140
 
141
  while True:
142
  poll_count += 1
 
143
 
144
  try:
145
  job_response = requests.get(
 
151
  job_data = job_response.json()
152
 
153
  status = job_data.get("status")
 
 
 
154
 
155
  if status == "completed" or status == "finished":
 
 
 
156
  output_data = job_data.get("outputs", [])
157
  if not output_data:
 
158
  raise gr.Error("No output data in job response")
159
 
 
160
  first_output = output_data[0]
161
  if not isinstance(first_output, dict):
 
162
  raise gr.Error("Unexpected output format from job response")
163
 
 
164
  output_data = first_output.get("data", {})
165
  if not isinstance(output_data, dict):
 
166
  raise gr.Error("Unexpected data format from job response")
167
 
 
168
  audio_url = output_data.get("url")
169
  if not audio_url:
 
170
  raise gr.Error("No audio URL in output data")
171
 
 
 
 
 
172
  audio_response = requests.get(audio_url, timeout=1800)
173
  audio_response.raise_for_status()
174
 
 
 
 
 
175
  with open(output_path, "wb") as f:
176
  f.write(audio_response.content)
 
177
 
 
178
  break
179
 
180
  elif status == "failed":
181
  error_msg = job_data.get("error", "Unknown error")
 
182
  raise gr.Error(f"Job failed: {error_msg}")
183
 
184
  if poll_count >= max_polls:
 
185
  raise gr.Error("Download took too long. Please try again or check if the video is accessible.")
186
 
 
187
  time.sleep(2)
188
 
189
  except requests.exceptions.RequestException as e:
 
190
  if poll_count >= max_polls:
191
  raise gr.Error("Failed to check job status. Please try again.")
192
  time.sleep(2)
193
 
 
 
 
194
  except Exception as e:
195
+ logger.exception(f"Error during YouTube download: {str(e)}")
196
  raise gr.Error(f"Failed to download YouTube audio: {str(e)}")
197
 
198
  return output_path
199
 
200
+ def check_api_health():
201
+ """Check if the API is healthy before making requests."""
 
 
 
 
 
 
 
 
 
 
 
 
202
  try:
203
+ response = requests.get(f"{API_URL}/health")
204
+ response.raise_for_status()
205
+ health_data = response.json()
 
206
 
207
+ # Check if service is healthy
208
+ if health_data.get("status") != "healthy":
209
+ raise gr.Error("API service is not healthy. Please try again later.")
210
+
211
+ # Check resource usage
212
+ cpu_percent = health_data.get("cpu_percent", 0)
213
+ memory_percent = health_data.get("memory_percent", 0)
214
 
215
+ if cpu_percent > 90 or memory_percent > 90:
216
+ logger.warning(f"High resource usage detected - CPU: {cpu_percent}%, Memory: {memory_percent}%")
 
 
 
 
 
217
 
218
+ return True
219
 
220
+ except requests.exceptions.RequestException as e:
221
+ logger.error(f"Health check failed: {str(e)}")
222
+ raise gr.Error("Failed to connect to the API service. Please try again later.")
223
 
224
+ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
225
+ """Transcribe audio from YouTube video using URL endpoint."""
226
+ logger.info(f"Starting YouTube transcription process for URL: {url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
 
 
 
 
228
  try:
229
+ # Check API health first
230
+ check_api_health()
231
+
232
+ # Get audio URL from Sieve
233
+ audio_url = download_youtube_audio(url)
 
234
 
235
  # Prepare request parameters
236
+ params = {
237
+ "source_language": "tg", # Tajik language
238
+ "timestamp_level": "sentence" if return_timestamps else None,
239
+ "task": "transcribe",
240
+ "chunk_length_s": chunk_length_s,
241
+ "batch_size": batch_size
242
+ }
243
 
244
  # Send request to API
 
245
  response = requests.post(
246
+ f"{API_URL}/transcribe/url",
247
+ json={"url": audio_url},
 
248
  params=params
249
  )
250
  response.raise_for_status()
251
  result = response.json()
 
 
252
 
253
+ # Format response
 
254
  formatted_result = {
255
+ "text": result["transcription"]["text"],
256
+ "language": result["transcription"]["language"]
257
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  # Generate subtitles if requested
260
  srt_file = None
261
+ if generate_subs and return_timestamps and "segments" in result["transcription"]:
262
+ srt_content = generate_srt(result["transcription"]["segments"])
 
263
  srt_file = save_srt_to_file(srt_content)
 
264
 
265
+ return formatted_result, srt_file, ""
266
+
267
+ except Exception as e:
268
+ logger.exception(f"Error in YouTube transcription: {str(e)}")
269
+ raise gr.Error(f"Failed to transcribe YouTube video: {str(e)}")
270
+
271
+ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
272
+ """Transcribe audio input using Whisper API."""
273
+ logger.info(f"Starting transcription process for file: {inputs}")
274
+
275
+ if inputs is None:
276
+ raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
277
+
278
+ try:
279
+ # Check API health first
280
+ check_api_health()
281
 
282
+ # Read the audio file
283
+ with open(inputs, "rb") as f:
284
+ files = {"file": f}
285
+
286
+ # Prepare request parameters
287
+ params = {
288
+ "source_language": "tg", # Tajik language
289
+ "timestamp_level": "sentence" if return_timestamps else None,
290
+ "task": "transcribe",
291
+ "chunk_length_s": chunk_length_s,
292
+ "batch_size": batch_size
293
+ }
294
+
295
+ # Send request to API
296
+ response = requests.post(
297
+ f"{API_URL}/transcribe",
298
+ files=files,
299
+ params=params
300
+ )
301
+ response.raise_for_status()
302
+ result = response.json()
303
+
304
+ # Format response
305
+ formatted_result = {
306
+ "text": result["transcription"]["text"],
307
+ "language": result["transcription"]["language"]
308
+ }
309
+
310
+ # Generate subtitles if requested
311
+ srt_file = None
312
+ if generate_subs and return_timestamps and "segments" in result["transcription"]:
313
+ srt_content = generate_srt(result["transcription"]["segments"])
314
+ srt_file = save_srt_to_file(srt_content)
315
+
316
+ return formatted_result, srt_file, ""
317
+
318
  except requests.exceptions.RequestException as e:
319
  logger.exception(f"API request failed: {str(e)}")
320
  raise gr.Error(f"Failed to transcribe audio: API request failed - {str(e)}")
 
332
  gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."),
333
  gr.Checkbox(label="Include timestamps", value=True),
334
  gr.Checkbox(label="Generate subtitles", value=True),
335
+ gr.Slider(minimum=10, maximum=60, value=30, step=5, label="Chunk Length (seconds)"),
336
+ gr.Slider(minimum=32, maximum=256, value=128, step=32, label="Batch Size")
337
  ],
338
  outputs=[
339
  gr.JSON(label="Transcription", open=True),
 
356
  gr.Audio(sources="microphone", type="filepath"),
357
  gr.Checkbox(label="Include timestamps", value=True),
358
  gr.Checkbox(label="Generate subtitles", value=True),
359
+ gr.Slider(minimum=10, maximum=60, value=30, step=5, label="Chunk Length (seconds)"),
360
+ gr.Slider(minimum=32, maximum=256, value=128, step=32, label="Batch Size")
361
  ],
362
  outputs=[
363
  gr.JSON(label="Transcription", open=True),
 
377
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
378
  gr.Checkbox(label="Include timestamps", value=True),
379
  gr.Checkbox(label="Generate subtitles", value=True),
380
+ gr.Slider(minimum=10, maximum=60, value=30, step=5, label="Chunk Length (seconds)"),
381
+ gr.Slider(minimum=32, maximum=256, value=128, step=32, label="Batch Size")
382
  ],
383
  outputs=[
384
  gr.JSON(label="Transcription", open=True),