muhtasham committed
Commit c5741b3 · 1 Parent(s): eb5510b
Files changed (1): app.py (+41, -6)
app.py CHANGED
@@ -2,12 +2,32 @@ import gradio as gr
 import requests
 import subprocess
 from loguru import logger
+import datetime
 
 # Configure loguru
 logger.add("app.log", rotation="500 MB", level="DEBUG")
 
 API_URL = "https://skdpcqcdd929o4k3.us-east-1.aws.endpoints.huggingface.cloud"
 
+def format_time(seconds):
+    """Convert seconds to SRT time format (HH:MM:SS,mmm)"""
+    td = datetime.timedelta(seconds=float(seconds))
+    hours = td.seconds // 3600
+    minutes = (td.seconds % 3600) // 60
+    seconds = td.seconds % 60
+    milliseconds = td.microseconds // 1000
+    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
+
+def generate_srt(chunks):
+    """Generate SRT format subtitles from chunks"""
+    srt_content = []
+    for i, chunk in enumerate(chunks, 1):
+        start_time = format_time(chunk["timestamp"][0])
+        end_time = format_time(chunk["timestamp"][1])
+        text = chunk["text"].strip()
+        srt_content.append(f"{i}\n{start_time} --> {end_time}\n{text}\n\n")
+    return "".join(srt_content)
+
 # Check if ffmpeg is installed
 def check_ffmpeg():
     try:
@@ -20,7 +40,7 @@ def check_ffmpeg():
 # Initialize ffmpeg check
 check_ffmpeg()
 
-def transcribe(inputs, return_timestamps):
+def transcribe(inputs, return_timestamps, generate_subs):
     if inputs is None:
         logger.warning("No audio file submitted")
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
@@ -63,9 +83,9 @@ def transcribe(inputs, return_timestamps):
             "text": result["text"]
         }
 
+        chunks = []
         if return_timestamps and "chunks" in result:
             logger.info(f"Processing {len(result['chunks'])} chunks")
-            formatted_result["chunks"] = []
             for i, chunk in enumerate(result["chunks"]):
                 logger.debug(f"Processing chunk {i}: {chunk}")
                 try:
@@ -74,18 +94,27 @@ def transcribe(inputs, return_timestamps):
                     text = chunk.get("text", "").strip()
 
                     if start_time is not None and end_time is not None:
-                        formatted_result["chunks"].append({
+                        chunk_data = {
                             "text": text,
                             "timestamp": [start_time, end_time]
-                        })
+                        }
+                        formatted_result["chunks"] = chunks
+                        chunks.append(chunk_data)
                     else:
                         logger.warning(f"Invalid timestamp in chunk {i}: {chunk}")
                 except Exception as chunk_error:
                     logger.error(f"Error processing chunk {i}: {str(chunk_error)}")
                     continue
-            logger.info(f"Successfully processed transcription with {len(formatted_result['chunks'])} chunks")
+            logger.info(f"Successfully processed transcription with {len(chunks)} chunks")
+
+        # Generate subtitles if requested
+        srt_content = None
+        if generate_subs and chunks:
+            logger.info("Generating SRT subtitles")
+            srt_content = generate_srt(chunks)
+            logger.info("SRT subtitles generated successfully")
 
-        return formatted_result
+        return formatted_result, srt_content
     except Exception as e:
         logger.exception(f"Error during transcription: {str(e)}")
         raise gr.Error(f"Failed to transcribe audio: {str(e)}")
@@ -97,13 +126,16 @@ mf_transcribe = gr.Interface(
     inputs=[
         gr.Audio(sources="microphone", type="filepath"),
         gr.Checkbox(label="Include timestamps", value=True),
+        gr.Checkbox(label="Generate subtitles", value=True),
     ],
     outputs=[
        gr.JSON(label="Transcription", open=True),
+       gr.File(label="Subtitles (SRT)", visible=True),
    ],
    title="Whisper Large V3 Turbo: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! "
+       "Generate subtitles for your videos in SRT format."
    ),
    flagging_mode="manual",
    flagging_options=[
@@ -119,13 +151,16 @@ file_transcribe = gr.Interface(
     inputs=[
         gr.Audio(sources="upload", type="filepath", label="Audio file"),
         gr.Checkbox(label="Include timestamps", value=True),
+        gr.Checkbox(label="Generate subtitles", value=True),
     ],
     outputs=[
        gr.JSON(label="Transcription", open=True),
+       gr.File(label="Subtitles (SRT)", visible=True),
    ],
    title="Whisper Large V3: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! "
+       "Generate subtitles for your videos in SRT format."
    ),
    flagging_mode="manual",
    flagging_options=[
 
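For reference, the helpers added in the first hunk can be exercised on their own. A minimal sketch, assuming format_time and generate_srt from the diff above are in scope, and using a hypothetical chunk list in the shape that transcribe() collects ({"text": ..., "timestamp": [start, end]}):

# Hypothetical sample data; format_time and generate_srt come from the diff above.
chunks = [
    {"text": "Hello world.", "timestamp": [0.0, 1.5]},
    {"text": "This is a test.", "timestamp": [1.5, 3.25]},
]

print(format_time(3.25))    # -> 00:00:03,250
print(generate_srt(chunks))
# 1
# 00:00:00,000 --> 00:00:01,500
# Hello world.
#
# 2
# 00:00:01,500 --> 00:00:03,250
# This is a test.

Note that format_time reads td.seconds, which excludes the days component of the timedelta, so a timestamp at or beyond 24 hours would wrap around; for typical audio clips this is not a concern.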
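transcribe() now returns a (formatted_result, srt_content) pair to match the two output components, with srt_content left as None when subtitles are not requested or no valid chunks were parsed. Two details of this hunk are worth noting: the assignment formatted_result["chunks"] = chunks runs inside the per-chunk loop (it re-binds the same list each time, so the "chunks" key only appears once at least one chunk has a valid timestamp), and the second output is a gr.File component, which displays a file on disk rather than a raw string. The diff does not show how the SRT text reaches a file; a minimal sketch of one way to bridge that gap, where srt_to_file is a hypothetical helper that is not part of this commit:

import tempfile

def srt_to_file(srt_content):
    """Write an SRT string to a temporary .srt file and return its path, or None."""
    if not srt_content:
        return None
    # delete=False keeps the file around so the Gradio component can serve it.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".srt", delete=False, encoding="utf-8"
    ) as f:
        f.write(srt_content)
        return f.name

With such a helper, the function could end with return formatted_result, srt_to_file(srt_content), so that gr.File receives a path instead of the raw subtitle text.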
 
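Both tabs now expose a "Generate subtitles" checkbox alongside the timestamps option, add an SRT file output, and advertise subtitles for videos in their descriptions. As a follow-up outside the app, a downloaded SRT file can be attached to a video as a soft subtitle track with ffmpeg, the same tool the app checks for at startup. A sketch of that step, with placeholder file names (not part of this commit):

import subprocess

def mux_subtitles(video_path, srt_path, output_path):
    """Attach an SRT file to an MP4 as a soft subtitle track using ffmpeg."""
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", video_path,
            "-i", srt_path,
            "-c", "copy",        # copy the existing audio/video streams untouched
            "-c:s", "mov_text",  # re-encode only the subtitles into an MP4-compatible codec
            output_path,
        ],
        check=True,
    )

mux_subtitles("input.mp4", "transcript.srt", "output_with_subs.mp4")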