muhtasham committed on
Commit
2a49988
·
1 Parent(s): 06acc93
Files changed (1) hide show
  1. app.py +52 -4
app.py CHANGED
@@ -7,6 +7,8 @@ from loguru import logger
7
  import datetime
8
  import tempfile
9
  import os
 
 
10
 
11
  # Configure loguru
12
  logger.add("app.log", rotation="500 MB", level="DEBUG")
@@ -127,8 +129,50 @@ def transcribe(inputs, return_timestamps, generate_subs, batch_size, chunk_lengt
127
  logger.exception(f"Error during transcription: {str(e)}")
128
  raise gr.Error(f"Failed to transcribe audio: {str(e)}")
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  demo = gr.Blocks(theme=gr.themes.Ocean())
131
 
 
 
 
132
  # Define interfaces first
133
  mf_transcribe = gr.Interface(
134
  fn=transcribe,
@@ -136,12 +180,13 @@ mf_transcribe = gr.Interface(
136
  gr.Audio(sources="microphone", type="filepath"),
137
  gr.Checkbox(label="Include timestamps", value=True),
138
  gr.Checkbox(label="Generate subtitles", value=True),
139
- gr.Slider(minimum=1, maximum=32, value=8, step=1, label="Batch Size"),
140
  gr.Slider(minimum=5, maximum=30, value=15, step=5, label="Chunk Length (seconds)"),
141
  ],
142
  outputs=[
143
  gr.JSON(label="Transcription", open=True),
144
  gr.File(label="Subtitles (SRT)", visible=True),
 
145
  ],
146
  title="Whisper Large V3 Turbo: Transcribe Audio",
147
  description=(
@@ -151,7 +196,8 @@ mf_transcribe = gr.Interface(
151
  ),
152
  flagging_mode="manual",
153
  flagging_options=["πŸ‘ Good", "πŸ‘Ž Bad"],
154
- flagging_dir="flagged_data"
 
155
  )
156
 
157
  file_transcribe = gr.Interface(
@@ -166,6 +212,7 @@ file_transcribe = gr.Interface(
166
  outputs=[
167
  gr.JSON(label="Transcription", open=True),
168
  gr.File(label="Subtitles (SRT)", visible=True),
 
169
  ],
170
  title="Whisper Large V3: Transcribe Audio",
171
  description=(
@@ -175,12 +222,13 @@ file_transcribe = gr.Interface(
175
  ),
176
  flagging_mode="manual",
177
  flagging_options=["πŸ‘ Good", "πŸ‘Ž Bad"],
178
- flagging_dir="flagged_data"
 
179
  )
180
 
181
  # Then set up the demo with the interfaces
182
  with demo:
183
- gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
184
 
185
  logger.info("Starting Gradio interface")
186
  demo.queue().launch(ssr_mode=False)
 
7
  import datetime
8
  import tempfile
9
  import os
10
+ import json
11
+ from pathlib import Path
12
 
13
  # Configure loguru
14
  logger.add("app.log", rotation="500 MB", level="DEBUG")
 
129
  logger.exception(f"Error during transcription: {str(e)}")
130
  raise gr.Error(f"Failed to transcribe audio: {str(e)}")
131
 
132
+ # Create a custom flagging callback
133
class TranscriptionFlaggingCallback(gr.FlaggingCallback):
    """Persist user feedback on transcriptions as JSON Lines plus audio copies.

    Every flag appends one JSON object to ``<flagging_dir>/flagged_data.jsonl``
    and, when an audio path is available, copies that audio file into
    ``<flagging_dir>/audio``.
    """

    def __init__(self, flagging_dir):
        """Create the flagging directory and remember where the log lives.

        Args:
            flagging_dir: Directory (string or path-like) that receives the
                JSONL log and the copied audio files.
        """
        self.flagging_dir = Path(flagging_dir)
        # parents=True so a nested target such as "logs/flagged" also works.
        self.flagging_dir.mkdir(parents=True, exist_ok=True)
        self.log_file = self.flagging_dir / "flagged_data.jsonl"
        # Component labels recorded by setup(); used to locate values by name.
        self._labels = []

    def setup(self, components, flagging_dir):
        """Record the labels of the components whose values will be flagged.

        Args:
            components: The components being flagged, in the same order as the
                values later handed to :meth:`flag`.
            flagging_dir: Ignored; the directory given to ``__init__`` is used.
        """
        self._labels = [getattr(comp, "label", None) for comp in components]

    def flag(self, components, flag_data=None, flag_option=None, username=None):
        """Store one feedback entry and, if present, a copy of its audio.

        Gradio's ``FlaggingCallback`` interface passes the flagged values as
        the single positional argument and supplies ``flag_option`` and
        ``username`` as keywords, so every parameter after ``components`` must
        be optional; otherwise each flag click fails with ``TypeError``.

        Args:
            components: Flagged values; the first one is the audio input.
            flag_data: Unused. Kept so existing four-argument calls still work.
            flag_option: The feedback choice selected by the user.
            username: Name of the user who flagged, if any.
        """
        import shutil

        values = list(components)
        timestamp = datetime.datetime.now()

        audio_file = values[0] if values else None
        # NOTE(review): Gradio may deliver file values as a dict payload with a
        # "path" entry instead of a plain string -- confirm for the version used.
        if isinstance(audio_file, dict):
            audio_file = audio_file.get("path")

        audio_filename = None
        if audio_file:
            # Prefix with the timestamp so recordings sharing a basename do
            # not overwrite each other in the flagged directory.
            audio_filename = (
                f"{timestamp:%Y%m%d_%H%M%S_%f}_{os.path.basename(audio_file)}"
            )
            audio_dir = self.flagging_dir / "audio"
            audio_dir.mkdir(parents=True, exist_ok=True)
            try:
                shutil.copy2(audio_file, audio_dir / audio_filename)
            except OSError:
                # Keep the feedback entry even when the audio cannot be copied.
                logger.exception(f"Could not copy flagged audio: {audio_file}")
                audio_filename = None

        # Prefer lookup by component label when setup() saw a matching layout;
        # positions 1 and 2 are only right for a three-value list.
        labels = getattr(self, "_labels", [])
        by_label = dict(zip(labels, values)) if len(labels) == len(values) else {}

        def pick(label, index):
            if label in by_label:
                return by_label[label]
            return values[index] if len(values) > index else None

        # Prepare the data to save
        data = {
            "timestamp": timestamp.isoformat(),
            "audio_file": audio_filename,
            "transcription": pick("Transcription", 1),
            "feedback": flag_option,
            "correction": pick("Correction", 2),
            "username": username,
        }

        # Append to JSONL file; ensure_ascii=False keeps emoji labels readable.
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(data, ensure_ascii=False) + "\n")

        logger.info(f"Saved flagged data: {data}")
170
+
171
  demo = gr.Blocks(theme=gr.themes.Ocean())
172
 
173
+ # Create flagging callback
174
+ flagging_callback = TranscriptionFlaggingCallback("flagged_data")
175
+
176
  # Define interfaces first
177
  mf_transcribe = gr.Interface(
178
  fn=transcribe,
 
180
  gr.Audio(sources="microphone", type="filepath"),
181
  gr.Checkbox(label="Include timestamps", value=True),
182
  gr.Checkbox(label="Generate subtitles", value=True),
183
+ gr.Slider(minimum=1, maximum=128, value=8, step=1, label="Batch Size"),
184
  gr.Slider(minimum=5, maximum=30, value=15, step=5, label="Chunk Length (seconds)"),
185
  ],
186
  outputs=[
187
  gr.JSON(label="Transcription", open=True),
188
  gr.File(label="Subtitles (SRT)", visible=True),
189
+ gr.Textbox(label="Correction", visible=False), # Hidden correction input
190
  ],
191
  title="Whisper Large V3 Turbo: Transcribe Audio",
192
  description=(
 
196
  ),
197
  flagging_mode="manual",
198
  flagging_options=["πŸ‘ Good", "πŸ‘Ž Bad"],
199
+ flagging_dir="flagged_data",
200
+ flagging_callback=flagging_callback
201
  )
202
 
203
  file_transcribe = gr.Interface(
 
212
  outputs=[
213
  gr.JSON(label="Transcription", open=True),
214
  gr.File(label="Subtitles (SRT)", visible=True),
215
+ gr.Textbox(label="Correction", visible=False), # Hidden correction input
216
  ],
217
  title="Whisper Large V3: Transcribe Audio",
218
  description=(
 
222
  ),
223
  flagging_mode="manual",
224
  flagging_options=["πŸ‘ Good", "πŸ‘Ž Bad"],
225
+ flagging_dir="flagged_data",
226
+ flagging_callback=flagging_callback
227
  )
228
 
229
  # Then set up the demo with the interfaces
230
  with demo:
231
+ gr.TabbedInterface([file_transcribe, mf_transcribe], ["Audio file", "Microphone"])
232
 
233
  logger.info("Starting Gradio interface")
234
  demo.queue().launch(ssr_mode=False)