Spaces:
Running
Running
WIP
Browse files
app.py
CHANGED
@@ -7,6 +7,8 @@ from loguru import logger
|
|
7 |
import datetime
|
8 |
import tempfile
|
9 |
import os
|
|
|
|
|
10 |
|
11 |
# Configure loguru
|
12 |
logger.add("app.log", rotation="500 MB", level="DEBUG")
|
@@ -127,8 +129,50 @@ def transcribe(inputs, return_timestamps, generate_subs, batch_size, chunk_lengt
|
|
127 |
logger.exception(f"Error during transcription: {str(e)}")
|
128 |
raise gr.Error(f"Failed to transcribe audio: {str(e)}")
|
129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
demo = gr.Blocks(theme=gr.themes.Ocean())
|
131 |
|
|
|
|
|
|
|
132 |
# Define interfaces first
|
133 |
mf_transcribe = gr.Interface(
|
134 |
fn=transcribe,
|
@@ -136,12 +180,13 @@ mf_transcribe = gr.Interface(
|
|
136 |
gr.Audio(sources="microphone", type="filepath"),
|
137 |
gr.Checkbox(label="Include timestamps", value=True),
|
138 |
gr.Checkbox(label="Generate subtitles", value=True),
|
139 |
-
gr.Slider(minimum=1, maximum=
|
140 |
gr.Slider(minimum=5, maximum=30, value=15, step=5, label="Chunk Length (seconds)"),
|
141 |
],
|
142 |
outputs=[
|
143 |
gr.JSON(label="Transcription", open=True),
|
144 |
gr.File(label="Subtitles (SRT)", visible=True),
|
|
|
145 |
],
|
146 |
title="Whisper Large V3 Turbo: Transcribe Audio",
|
147 |
description=(
|
@@ -151,7 +196,8 @@ mf_transcribe = gr.Interface(
|
|
151 |
),
|
152 |
flagging_mode="manual",
|
153 |
flagging_options=["π Good", "π Bad"],
|
154 |
-
flagging_dir="flagged_data"
|
|
|
155 |
)
|
156 |
|
157 |
file_transcribe = gr.Interface(
|
@@ -166,6 +212,7 @@ file_transcribe = gr.Interface(
|
|
166 |
outputs=[
|
167 |
gr.JSON(label="Transcription", open=True),
|
168 |
gr.File(label="Subtitles (SRT)", visible=True),
|
|
|
169 |
],
|
170 |
title="Whisper Large V3: Transcribe Audio",
|
171 |
description=(
|
@@ -175,12 +222,13 @@ file_transcribe = gr.Interface(
|
|
175 |
),
|
176 |
flagging_mode="manual",
|
177 |
flagging_options=["π Good", "π Bad"],
|
178 |
-
flagging_dir="flagged_data"
|
|
|
179 |
)
|
180 |
|
181 |
# Then set up the demo with the interfaces
|
182 |
with demo:
|
183 |
-
gr.TabbedInterface([
|
184 |
|
185 |
logger.info("Starting Gradio interface")
|
186 |
demo.queue().launch(ssr_mode=False)
|
|
|
7 |
import datetime
|
8 |
import tempfile
|
9 |
import os
|
10 |
+
import json
|
11 |
+
from pathlib import Path
|
12 |
|
13 |
# Configure loguru
|
14 |
logger.add("app.log", rotation="500 MB", level="DEBUG")
|
|
|
129 |
logger.exception(f"Error during transcription: {str(e)}")
|
130 |
raise gr.Error(f"Failed to transcribe audio: {str(e)}")
|
131 |
|
132 |
+
# Create a custom flagging callback
|
133 |
+
class TranscriptionFlaggingCallback(gr.FlaggingCallback):
    """Flagging callback that appends user feedback to a JSONL log.

    Each flag event writes one JSON object per line to
    ``<flagging_dir>/flagged_data.jsonl`` and, when an audio value is
    present, copies the audio file into ``<flagging_dir>/audio``.
    """

    def __init__(self, flagging_dir):
        """Create the flagging directory and remember the log file path.

        Args:
            flagging_dir: Directory (str or path-like) that receives the
                JSONL log and the ``audio`` sub-directory.
        """
        self.flagging_dir = Path(flagging_dir)
        # parents=True so a nested flagging_dir does not fail on first use.
        self.flagging_dir.mkdir(parents=True, exist_ok=True)
        self.log_file = self.flagging_dir / "flagged_data.jsonl"
        # Component labels recorded by setup(), keyed by component count.
        # Keyed by size because one callback instance is shared by several
        # interfaces in this file, and setup() runs once per interface.
        self._labels_by_size = {}

    def setup(self, components, flagging_dir):
        """Record component labels so flag() can locate values by label.

        Args:
            components: Components whose values will later be passed to
                ``flag``.
            flagging_dir: Ignored; the directory given to ``__init__`` is
                the one used.
        """
        labels = [getattr(component, "label", None) for component in components]
        self._labels_by_size[len(labels)] = labels

    def flag(self, components, flag_data=None, flag_option=None, username=None):
        """Persist one flagged sample.

        Gradio's ``FlaggingCallback.flag`` contract is
        ``flag(flag_data, flag_option=None, username=None)``: the flagged
        values arrive as the first positional argument. The trailing
        parameters therefore have defaults, so that call succeeds instead
        of raising ``TypeError``.

        Args:
            components: List of flagged values (despite the name). The
                first entry is treated as the audio input.
            flag_data: Unused; kept so the original four-argument call
                form keeps working.
            flag_option: The feedback option the user selected.
            username: Name of the flagging user, if any.
        """
        values = list(components) if components is not None else []
        audio_filename = self._store_audio(values[0] if values else None)

        # Prepare the data to save. Values are looked up by component label
        # when setup() has recorded a matching layout; otherwise the
        # original positional indices (1 and 2) are used.
        data = {
            "timestamp": datetime.datetime.now().isoformat(),
            "audio_file": audio_filename,
            "transcription": self._pick(values, "Transcription", 1),
            "feedback": flag_option,
            "correction": self._pick(values, "Correction", 2),
            "username": username,
        }

        # Append to JSONL file. ensure_ascii=False keeps non-English
        # transcriptions readable (the file is opened as UTF-8), and
        # default=str keeps an unexpected value type from discarding the
        # user's feedback.
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(data, ensure_ascii=False, default=str) + "\n")

        logger.info(f"Saved flagged data: {data}")

    def _pick(self, values, label, fallback_index):
        """Return the value for ``label``, else the positional fallback."""
        labels = self._labels_by_size.get(len(values))
        if labels and label in labels:
            return values[labels.index(label)]
        if len(values) > fallback_index:
            return values[fallback_index]
        return None

    def _store_audio(self, audio):
        """Copy the flagged audio into ``<flagging_dir>/audio``.

        Returns the stored file name, or ``None`` when there is no audio or
        the copy fails.
        """
        # NOTE(review): the audio value may arrive as a file dict with a
        # "path" key rather than a plain path string - confirm against the
        # Gradio version in use. Both shapes are accepted here.
        if isinstance(audio, dict):
            audio = audio.get("path")
        if not audio:
            return None

        import shutil
        import uuid

        audio_dir = self.flagging_dir / "audio"
        audio_dir.mkdir(parents=True, exist_ok=True)
        # Prefix a random token so two recordings with the same base name
        # do not overwrite each other.
        stored_name = f"{uuid.uuid4().hex}_{os.path.basename(str(audio))}"
        try:
            shutil.copy2(audio, audio_dir / stored_name)
        except OSError:
            # Still record the feedback even if the audio file is missing.
            logger.exception(f"Could not copy flagged audio: {audio}")
            return None
        return stored_name
|
170 |
+
|
171 |
demo = gr.Blocks(theme=gr.themes.Ocean())
|
172 |
|
173 |
+
# Create flagging callback
|
174 |
+
flagging_callback = TranscriptionFlaggingCallback("flagged_data")
|
175 |
+
|
176 |
# Define interfaces first
|
177 |
mf_transcribe = gr.Interface(
|
178 |
fn=transcribe,
|
|
|
180 |
gr.Audio(sources="microphone", type="filepath"),
|
181 |
gr.Checkbox(label="Include timestamps", value=True),
|
182 |
gr.Checkbox(label="Generate subtitles", value=True),
|
183 |
+
gr.Slider(minimum=1, maximum=128, value=8, step=1, label="Batch Size"),
|
184 |
gr.Slider(minimum=5, maximum=30, value=15, step=5, label="Chunk Length (seconds)"),
|
185 |
],
|
186 |
outputs=[
|
187 |
gr.JSON(label="Transcription", open=True),
|
188 |
gr.File(label="Subtitles (SRT)", visible=True),
|
189 |
+
gr.Textbox(label="Correction", visible=False), # Hidden correction input
|
190 |
],
|
191 |
title="Whisper Large V3 Turbo: Transcribe Audio",
|
192 |
description=(
|
|
|
196 |
),
|
197 |
flagging_mode="manual",
|
198 |
flagging_options=["π Good", "π Bad"],
|
199 |
+
flagging_dir="flagged_data",
|
200 |
+
flagging_callback=flagging_callback
|
201 |
)
|
202 |
|
203 |
file_transcribe = gr.Interface(
|
|
|
212 |
outputs=[
|
213 |
gr.JSON(label="Transcription", open=True),
|
214 |
gr.File(label="Subtitles (SRT)", visible=True),
|
215 |
+
gr.Textbox(label="Correction", visible=False), # Hidden correction input
|
216 |
],
|
217 |
title="Whisper Large V3: Transcribe Audio",
|
218 |
description=(
|
|
|
222 |
),
|
223 |
flagging_mode="manual",
|
224 |
flagging_options=["π Good", "π Bad"],
|
225 |
+
flagging_dir="flagged_data",
|
226 |
+
flagging_callback=flagging_callback
|
227 |
)
|
228 |
|
229 |
# Then set up the demo with the interfaces
|
230 |
with demo:
|
231 |
+
gr.TabbedInterface([file_transcribe, mf_transcribe], ["Audio file", "Microphone"])
|
232 |
|
233 |
logger.info("Starting Gradio interface")
|
234 |
demo.queue().launch(ssr_mode=False)
|