whisper-tg

Running

App Files Files Community

muhtasham committed on Mar 21

Commit

2a49988

1 Parent(s): 06acc93

WIP

Browse files

Files changed (1) hide show

app.py +52 -4

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ from loguru import logger
 import datetime
 import tempfile
 import os
 # Configure loguru
 logger.add("app.log", rotation="500 MB", level="DEBUG")
@@ -127,8 +129,50 @@ def transcribe(inputs, return_timestamps, generate_subs, batch_size, chunk_lengt
         logger.exception(f"Error during transcription: {str(e)}")
         raise gr.Error(f"Failed to transcribe audio: {str(e)}")
 demo = gr.Blocks(theme=gr.themes.Ocean())
 # Define interfaces first
 mf_transcribe = gr.Interface(
     fn=transcribe,
@@ -136,12 +180,13 @@ mf_transcribe = gr.Interface(
         gr.Audio(sources="microphone", type="filepath"),
         gr.Checkbox(label="Include timestamps", value=True),
         gr.Checkbox(label="Generate subtitles", value=True),
-        gr.Slider(minimum=1, maximum=32, value=8, step=1, label="Batch Size"),
         gr.Slider(minimum=5, maximum=30, value=15, step=5, label="Chunk Length (seconds)"),
     ],
     outputs=[
         gr.JSON(label="Transcription", open=True),
         gr.File(label="Subtitles (SRT)", visible=True),
     ],
     title="Whisper Large V3 Turbo: Transcribe Audio",
     description=(
@@ -151,7 +196,8 @@ mf_transcribe = gr.Interface(
     ),
     flagging_mode="manual",
     flagging_options=["👍 Good", "👎 Bad"],
-    flagging_dir="flagged_data"
 )
 file_transcribe = gr.Interface(
@@ -166,6 +212,7 @@ file_transcribe = gr.Interface(
     outputs=[
         gr.JSON(label="Transcription", open=True),
         gr.File(label="Subtitles (SRT)", visible=True),
     ],
     title="Whisper Large V3: Transcribe Audio",
     description=(
@@ -175,12 +222,13 @@ file_transcribe = gr.Interface(
     ),
     flagging_mode="manual",
     flagging_options=["👍 Good", "👎 Bad"],
-    flagging_dir="flagged_data"
 )
 # Then set up the demo with the interfaces
 with demo:
-    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
 logger.info("Starting Gradio interface")
 demo.queue().launch(ssr_mode=False)

 import datetime
 import tempfile
 import os
+import json
+from pathlib import Path
 # Configure loguru
 logger.add("app.log", rotation="500 MB", level="DEBUG")
         logger.exception(f"Error during transcription: {str(e)}")
         raise gr.Error(f"Failed to transcribe audio: {str(e)}")
+# Create a custom flagging callback
+class TranscriptionFlaggingCallback(gr.FlaggingCallback):
+    def __init__(self, flagging_dir):
+        self.flagging_dir = Path(flagging_dir)
+        self.flagging_dir.mkdir(exist_ok=True)
+        self.log_file = self.flagging_dir / "flagged_data.jsonl"
+    def setup(self, components, flagging_dir):
+        pass
+    def flag(self, components, flag_data, flag_option, username):
+        # Create a unique filename for the audio file
+        audio_file = components[0]  # First component is the audio input
+        if audio_file:
+            audio_filename = os.path.basename(audio_file)
+            # Copy audio file to flagged directory
+            audio_dir = self.flagging_dir / "audio"
+            audio_dir.mkdir(exist_ok=True)
+            import shutil
+            shutil.copy2(audio_file, audio_dir / audio_filename)
+        else:
+            audio_filename = None
+        # Prepare the data to save
+        data = {
+            "timestamp": datetime.datetime.now().isoformat(),
+            "audio_file": audio_filename,
+            "transcription": components[1],  # JSON output
+            "feedback": flag_option,
+            "correction": components[2] if len(components) > 2 else None,  # Correction text if provided
+            "username": username
+        }
+        # Append to JSONL file
+        with open(self.log_file, "a", encoding="utf-8") as f:
+            f.write(json.dumps(data) + "\n")
+        logger.info(f"Saved flagged data: {data}")
 demo = gr.Blocks(theme=gr.themes.Ocean())
+# Create flagging callback
+flagging_callback = TranscriptionFlaggingCallback("flagged_data")
 # Define interfaces first
 mf_transcribe = gr.Interface(
     fn=transcribe,
         gr.Audio(sources="microphone", type="filepath"),
         gr.Checkbox(label="Include timestamps", value=True),
         gr.Checkbox(label="Generate subtitles", value=True),
+        gr.Slider(minimum=1, maximum=128, value=8, step=1, label="Batch Size"),
         gr.Slider(minimum=5, maximum=30, value=15, step=5, label="Chunk Length (seconds)"),
     ],
     outputs=[
         gr.JSON(label="Transcription", open=True),
         gr.File(label="Subtitles (SRT)", visible=True),
+        gr.Textbox(label="Correction", visible=False),  # Hidden correction input
     ],
     title="Whisper Large V3 Turbo: Transcribe Audio",
     description=(
     ),
     flagging_mode="manual",
     flagging_options=["👍 Good", "👎 Bad"],
+    flagging_dir="flagged_data",
+    flagging_callback=flagging_callback
 )
 file_transcribe = gr.Interface(
     outputs=[
         gr.JSON(label="Transcription", open=True),
         gr.File(label="Subtitles (SRT)", visible=True),
+        gr.Textbox(label="Correction", visible=False),  # Hidden correction input
     ],
     title="Whisper Large V3: Transcribe Audio",
     description=(
     ),
     flagging_mode="manual",
     flagging_options=["👍 Good", "👎 Bad"],
+    flagging_dir="flagged_data",
+    flagging_callback=flagging_callback
 )
 # Then set up the demo with the interfaces
 with demo:
+    gr.TabbedInterface([file_transcribe, mf_transcribe], ["Audio file", "Microphone"])
 logger.info("Starting Gradio interface")
 demo.queue().launch(ssr_mode=False)