Spaces:

jsbeaudry
/

oswald-large-v3-turbo-m1

Sleeping

App Files Community

jsbeaudry committed on Jun 23

Commit

2c4cfe6

verified ·

1 Parent(s): cd9a4ee

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -43

app.py CHANGED Viewed

@@ -1,64 +1,39 @@
-from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
-import librosa
-import torch
 import gradio as gr
-# Load Whisper model and processor
 print("Loading model...")
-processor = AutoProcessor.from_pretrained("jsbeaudry/whisper-medium-oswald")
-model = AutoModelForSpeechSeq2Seq.from_pretrained("jsbeaudry/whisper-medium-oswald")
-model.eval()
-# Set device (GPU if available, else CPU)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
 print("Model loaded successfully.")
 # Transcription function
-def transcribe(audio):
-    if audio is None:
         return "Please upload or record an audio file first."
-    # Gradio provides a tuple (sr, data)
-    sr, data = audio
-    # If stereo, convert to mono
-    if len(data.shape) == 2:
-        data = librosa.to_mono(data.T)
-    # Resample to 16kHz if needed
-    if sr != 16000:
-        data = librosa.resample(data, orig_sr=sr, target_sr=16000)
-        sr = 16000
-    # Process audio
-    input_features = processor(data, sampling_rate=sr, return_tensors="pt").input_features.to(device)
-    # Predict
-    with torch.no_grad():
-        predicted_ids = model.generate(input_features)
-    # Decode
-    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-    return transcription
-# Gradio UI
 def create_interface():
     with gr.Blocks(title="Whisper Medium - Haitian Creole") as demo:
         gr.Markdown("# 🎙️ Whisper Medium Creole ASR")
         gr.Markdown(
-            "Upload or record your voice in Haitian Creole. Then click **Transcribe** to get the text."
         )
         with gr.Row():
-            audio_input = gr.Audio(label="🎧 Upload or Record Audio", type="numpy", format="wav")
-            transcribe_button = gr.Button("🔍 Transcribe")
-            output_text = gr.Textbox(label="📝 Transcribed Text", lines=4)
         transcribe_button.click(fn=transcribe, inputs=audio_input, outputs=output_text)
     return demo
 if __name__ == "__main__":
     interface = create_interface()
-    interface.launch()

+from transformers import pipeline
 import gradio as gr
+# Load Whisper model
 print("Loading model...")
+pipe = pipeline(model="jsbeaudry/oswald-large-v3-turbo-m1")
 print("Model loaded successfully.")
 # Transcription function
+def transcribe(audio_path):
+    if audio_path is None:
         return "Please upload or record an audio file first."
+    result = pipe(audio_path)
+    return result["text"]
+# Build Gradio interface
 def create_interface():
     with gr.Blocks(title="Whisper Medium - Haitian Creole") as demo:
         gr.Markdown("# 🎙️ Whisper Medium Creole ASR")
         gr.Markdown(
+            "Upload an audio file or record your voice in Haitian Creole. "
+            "Then click **Transcribe** to see the result."
         )
         with gr.Row():
+            with gr.Column():
+                audio_input = gr.Audio( type="filepath", label="🎧 Upload Audio")
+            with gr.Column():
+                transcribe_button = gr.Button("🔍 Transcribe")
+                output_text = gr.Textbox(label="📝 Transcribed Text", lines=4)
         transcribe_button.click(fn=transcribe, inputs=audio_input, outputs=output_text)
     return demo
 if __name__ == "__main__":
     interface = create_interface()
+    interface.launch()