jsbeaudry committed on
Commit
2c4cfe6
·
verified ·
1 Parent(s): cd9a4ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -43
app.py CHANGED
@@ -1,64 +1,39 @@
1
- from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
2
- import librosa
3
- import torch
4
  import gradio as gr
5
 
6
- # Load Whisper model and processor
7
  print("Loading model...")
8
- processor = AutoProcessor.from_pretrained("jsbeaudry/whisper-medium-oswald")
9
- model = AutoModelForSpeechSeq2Seq.from_pretrained("jsbeaudry/whisper-medium-oswald")
10
- model.eval()
11
-
12
- # Set device (GPU if available, else CPU)
13
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
- model.to(device)
15
  print("Model loaded successfully.")
16
 
17
  # Transcription function
18
- def transcribe(audio):
19
- if audio is None:
20
  return "Please upload or record an audio file first."
 
 
21
 
22
- # Gradio provides a tuple (sr, data)
23
- sr, data = audio
24
-
25
- # If stereo, convert to mono
26
- if len(data.shape) == 2:
27
- data = librosa.to_mono(data.T)
28
-
29
- # Resample to 16kHz if needed
30
- if sr != 16000:
31
- data = librosa.resample(data, orig_sr=sr, target_sr=16000)
32
- sr = 16000
33
-
34
- # Process audio
35
- input_features = processor(data, sampling_rate=sr, return_tensors="pt").input_features.to(device)
36
-
37
- # Predict
38
- with torch.no_grad():
39
- predicted_ids = model.generate(input_features)
40
-
41
- # Decode
42
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
43
- return transcription
44
-
45
- # Gradio UI
46
  def create_interface():
47
  with gr.Blocks(title="Whisper Medium - Haitian Creole") as demo:
48
  gr.Markdown("# 🎙️ Whisper Medium Creole ASR")
49
  gr.Markdown(
50
- "Upload or record your voice in Haitian Creole. Then click **Transcribe** to get the text."
 
51
  )
52
 
53
  with gr.Row():
54
- audio_input = gr.Audio(label="🎧 Upload or Record Audio", type="numpy", format="wav")
55
- transcribe_button = gr.Button("🔍 Transcribe")
56
- output_text = gr.Textbox(label="📝 Transcribed Text", lines=4)
57
-
 
 
 
58
  transcribe_button.click(fn=transcribe, inputs=audio_input, outputs=output_text)
59
 
60
  return demo
61
 
62
  if __name__ == "__main__":
63
  interface = create_interface()
64
- interface.launch()
 
1
+ from transformers import pipeline
 
 
2
  import gradio as gr
3
 
4
+ # Load Whisper model
5
  print("Loading model...")
6
+ pipe = pipeline(model="jsbeaudry/oswald-large-v3-turbo-m1")
 
 
 
 
 
 
7
  print("Model loaded successfully.")
8
 
9
  # Transcription function
10
+ def transcribe(audio_path):
11
+ if audio_path is None:
12
  return "Please upload or record an audio file first."
13
+ result = pipe(audio_path)
14
+ return result["text"]
15
 
16
+ # Build Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def create_interface():
18
  with gr.Blocks(title="Whisper Medium - Haitian Creole") as demo:
19
  gr.Markdown("# 🎙️ Whisper Medium Creole ASR")
20
  gr.Markdown(
21
+ "Upload an audio file or record your voice in Haitian Creole. "
22
+ "Then click **Transcribe** to see the result."
23
  )
24
 
25
  with gr.Row():
26
+ with gr.Column():
27
+ audio_input = gr.Audio( type="filepath", label="🎧 Upload Audio")
28
+ with gr.Column():
29
+ transcribe_button = gr.Button("🔍 Transcribe")
30
+ output_text = gr.Textbox(label="📝 Transcribed Text", lines=4)
31
+
32
+
33
  transcribe_button.click(fn=transcribe, inputs=audio_input, outputs=output_text)
34
 
35
  return demo
36
 
37
  if __name__ == "__main__":
38
  interface = create_interface()
39
+ interface.launch()