jsbeaudry committed on
Commit
ad65f9d
·
verified ·
1 Parent(s): 1ee5c4a

Update app.py

Files changed (1)
  1. app.py +37 -9
app.py CHANGED
@@ -1,29 +1,57 @@
-from transformers import pipeline
+from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
+import librosa
+import torch
 import gradio as gr

-# Load Whisper model
+# Load Whisper model and processor
 print("Loading model...")
-pipe = pipeline(model="jsbeaudry/whisper-medium-oswald")
+processor = AutoProcessor.from_pretrained("jsbeaudry/whisper-medium-oswald")
+model = AutoModelForSpeechSeq2Seq.from_pretrained("jsbeaudry/whisper-medium-oswald")
+model.eval()
+
+# Set device (GPU if available, else CPU)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
 print("Model loaded successfully.")

 # Transcription function
 def transcribe(audio):
     if audio is None:
         return "Please upload or record an audio file first."
-    result = pipe(audio)
-    return result["text"]

-# Build Gradio interface
+    # Gradio provides a tuple (sr, data)
+    sr, data = audio
+
+    # If stereo, convert to mono
+    if len(data.shape) == 2:
+        data = librosa.to_mono(data.T)
+
+    # Resample to 16kHz if needed
+    if sr != 16000:
+        data = librosa.resample(data, orig_sr=sr, target_sr=16000)
+        sr = 16000
+
+    # Process audio
+    input_features = processor(data, sampling_rate=sr, return_tensors="pt").input_features.to(device)
+
+    # Predict
+    with torch.no_grad():
+        predicted_ids = model.generate(input_features)
+
+    # Decode
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+    return transcription
+
+# Gradio UI
 def create_interface():
     with gr.Blocks(title="Whisper Medium - Haitian Creole") as demo:
         gr.Markdown("# 🎙️ Whisper Medium Creole ASR")
         gr.Markdown(
-            "Upload an audio file or record your voice in Haitian Creole. "
-            "Then click **Transcribe** to see the result."
+            "Upload or record your voice in Haitian Creole. Then click **Transcribe** to get the text."
         )

         with gr.Row():
-            audio_input = gr.Audio(label="🎧 Upload or Record Audio", format="wav")
+            audio_input = gr.Audio(label="🎧 Upload or Record Audio", type="numpy", format="wav")
             transcribe_button = gr.Button("🔍 Transcribe")
             output_text = gr.Textbox(label="📝 Transcribed Text", lines=4)
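After this change, transcribe() no longer takes a file path; it expects the (sample_rate, samples) tuple that gr.Audio(type="numpy") passes to its callback. A minimal local sketch of calling it outside Gradio, assuming the module is importable as app and using soundfile (not part of this commit) to load a hypothetical sample_creole.wav:

    import soundfile as sf
    from app import transcribe

    # Load a clip as float32 samples plus its native sample rate
    data, sr = sf.read("sample_creole.wav", dtype="float32")

    # Mimic what the Gradio component hands to the callback: (sample_rate, samples)
    print(transcribe((sr, data)))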