nareauow committed on
Commit
c0bdacc
·
verified ·
1 Parent(s): 8c217ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -21
app.py CHANGED
@@ -128,29 +128,27 @@ def recognize_speech(audio_path):
128
  return "Speech recognition model not available"
129
 
130
  try:
 
131
  audio_data, sr = sf.read(audio_path)
132
-
133
- inputs = speech_processor(
134
- audio_data,
135
- sampling_rate=sr,
136
- return_tensors="pt"
137
- ).to(device)
138
 
139
- generated_ids = speech_recognizer.generate(
140
- input_features=inputs["input_features"],
141
- max_length=100,
142
- num_beams=5, # Changed from 1 to 5 for better results
143
- early_stopping=True,
144
- no_repeat_ngram_size=2
145
- )
 
146
 
147
- transcription = speech_processor.batch_decode(
148
- generated_ids,
149
- skip_special_tokens=True
150
- )[0]
151
 
152
- return transcription.strip()
153
-
 
 
 
154
  except Exception as e:
155
  return f"Speech recognition error: {str(e)}"
156
 
@@ -210,7 +208,7 @@ def predict_speaker(audio, model, processor):
210
  return result, probs_dict, recognized_text,predicted_speaker
211
 
212
  except Exception as e:
213
- return f"Erreur : {str(e)}", None, None,None
214
 
215
  # Charger modèle
216
  def load_model(model_id="nareauow/my_speech_recognition", model_filename="model_3.pth"):
@@ -277,7 +275,7 @@ def create_interface():
277
 
278
  gr.Markdown("""### Comment utiliser ?
279
  - Choisissez le modèle.
280
- - Cliquez sur 🎙️ pour enregistrer votre voix.
281
  - Cliquez sur **Reconnaître** pour obtenir la prédiction.
282
  """)
283
 
 
128
  return "Speech recognition model not available"
129
 
130
  try:
131
+ # Read audio file
132
  audio_data, sr = sf.read(audio_path)
 
 
 
 
 
 
133
 
134
+ # Resample to 16kHz if needed
135
+ if sr != 16000:
136
+ audio_data = np.interp(
137
+ np.linspace(0, len(audio_data), int(16000 * len(audio_data) / sr)),
138
+ np.arange(len(audio_data)),
139
+ audio_data
140
+ )
141
+ sr = 16000
142
 
143
+ # Process audio
144
+ inputs = speech_processor(audio_data, sampling_rate=sr, return_tensors="pt")
145
+ inputs = {k: v.to(device) for k, v in inputs.items()}
 
146
 
147
+ # Generate transcription
148
+ generated_ids = speech_recognizer.generate(**inputs)
149
+ transcription = speech_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
150
+
151
+ return transcription
152
  except Exception as e:
153
  return f"Speech recognition error: {str(e)}"
154
 
 
208
  return result, probs_dict, recognized_text,predicted_speaker
209
 
210
  except Exception as e:
211
+ return f"Erreur : {str(e)}", None, None
212
 
213
  # Charger modèle
214
  def load_model(model_id="nareauow/my_speech_recognition", model_filename="model_3.pth"):
 
275
 
276
  gr.Markdown("""### Comment utiliser ?
277
  - Choisissez le modèle.
278
+ - Cliquez sur le microphone pour enregistrer votre voix.
279
  - Cliquez sur **Reconnaître** pour obtenir la prédiction.
280
  """)
281