Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -128,29 +128,27 @@ def recognize_speech(audio_path):
|
|
128 |
return "Speech recognition model not available"
|
129 |
|
130 |
try:
|
|
|
131 |
audio_data, sr = sf.read(audio_path)
|
132 |
-
|
133 |
-
inputs = speech_processor(
|
134 |
-
audio_data,
|
135 |
-
sampling_rate=sr,
|
136 |
-
return_tensors="pt"
|
137 |
-
).to(device)
|
138 |
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
146 |
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
)[0]
|
151 |
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
154 |
except Exception as e:
|
155 |
return f"Speech recognition error: {str(e)}"
|
156 |
|
@@ -210,7 +208,7 @@ def predict_speaker(audio, model, processor):
|
|
210 |
return result, probs_dict, recognized_text,predicted_speaker
|
211 |
|
212 |
except Exception as e:
|
213 |
-
return f"Erreur : {str(e)}", None, None
|
214 |
|
215 |
# Charger modèle
|
216 |
def load_model(model_id="nareauow/my_speech_recognition", model_filename="model_3.pth"):
|
@@ -277,7 +275,7 @@ def create_interface():
|
|
277 |
|
278 |
gr.Markdown("""### Comment utiliser ?
|
279 |
- Choisissez le modèle.
|
280 |
-
- Cliquez sur
|
281 |
- Cliquez sur **Reconnaître** pour obtenir la prédiction.
|
282 |
""")
|
283 |
|
|
|
128 |
return "Speech recognition model not available"
|
129 |
|
130 |
try:
|
131 |
+
# Read audio file
|
132 |
audio_data, sr = sf.read(audio_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
+
# Resample to 16kHz if needed
|
135 |
+
if sr != 16000:
|
136 |
+
audio_data = np.interp(
|
137 |
+
np.linspace(0, len(audio_data), int(16000 * len(audio_data) / sr)),
|
138 |
+
np.arange(len(audio_data)),
|
139 |
+
audio_data
|
140 |
+
)
|
141 |
+
sr = 16000
|
142 |
|
143 |
+
# Process audio
|
144 |
+
inputs = speech_processor(audio_data, sampling_rate=sr, return_tensors="pt")
|
145 |
+
inputs = {k: v.to(device) for k, v in inputs.items()}
|
|
|
146 |
|
147 |
+
# Generate transcription
|
148 |
+
generated_ids = speech_recognizer.generate(**inputs)
|
149 |
+
transcription = speech_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
150 |
+
|
151 |
+
return transcription
|
152 |
except Exception as e:
|
153 |
return f"Speech recognition error: {str(e)}"
|
154 |
|
|
|
208 |
return result, probs_dict, recognized_text,predicted_speaker
|
209 |
|
210 |
except Exception as e:
|
211 |
+
return f"Erreur : {str(e)}", None, None
|
212 |
|
213 |
# Charger modèle
|
214 |
def load_model(model_id="nareauow/my_speech_recognition", model_filename="model_3.pth"):
|
|
|
275 |
|
276 |
gr.Markdown("""### Comment utiliser ?
|
277 |
- Choisissez le modèle.
|
278 |
+
- Cliquez sur pour enregistrer votre voix.
|
279 |
- Cliquez sur **Reconnaître** pour obtenir la prédiction.
|
280 |
""")
|
281 |
|