Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -128,10 +128,8 @@ def recognize_speech(audio_path):
|
|
128 |
return "Speech recognition model not available"
|
129 |
|
130 |
try:
|
131 |
-
# Read audio file
|
132 |
audio_data, sr = sf.read(audio_path)
|
133 |
|
134 |
-
# Resample to 16kHz if needed
|
135 |
if sr != 16000:
|
136 |
audio_data = np.interp(
|
137 |
np.linspace(0, len(audio_data), int(16000 * len(audio_data) / sr)),
|
@@ -140,26 +138,26 @@ def recognize_speech(audio_path):
|
|
140 |
)
|
141 |
sr = 16000
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
146 |
|
147 |
-
# Generate transcription with specific parameters to prevent repetition
|
148 |
generated_ids = speech_recognizer.generate(
|
149 |
-
|
150 |
-
max_length=100,
|
151 |
-
num_beams=
|
152 |
-
|
|
|
153 |
)
|
154 |
|
155 |
-
# Decode with skip special tokens
|
156 |
transcription = speech_processor.batch_decode(
|
157 |
generated_ids,
|
158 |
-
skip_special_tokens=True
|
159 |
-
clean_up_tokenization_spaces=True
|
160 |
)[0]
|
161 |
|
162 |
-
return transcription.strip()
|
163 |
|
164 |
except Exception as e:
|
165 |
return f"Speech recognition error: {str(e)}"
|
|
|
128 |
return "Speech recognition model not available"
|
129 |
|
130 |
try:
|
|
|
131 |
audio_data, sr = sf.read(audio_path)
|
132 |
|
|
|
133 |
if sr != 16000:
|
134 |
audio_data = np.interp(
|
135 |
np.linspace(0, len(audio_data), int(16000 * len(audio_data) / sr)),
|
|
|
138 |
)
|
139 |
sr = 16000
|
140 |
|
141 |
+
inputs = speech_processor(
|
142 |
+
audio_data,
|
143 |
+
sampling_rate=sr,
|
144 |
+
return_tensors="pt"
|
145 |
+
).to(device)
|
146 |
|
|
|
147 |
generated_ids = speech_recognizer.generate(
|
148 |
+
input_features=inputs["input_features"],
|
149 |
+
max_length=100,
|
150 |
+
num_beams=5, # Changed from 1 to 5 for better results
|
151 |
+
early_stopping=True,
|
152 |
+
no_repeat_ngram_size=2
|
153 |
)
|
154 |
|
|
|
155 |
transcription = speech_processor.batch_decode(
|
156 |
generated_ids,
|
157 |
+
skip_special_tokens=True
|
|
|
158 |
)[0]
|
159 |
|
160 |
+
return transcription.strip()
|
161 |
|
162 |
except Exception as e:
|
163 |
return f"Speech recognition error: {str(e)}"
|