Update app.py
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 import librosa
 import numpy as np
+import soundfile as sf
 from sklearn.preprocessing import StandardScaler
 from sklearn.cluster import KMeans
 from transformers import pipeline
@@ -31,22 +32,31 @@ def process_audio(audio_path):
         speaker_labels = kmeans.fit_predict(mfccs_scaled)
         print(f"Clustering done, {len(set(speaker_labels))} speakers detected.")
 
-        #
-
+        # Group the audio segments by speaker
+        speaker_audio = {speaker: [] for speaker in set(speaker_labels)}
         segment_duration = len(audio) // len(speaker_labels)
 
-
-
-
-
+        for i in range(len(speaker_labels)):
+            start = i * segment_duration
+            end = start + segment_duration
+            speaker_id = speaker_labels[i]
+            speaker_audio[speaker_id].extend(audio[start:end])
+
+        # Transcribe the merged segments
+        result = []
+        for speaker, audio_segment in speaker_audio.items():
+            if len(audio_segment) == 0:
                 continue
+
+            temp_filename = f"temp_speaker_{speaker}.wav"
+            sf.write(temp_filename, np.array(audio_segment), sr)  # save the segment
+
+            transcription = stt_pipeline(temp_filename)  # transcribe
+            result.append(f"Speaker {speaker}: {transcription['text']}")
 
-
-        transcriptions.append(f"Speaker {speaker_labels[i // segment_duration]}: {transcription['text']}")
-        print(f"Segment {i // sr}-{(i + sr * 5) // sr}s transcribed.")
+            print(f"Transcription for speaker {speaker} done.")
 
-
-        return "\n".join(transcriptions)
+        return "\n".join(result)
 
     except Exception as e:
         print(f"Error: {e}")
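For readers skimming the diff, the sketch below restates the new flow end to end: cluster per-frame MFCC features with KMeans, pool the raw samples under each cluster label, write each pool to a temporary WAV with soundfile, and run it through a transformers ASR pipeline. This is a minimal sketch, not the Space's actual code: the openai/whisper-tiny checkpoint, the 16 kHz sample rate, the fixed speaker count of 2, and the transcribe_by_speaker wrapper are all assumptions for illustration.

import numpy as np
import librosa
import soundfile as sf
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from transformers import pipeline

# Assumption: any Whisper-style checkpoint works here; the Space's model is not shown in this diff.
stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

def transcribe_by_speaker(audio_path, n_speakers=2):
    audio, sr = librosa.load(audio_path, sr=16000)

    # One 13-dim MFCC vector per frame; scale features before clustering.
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    features = StandardScaler().fit_transform(mfccs.T)
    labels = KMeans(n_clusters=n_speakers, n_init=10, random_state=0).fit_predict(features)

    # Map each frame label onto an equal-sized slice of raw samples,
    # mirroring the commit's len(audio) // len(speaker_labels) step.
    segment = len(audio) // len(labels)
    pooled = {s: [] for s in set(labels)}
    for i, s in enumerate(labels):
        pooled[s].extend(audio[i * segment:(i + 1) * segment])

    # Write each speaker's pooled audio to a temp WAV and transcribe it.
    lines = []
    for speaker, samples in pooled.items():
        if not samples:
            continue
        tmp = f"temp_speaker_{speaker}.wav"
        sf.write(tmp, np.array(samples), sr)
        text = stt_pipeline(tmp)["text"]
        lines.append(f"Speaker {speaker}: {text}")
    return "\n".join(lines)

One design caveat worth noting: pooling non-contiguous frames concatenates each speaker's audio out of chronological order, so the output is one merged block per speaker rather than a turn-by-turn dialogue, and KMeans over per-frame MFCCs is only a rough stand-in for a dedicated diarization model.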