Spaces:

wesam0099
/

accentt

Sleeping

wesam0099 committed on May 20

Commit

a50c417

verified ·

1 Parent(s): 764d7db

Upload 3 files

Files changed (3) hide show

src/agent.py ADDED Viewed

+# agent.py
+from audio_utils import record_audio, transcribe_audio
+from deep_model import predict_accent
+class AccentAgent:
+    def __init__(self, duration=5):
+        self.duration = duration
+        self.audio_path = None
+        self.transcription = ""
+        self.accent = ""
+    def run(self):
+        print("[Agent] Starting recording...")
+        self.audio_path = record_audio(duration=self.duration)
+        print("[Agent] Audio recorded at:", self.audio_path)
+        print("[Agent] Predicting accent...")
+        self.accent = predict_accent(self.audio_path)
+        print("[Agent] Transcribing audio...")
+        self.transcription = transcribe_audio(self.audio_path)
+        return {
+            "audio_path": self.audio_path,
+            "accent": self.accent,
+            "transcription": self.transcription
+        }

src/audio_utils.py ADDED Viewed

+# audio_utils.py
+from transformers import pipeline
+from pydub import AudioSegment
+import os
+import uuid
+import sounddevice as sd
+from scipy.io.wavfile import write
+import tempfile
+# Load the Whisper model: the speech-recognition pipeline is built once,
+# when this module is imported, and shared by transcribe_audio.
+whisper_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-base")
+def convert_to_wav(audio_file):
+    sound = AudioSegment.from_file(audio_file)
+    temp_filename = f"temp_{uuid.uuid4()}.wav"
+    sound.export(temp_filename, format="wav")
+    return temp_filename
+def transcribe_audio(audio_path):
+    if not audio_path.endswith(".wav"):
+        audio_path = convert_to_wav(audio_path)
+    result = whisper_pipeline(audio_path)
+    text = result['text']
+    # يمكن حذف الملف المؤقت بعد النسخ
+    if os.path.exists(audio_path):
+        os.remove(audio_path)
+    return text
+def record_audio(duration=5, fs=16000):
+    """يسجل صوت من المايك لمدة محددة"""
+    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
+    sd.wait()
+    temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+    write(temp_wav.name, fs, recording)
+    return temp_wav.name

src/deep_model.py ADDED Viewed

+# deep_model.py
+import torch
+import librosa
+from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
+# Hugging Face model id of the accent classifier loaded below.
+MODEL_ID = "ylacombe/accent-classifier"
+# Both objects are loaded when this module is imported and reused by
+# predict_accent.
+feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
+model = AutoModelForAudioClassification.from_pretrained(MODEL_ID)
+# Note: these indices depend on the label ordering of the model itself.
+# Only the two class ids listed here are given names; predict_accent reports
+# every other id as "Unknown (ID: <id>)".
+label_map = {
+    4: "england",
+    14: "us"
+}
+def predict_accent(audio_path: str) -> str:
+    audio, sr = librosa.load(audio_path, sr=16000)
+    inputs = feature_extractor(audio, sampling_rate=16000, return_tensors="pt")
+    with torch.no_grad():
+        logits = model(**inputs).logits
+        predicted_id = torch.argmax(logits, dim=-1).item()
+    return label_map.get(predicted_id, f"Unknown (ID: {predicted_id})")