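"""Arabic Telegram voice chatbot: transcribes incoming voice notes with wav2vec2,
generates a reply with AraGPT2, and answers with speech synthesized by MMS-TTS."""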
import os
import logging
from telegram import Update
from telegram.ext import Application, MessageHandler, filters
from transformers import pipeline, AutoTokenizer, VitsModel
import librosa
import soundfile as sf  # <-- the replacement library (used instead of torchaudio)
from pydub import AudioSegment
import numpy as np
import torch

# Logging setup
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
logger = logging.getLogger(__name__)

# Model initialization (loaded once at startup)
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-arabic"
)
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-ara")
tts_model = VitsModel.from_pretrained("facebook/mms-tts-ara")

def enhance_audio(input_path, output_path):
    """Enhance audio quality using filtering, normalization, and fades."""
    try:
        audio = AudioSegment.from_wav(input_path)
        audio = audio.low_pass_filter(3000)   # cut high-frequency hiss
        audio = audio.high_pass_filter(100)   # cut low-frequency rumble
        audio = audio.normalize()
        audio = audio.fade_in(150).fade_out(150)
        audio.export(output_path, format="wav")
        return True
    except Exception as e:
        logger.error(f"Audio enhancement failed: {str(e)}")
        return False

async def speech_to_text(audio_path):
    """Transcribe an audio file to Arabic text."""
    try:
        # Resample to 16 kHz, the rate the wav2vec2 model expects
        audio, sr = librosa.load(audio_path, sr=16000)
        sf.write("temp.wav", audio, sr)  # <-- torchaudio replaced with soundfile
        result = asr_pipeline("temp.wav")
        return result["text"]
    except Exception as e:
        logger.error(f"Speech recognition failed: {str(e)}")
        return ""

async def generate_response(text):
    """Generate a conversational reply with AraGPT2."""
    try:
        # Note: building the pipeline on every call is slow; it could be
        # created once at module level alongside the other models.
        chatbot = pipeline(
            "text-generation",
            model="aubmindlab/aragpt2-base"
        )
        response = chatbot(
            text,
            max_length=100,
            num_return_sequences=1,
            pad_token_id=chatbot.tokenizer.eos_token_id  # was hard-coded 50256, GPT-2's id
        )
        return response[0]['generated_text']
    except Exception as e:
        logger.error(f"Response generation failed: {str(e)}")
        # Arabic fallback: "Sorry, I didn't understand what you mean."
        return "عذرًا، لم أفهم ما تقصد."

async def text_to_speech(text):
    """Synthesize Arabic speech from text with MMS-TTS."""
    try:
        inputs = tts_tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            output = tts_model(**inputs)
        waveform = output.waveform[0].numpy()
        # ===== main change here: write with soundfile instead of torchaudio =====
        sf.write("bot_response.wav", waveform, tts_model.config.sampling_rate)
    except Exception as e:
        logger.error(f"Text-to-speech conversion failed: {str(e)}")

async def process_voice(update: Update, context):
    """Handle an incoming Telegram voice message end to end."""
    try:
        user = update.message.from_user
        logger.info(f"Voice message from {user.first_name}")
        # Download the voice note
        voice_file = await update.message.voice.get_file()
        await voice_file.download_to_drive("user_voice.ogg")
        # Transcribe, generate a reply, and synthesize it
        user_text = await speech_to_text("user_voice.ogg")
        bot_response = await generate_response(user_text)
        await text_to_speech(bot_response)
        # Enhance quality and send the reply
        if enhance_audio("bot_response.wav", "bot_response_enhanced.wav"):
            await update.message.reply_voice("bot_response_enhanced.wav")
        else:
            await update.message.reply_voice("bot_response.wav")
    except Exception as e:
        logger.error(f"Top-level error: {str(e)}")
        # Arabic error message: "An unexpected error occurred, please try again later."
        await update.message.reply_text("⚠️ حدث خطأ غير متوقع، الرجاء المحاولة لاحقًا.")

if __name__ == "__main__":
    TOKEN = os.getenv("TELEGRAM_TOKEN")
    if not TOKEN:
        raise RuntimeError("TELEGRAM_TOKEN environment variable is not set")
    application = Application.builder().token(TOKEN).build()
    application.add_handler(MessageHandler(filters.VOICE, process_voice))
    application.run_polling()
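
# Assumed runtime dependencies (not declared in this file): python-telegram-bot>=20,
# transformers, torch, librosa, soundfile, pydub, numpy; ffmpeg or a libsndfile build
# with Opus support is likely needed to decode Telegram's OGG voice notes.
# Example local run (file name assumed to be app.py):
#   TELEGRAM_TOKEN=<your-bot-token> python app.py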