Spaces:
Running
Running
import logging
import os

import librosa
import numpy as np
import soundfile as sf
import torch
import torchaudio
from pydub import AudioSegment
from telegram import Update
from telegram.ext import Application, MessageHandler, filters
from transformers import pipeline, AutoTokenizer, VitsModel
# System initialisation: configure logging at INFO level with a
# "timestamp - logger name - level - message" record layout, then create
# the module-level logger used by every function below.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
# Model initialisation: everything here is loaded once, at import time.
# Speech recognition pipeline used by speech_to_text().
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-arabic",
)

# Text-to-speech tokenizer and model; both come from the same checkpoint.
_TTS_CHECKPOINT = "facebook/mms-tts-ara"
tts_tokenizer = AutoTokenizer.from_pretrained(_TTS_CHECKPOINT)
tts_model = VitsModel.from_pretrained(_TTS_CHECKPOINT)
def enhance_audio(input_path, output_path):
    """Post-process a WAV file and write the result as a new WAV file.

    The audio is low-pass filtered (3000), high-pass filtered (100),
    normalised, and given a fade-in and fade-out (150 each) before export.
    NOTE(review): the numbers are presumably cut-off frequencies in Hz and
    fade lengths in milliseconds (pydub conventions) -- confirm.

    Args:
        input_path: Path of the WAV file to read.
        output_path: Path the processed WAV file is written to.

    Returns:
        True when the processed file was exported, False if any step raised
        (the failure is logged).
    """
    try:
        processed = (
            AudioSegment.from_wav(input_path)
            .low_pass_filter(3000)
            .high_pass_filter(100)
            .normalize()
            .fade_in(150)
            .fade_out(150)
        )
        processed.export(output_path, format="wav")
    except Exception as e:
        logger.error(f"فشل تحسين الصوت: {str(e)}")
        return False
    return True
async def speech_to_text(audio_path):
    """Transcribe an audio file with the module-level ASR pipeline.

    The file is decoded and resampled to 16 kHz by ``librosa.load`` and the
    samples are handed to ``asr_pipeline`` in memory.

    NOTE: the body contains no ``await``; decoding and inference run
    synchronously inside this coroutine.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The recognised text, or an empty string when loading or recognition
        raises (the failure is logged).
    """
    try:
        samples, sample_rate = librosa.load(audio_path, sr=16000)
        # Hand the samples over directly instead of round-tripping through a
        # fixed-name "temp.wav": that file was shared by every request and
        # never removed. The dict form also tells the pipeline the rate.
        result = asr_pipeline({"raw": samples, "sampling_rate": sample_rate})
        return result["text"]
    except Exception as e:
        logger.error(f"فشل التعرف على الصوت: {str(e)}")
        return ""
# Reply used whenever no meaningful answer can be generated.
_FALLBACK_REPLY = "عذرًا، لم أفهم ما تقصد."

# Lazily created text-generation pipeline, shared by all calls.
_chatbot = None


def _get_chatbot():
    """Return the shared text-generation pipeline, loading it on first use."""
    global _chatbot
    if _chatbot is None:
        _chatbot = pipeline(
            "text-generation",
            model="aubmindlab/aragpt2-base"
        )
    return _chatbot


async def generate_response(text):
    """Generate a text reply for the user's transcribed message.

    Args:
        text: The user's message. ``None``, empty or whitespace-only input
            short-circuits to the fallback reply without touching the model.

    Returns:
        The ``generated_text`` of the first generated sequence, or the
        fallback reply when the input is empty or generation raises (the
        failure is logged).
        NOTE(review): with the pipeline's default settings the generated
        text presumably still starts with the prompt -- confirm whether the
        echo is wanted.
    """
    if not text or not text.strip():
        return _FALLBACK_REPLY
    try:
        # Reuse one pipeline: building it here on every call reloaded the
        # whole model for each incoming message.
        chatbot = _get_chatbot()
        response = chatbot(
            text,
            max_length=100,
            num_return_sequences=1,
            # NOTE(review): 50256 looks like the stock GPT-2 end-of-text id;
            # confirm it is the right pad id for this checkpoint.
            pad_token_id=50256
        )
        return response[0]['generated_text']
    except Exception as e:
        logger.error(f"فشل توليد الرد: {str(e)}")
        return _FALLBACK_REPLY
async def text_to_speech(text, output_path="bot_response.wav"):
    """Synthesise ``text`` to a WAV file with the module-level TTS model.

    Args:
        text: Text to speak.
        output_path: Where the WAV file is written. Defaults to
            ``"bot_response.wav"``, the path this function always used.

    Returns:
        True when the file was written, False if synthesis or saving raised
        (the failure is logged). Callers that ignore the result behave as
        before.
    """
    try:
        inputs = tts_tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            output = tts_model(**inputs)
        # torchaudio.save expects a 2-D (channels, samples) tensor, so the
        # first item of the batch gets a leading channel axis.
        # Assumes output.waveform is (batch, samples) -- per VitsModel docs.
        waveform = output.waveform[0].detach().cpu().unsqueeze(0)
        torchaudio.save(output_path, waveform, tts_model.config.sampling_rate)
        return True
    except Exception as e:
        logger.error(f"فشل تحويل النص إلى صوت: {str(e)}")
        return False
async def process_voice(update: Update, context):
    """Answer an incoming voice message with a synthesised voice reply.

    Steps: download the voice note to ``user_voice.ogg``, transcribe it,
    generate a text reply, synthesise the reply to ``bot_response.wav``,
    try to enhance it, then send the enhanced file (or the raw one when
    enhancement fails).

    Any exception is logged and reported to the user with a generic error
    message.

    Args:
        update: Incoming Telegram update.
        context: Handler context passed by the application (unused here).
    """
    message = update.effective_message
    if message is None or message.voice is None:
        # No message to reply to, or no voice payload to process.
        return

    raw_reply = "bot_response.wav"
    enhanced_reply = "bot_response_enhanced.wav"
    try:
        user = update.effective_user
        sender_name = getattr(user, "first_name", None)
        logger.info(f"رسالة صوتية من {sender_name}")

        # The reply files have fixed names. Remove leftovers first so a
        # failed synthesis can never send an earlier request's audio.
        for stale in (raw_reply, enhanced_reply):
            try:
                os.remove(stale)
            except FileNotFoundError:
                pass

        # Download the voice note.
        voice_file = await message.voice.get_file()
        await voice_file.download_to_drive("user_voice.ogg")

        # Speech -> text -> reply text -> speech.
        user_text = await speech_to_text("user_voice.ogg")
        bot_response = await generate_response(user_text)
        await text_to_speech(bot_response)
        # text_to_speech logs and swallows its own errors, so check that it
        # actually produced a file before trying to send one.
        if not os.path.exists(raw_reply):
            raise RuntimeError("text_to_speech produced no audio file")

        # Improve the audio quality and send the reply.
        # NOTE(review): both files are WAV; confirm Telegram accepts them
        # as voice notes.
        if enhance_audio(raw_reply, enhanced_reply):
            await message.reply_voice(enhanced_reply)
        else:
            await message.reply_voice(raw_reply)
    except Exception as e:
        logger.error(f"خطأ رئيسي: {str(e)}")
        await message.reply_text("⚠️ حدث خطأ غير متوقع، الرجاء المحاولة لاحقًا.")
def main() -> None:
    """Start the bot: read the token, register the voice handler and poll.

    Raises:
        SystemExit: if the ``TELEGRAM_TOKEN`` environment variable is unset
            or empty, so a misconfigured deployment fails with a clear
            message instead of an error from inside the bot library.
    """
    token = os.getenv("TELEGRAM_TOKEN")
    if not token:
        raise SystemExit("TELEGRAM_TOKEN environment variable is not set")

    application = Application.builder().token(token).build()
    application.add_handler(MessageHandler(filters.VOICE, process_voice))
    application.run_polling()


if __name__ == "__main__":
    main()