import os

import gradio as gr
import requests
import torch
from transformers import AutoTokenizer, VitsModel

from genai_chat_ai import AI, create_chat_session

# Token used both to authorize Inference API requests and to load the models.
api_key = os.environ.get("Id_mode_vits")
headers = {"Authorization": f"Bearer {api_key}"}

# Lazily loaded VITS models, keyed by model name (see get_model below).
models = {}
tokenizer = AutoTokenizer.from_pretrained("asg2024/vits-ar-sa-huba", token=api_key)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def get_model(name_model):
    """Load a VITS model on first use and cache it for subsequent calls."""
    if name_model in models:
        return models[name_model]
    models[name_model] = VitsModel.from_pretrained(name_model, token=api_key).to(device)
    return models[name_model]
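
# Note: repeated calls with the same name reuse the cached instance rather
# than reloading the weights, e.g.
#   m1 = get_model("asg2024/vits-ar-sa")
#   m2 = get_model("asg2024/vits-ar-sa")
#   assert m1 is m2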

def generate_speech(text, name_model):
    """Synthesize `text` with the selected VITS model.

    Returns (sampling_rate, waveform), the format gr.Audio expects.
    """
    inputs = tokenizer(text, return_tensors="pt")
    model = get_model(name_model)
    with torch.no_grad():
        wav = model(
            input_ids=inputs.input_ids.to(device),
            attention_mask=inputs.attention_mask.to(device),
            speaker_id=0,
        ).waveform.cpu().numpy().reshape(-1)
    return model.config.sampling_rate, wav
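
# Example (hypothetical, for local testing outside the UI): synthesize a
# phrase and write it to disk; assumes scipy is available in the environment.
#   from scipy.io import wavfile
#   rate, wav = generate_speech("السلام عليكم", "asg2024/vits-ar-sa-huba")
#   wavfile.write("sample.wav", rate, wav)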
     

def remove_extra_spaces(text):
    """
    Removes extra spaces between words in a string.

    Args:
        text: The string to process.

    Returns:
        The string with runs of whitespace collapsed to single spaces.
    """
    # str.split() with no arguments splits on any whitespace and drops empties,
    # so joining with a single space collapses repeated spaces and newlines.
    return ' '.join(text.split())

def query(text, API_URL):
    """Fallback TTS via the Hugging Face Inference API; returns raw audio bytes."""
    payload = {"inputs": text}
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.content
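
# Example (hypothetical): the Inference API returns raw audio bytes (the exact
# container depends on the backend), which can be written straight to a file:
#   url = "https://api-inference.huggingface.co/models/asg2024/vits-ar-sa"
#   audio_bytes = query("السلام عليكم", url)
#   with open("reply.audio", "wb") as f:
#       f.write(audio_bytes)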

def get_answer_ai(text):
    """Send `text` to the chat session, recreating the session once on failure."""
    global AI
    try:
        response = AI.send_message(text)
        return response.text
    except Exception:
        # The session may have expired or errored; start a fresh one and retry.
        AI = create_chat_session()
        response = AI.send_message(text)
        return response.text

with gr.Blocks() as demo:  # Use gr.Blocks to wrap the entire interface
    with gr.Tab("محادثة صوتية بالذكاء الاصطناعي باللهجة السعودية"):  # "AI voice chat in the Saudi dialect"
        with gr.Row():  # Arrange input/output columns side by side
            with gr.Column():
                text_input = gr.Textbox(label="أدخل أي نص")  # "Enter any text"
                user_audio = gr.Audio(label="صوتك")  # "Your voice"
                with gr.Row():
                    btn = gr.Button("إرسال")  # "Send"
                    btn_ai_only = gr.Button("توليد رد الذكاء الاصطناعي فقط")  # "Generate AI reply only"

            with gr.Column():
                model_choices = gr.Dropdown(
                    choices=[
                        "asg2024/vits-ar-sa",
                        "asg2024/vits-ar-sa-huba",
                        "asg2024/vits-ar-sa-ms",
                        "asg2024/vits-ar-sa-magd",
                        "asg2024/vits-ar-sa-fahd",
                    ],
                    label="اختر النموذج",  # "Choose the model"
                    value="asg2024/vits-ar-sa",
                )
                ai_audio = gr.Audio(label="رد الذكاء الاصطناعي الصوتي")  # "AI voice reply"
                ai_text = gr.Textbox(label="رد الذكاء الاصطناعي النصي")  # "AI text reply"

       

        # One handler drives both buttons: it always generates the AI reply,
        # and optionally synthesizes the user's own text as audio too.
        def process_audio(text, model_choice, generate_user_audio=True):
            # API_URL would be used with query() as an Inference API fallback.
            API_URL = f"https://api-inference.huggingface.co/models/{model_choice}"
            text_answer = get_answer_ai(text)
            text_answer = remove_extra_spaces(text_answer)
            data_ai = generate_speech(text_answer, model_choice)  # or: query(text_answer, API_URL)
            if generate_user_audio:
                # Speak the user's input text, not the AI answer.
                data_user = generate_speech(text, model_choice)  # or: query(text, API_URL)
                return data_user, data_ai, text_answer
            else:
                return data_ai  # AI audio only; user_audio and ai_text stay unchanged

        btn.click(
            process_audio,  # Call the combined function
            inputs=[text_input, model_choices],
            outputs=[user_audio, ai_audio, ai_text],
        )

        # Second button: regenerate only the AI audio reply.
        btn_ai_only.click(
            lambda text, model_choice: process_audio(text, model_choice, False),
            inputs=[text_input, model_choices],
            outputs=[ai_audio],
        )
 
if __name__ == "__main__":
    demo.launch()