Spaces:

wasmdashai
/

LAHJA-AI

Running on Zero

File size: 1,840 Bytes

f7fd7ba
 
 
c259b68
0c4e5eb
f7906ff
 
 
 
ff3f6c6
5d6531a
ff3f6c6
 
 
 
 
 
 
 
 
 
 
 
 
f7906ff
f7fd7ba
 
f7906ff
5d6531a
ff3f6c6
f7906ff
 
b3add08
ff3f6c6
f7906ff
b3add08
f7906ff
 
 
ff3f6c6
 
88facee
ff3f6c6
 
 
 
 
 
 
 
 
59cd719
f7fd7ba

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer,VitsModel
import os
  

# Hugging Face Hub access token, read from the "key_" environment variable
# (None when the variable is not set).
token = os.getenv("key_")

# Tokenizer loaded once at import time and reused by the synthesis function.
tokenizer = AutoTokenizer.from_pretrained("wasmdashai/vtk", token=token)

# Cache of already-loaded models, keyed by model repository name.
models = dict()
@spaces.GPU
def get_model(name_model):
    """Return the VITS model for ``name_model``, loading it on first use.

    On a cache miss the model is loaded with ``VitsModel.from_pretrained``
    (using the module-level ``token``), moved to CUDA, and weight
    normalization is applied to the decoder and to the ``conv_pre`` /
    ``conv_post`` layers of every flow. The finished model is then stored in
    the module-level ``models`` cache.

    Args:
        name_model: Model repository name passed to ``from_pretrained``;
            also used as the cache key.

    Returns:
        The loaded (or previously cached) model.
    """
    # NOTE(review): this function is decorated with @spaces.GPU and is also
    # called from another @spaces.GPU function; whether the module-level
    # cache persists across requests under ZeroGPU should be confirmed.
    cached = models.get(name_model)
    if cached is not None:
        return cached

    model = VitsModel.from_pretrained(name_model, token=token).cuda()
    model.decoder.apply_weight_norm()
    for flow in model.flow.flows:
        torch.nn.utils.weight_norm(flow.conv_pre)
        torch.nn.utils.weight_norm(flow.conv_post)

    # Publish to the cache only after setup has fully succeeded, so a failure
    # part-way through never leaves a half-configured model behind for later
    # calls to pick up.
    models[name_model] = model
    return model


# Import-time probe: build a one-element tensor and request it on CUDA.
# NOTE(review): `zero` is not referenced again in the visible code; it looks
# like a leftover device check — confirm before removing.
zero = torch.Tensor([0]).cuda()
# Print which device the tensor actually ended up on. The original author
# noted 'cpu' here; presumably the GPU is only attached inside functions
# decorated with @spaces.GPU — TODO confirm against the ZeroGPU docs.
print(zero.device) # <-- 'cpu' 🤔
import torch
@spaces.GPU
def modelspeech(text, name_model):
    """Synthesize speech for ``text`` using the model named ``name_model``.

    Args:
        text: Input text; tokenized with the module-level ``tokenizer``.
        name_model: Model name forwarded to ``get_model``.

    Returns:
        A ``(sampling_rate, waveform)`` tuple: the sampling rate from the
        model's config and the generated waveform flattened to a 1-D NumPy
        array.
    """
    encoded = tokenizer(text, return_tensors="pt")
    tts_model = get_model(name_model)

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        token_ids = encoded["input_ids"].cuda()
        output = tts_model(input_ids=token_ids)
        samples = output.waveform.cpu().numpy().reshape(-1)

    return tts_model.config.sampling_rate, samples

# Model repositories the user can pick from in the UI.
AVAILABLE_MODELS = [
    "wasmdashai/vits-ar",
    "wasmdashai/vits-ar-sa-huba",
    "wasmdashai/vits-ar-sa-ms",
    "wasmdashai/vits-ar-sa-magd",
    "wasmdashai/vtk",
]

# Dropdown for selecting the model (the label is Arabic for "Choose the model").
model_choices = gr.Dropdown(
    choices=AVAILABLE_MODELS,
    label="اختر النموذج",
    value="wasmdashai/vtk",
)

# Text + model selection in, synthesized audio out.
demo = gr.Interface(
    fn=modelspeech,
    inputs=["text", model_choices],
    outputs=["audio"],
)
demo.queue()
demo.launch()