import os

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, VitsModel

# Hugging Face access token, supplied via the Space's secrets.
token = os.environ.get("key_")

tokenizer = AutoTokenizer.from_pretrained("wasmdashai/vtk", token=token)

# Cache of loaded VITS models, keyed by checkpoint name.
models = {}
@spaces.GPU
def get_model(name_model):
    """Load a VITS checkpoint onto the GPU, caching it for reuse."""
    global models
    if name_model in models:
        return models[name_model]
    model = VitsModel.from_pretrained(name_model, token=token).cuda()
    # Re-apply weight normalisation to the decoder and the flow convolutions.
    model.decoder.apply_weight_norm()
    for flow in model.flow.flows:
        torch.nn.utils.weight_norm(flow.conv_pre)
        torch.nn.utils.weight_norm(flow.conv_post)
    models[name_model] = model
    return model


@spaces.GPU
def modelspeech(text, name_model):
    """Synthesise `text` with the selected model; return (rate, samples)."""
    inputs = tokenizer(text, return_tensors="pt")
    model = get_model(name_model)
    with torch.no_grad():
        wav = model(input_ids=inputs["input_ids"].cuda()).waveform.cpu().numpy().reshape(-1)
    # Gradio's audio output expects a (sampling_rate, samples) tuple.
    return model.config.sampling_rate, wav
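
# Direct use (a minimal sketch; the sample text below is illustrative):
#
#     rate, samples = modelspeech("مرحبا بكم", "wasmdashai/vits-ar-sa-huba-v1")
#     # `rate` is the model's sampling rate, `samples` a 1-D float array.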

model_choices = gr.Dropdown(
    choices=[
        "wasmdashai/vits-ar",
        "wasmdashai/vits-ar-v1",
        "wasmdashai/vits-ar-sa-huba",
        "wasmdashai/vits-ar-sa-ms",
        "wasmdashai/vits-ar-sa-magd",
        "wasmdashai/vtk",
        "wasmdashai/mak",
        "wasmdashai/vits-ar-sa-huba-v1",
        "wasmdashai/vits-ar-sa-huba-v2",
        "wasmdashai/vits-ar-z1",
        "wasmdashai/vits-ar-sa-A",
    ],
    label="اختر النموذج",  # "Choose the model"
    value="wasmdashai/vits-ar-sa-huba-v1",
)
demo = gr.Interface(fn=modelspeech, inputs=["text", model_choices], outputs=["audio"])
demo.queue()
demo.launch()
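
# A minimal client-side sketch (assumes the `gradio_client` package and the
# app running at its default local URL; the text argument is illustrative):
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860/")
#     result = client.predict(
#         "مرحبا بكم",                       # text to synthesise
#         "wasmdashai/vits-ar-sa-huba-v1",   # model checkpoint
#         api_name="/predict",
#     )
#     # `result` is a path to the generated audio file.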