File size: 2,362 Bytes
03ce9f7
 
19ae03c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03ce9f7
6ede7a5
19ae03c
 
 
 
 
6ede7a5
 
19ae03c
 
 
 
 
 
e96206b
19ae03c
 
 
 
e96206b
6ede7a5
03ce9f7
 
 
19ae03c
 
 
 
 
 
 
 
 
03ce9f7
 
 
19ae03c
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import random
import gradio as gr
from transformers import pipeline
import tempfile
import torch
from os.path import exists
import requests
from TTS.utils.synthesizer import Synthesizer
import gradio as gr

def download(url, file_name):
    """Fetch ``url`` into ``file_name`` unless that file already exists.

    Args:
        url: Address to fetch; HTTP redirects are followed.
        file_name: Destination path. When a file is already present at this
            path nothing is downloaded and the file is left untouched.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    if exists(file_name):
        print(f"Found {file_name}. Skipping download...")
        return

    print(f"Downloading {file_name}")
    # The timeout bounds connect/read stalls so a dead connection cannot hang
    # startup forever (it is not a limit on the total transfer time).
    r = requests.get(url, allow_redirects=True, timeout=60)
    # Check the status BEFORE opening the destination: otherwise an error page
    # would be saved under file_name and every later run would skip the
    # download because the (corrupt) file exists.
    r.raise_for_status()
    with open(file_name, 'wb') as file:
        file.write(r.content)


# --- Fetch the TTS checkpoint and its config at import time -----------------
print("downloading uk/mykyta/vits-tts")
# Release tag of robinhad/ukrainian-tts that both asset URLs below point at.
release_number = "v2.0.0-beta"
model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model-inference.pth"
config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.json"

# Local file names (relative to the current working directory) the assets are
# stored under; these are later handed to the Synthesizer.
model_path = "model.pth"
config_path = "config.json"

# download() skips the request when the target file already exists, so these
# calls only hit the network when the files are missing.
download(model_link, model_path)
download(config_link, config_path)

# Speech-recognition pipeline; transcribe() calls it with the recorded audio
# and reads the "text" field of its result.
p = pipeline("automatic-speech-recognition", "Yehor/wav2vec2-xls-r-300m-uk-with-small-lm")

# Text-to-speech engine built from the checkpoint and config downloaded above.
# NOTE(review): the three trailing None positionals are optional Synthesizer
# arguments whose meaning depends on the installed TTS version - confirm
# against that version's signature before changing them.
synthesizer = Synthesizer(
    model_path, config_path, None, None, None,
)

# Image URL embedded in the "article" HTML of the Gradio interface below.
badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.ukrainian-ai"

def transcribe(audio):
    """Recognize speech in ``audio`` and speak the recognized text back.

    Args:
        audio: Recording passed straight to the speech-recognition pipeline
            ``p`` (the interface supplies it as a file path).

    Returns:
        A ``(text, wav_path)`` tuple: the recognized text and the name of a
        temporary ``.wav`` file holding the synthesized speech. The file is
        created with ``delete=False``, so it stays on disk after this
        function returns; nothing here removes it.
    """
    recognized = p(audio)["text"]

    # Inference only: gradients are disabled while synthesizing and saving.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file, torch.no_grad():
        waveform = synthesizer.tts(recognized)
        synthesizer.save_wav(waveform, wav_file)

    return recognized, wav_file.name

# Demo UI: microphone recording in, recognized text plus synthesized audio
# out, with the visitor badge rendered in the article section. The interface
# is launched immediately at module level.
gr.Interface(
    fn=transcribe,
    inputs=gr.inputs.Audio(source="microphone", type="filepath"),
    outputs=[
        gr.outputs.Textbox(label="Recognized text"),
        gr.outputs.Audio(label="Output"),
    ],
    article=f"<center><img src=\"{badge}\" alt=\"visitors badge\"/></center>",
).launch()

def chat(message, history):
    """Return the conversation history twice, unchanged.

    Args:
        message: Incoming message. Currently unused: the canned-reply logic
            that would append a ``(message, response)`` pair (random replies
            keyed on "How many" / "How" / "Where" prefixes, otherwise
            "I don't know") is disabled.
        history: Conversation so far; any falsy value (``None``, empty list)
            is replaced by a fresh empty list.

    Returns:
        A ``(history, history)`` tuple in which both items are the same
        object.
    """
    if not history:
        history = []
    return history, history


#iface = gr.Interface(
#    chat,
#    ["audio", "state"],
#    ["chatbot", "state"],
#    allow_screenshot=False,
#    allow_flagging="never",
#)
#iface.launch()