Spaces:
Sleeping
Sleeping
File size: 3,049 Bytes
271f307 0064343 0477c9e 271f307 27fc254 8d1c101 adc6fa8 8037104 f64fa83 271f307 5f8069c 025291b d96ba17 5baa862 271f307 4bb59b6 271f307 5baa862 271f307 fedb67a 8d1c101 2197af3 8052ea6 8d1c101 37f7a38 a243b5f a5a9cdb c5c3727 a5a9cdb 0064343 b024e83 a5a9cdb 5f8069c c5c3727 6ae48e4 6299791 c5c3727 dc53cc3 086ac98 6ae48e4 a5a9cdb 8037104 8d1c101 a3079f8 8d1c101 0064343 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
from huggingface_hub import InferenceClient
import gradio as gr
import random
import prompts
from pypipertts import PyPiper
# Shared TTS engine instance; a concrete voice model is loaded later via load_mod().
pp=PyPiper()
#client = InferenceClient("Qwen/QwQ-32B-Preview")
# Hosted LLM endpoint used by generate(); prompt format below follows Mixtral's [INST] convention.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
def format_prompt(message, history):
    """Build a Mixtral-style instruction prompt from prior turns plus the new message.

    history is an iterable of (user_text, bot_text) pairs; when empty or
    falsy, only the bare ``[INST] message [/INST]`` turn is produced (no
    leading ``<s>`` token).
    """
    if not history:
        return f"[INST] {message} [/INST]"
    parts = ["<s>"]
    for user_turn, bot_turn in history:
        parts.append(f"[INST] {user_turn} [/INST] {bot_turn}</s> ")
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)
def generate(prompt, history):
    """Stream an LLM reply for *prompt*, yielding Gradio chatbot updates.

    Yields ``([(prompt, partial_text)], tts_text)`` pairs: while tokens are
    still streaming, the second element is "" so the downstream TTS textbox
    stays quiet; the final yield repeats the finished reply there so
    ``stt.change`` can trigger speech synthesis.

    Fixes vs. original: removed the unused local ``buf`` and dead
    commented-out code.
    """
    if not history:
        history = []
    # Fresh random seed per request so repeated identical prompts can vary.
    seed = random.randint(1, 9999999999999)
    print(seed)
    system_prompt = prompts.ASSISTANT
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )
    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    output = ""
    for response in stream:
        output += response.token.text
        yield [(prompt, output)], ""
    yield [(prompt, output)], output
def tts(inp, voice, length, noise, width, sen_pause):
    """Stream synthesized speech for *inp* via the shared PyPiper engine.

    Bug fix: the original used ``inp.strip("</s>")``, which strips ANY of
    the characters '<', '/', 's', '>' from BOTH ends of the string (e.g.
    "so be it</s>" -> "o be it"), rather than removing the model's
    end-of-sequence token. We now drop only a literal trailing "</s>".
    """
    eos = "</s>"
    if inp.endswith(eos):
        inp = inp[:-len(eos)]
    yield from pp.stream_tts(inp, voice, length, noise, width, sen_pause)
def load_mod(model):
    """Load the Piper voice *model*, yielding progress messages for the UI."""
    status = f"Loading: {model}"
    yield status
    pp.load_mod(model)
    done = f"Voice Loaded: {model}"
    yield done
# Gradio UI: chat with the LLM, then speak each finished reply through Piper TTS.
with gr.Blocks() as iface:
    # Streaming audio sink; autoplay so speech starts as chunks arrive.
    aud=gr.Audio(streaming=True,autoplay=True)
    #chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    chatbot=gr.Chatbot()
    prompt = gr.Textbox()
    with gr.Group():
        with gr.Row():
            submit_b = gr.Button()
            stop_b = gr.Button("Stop")
            clear = gr.ClearButton([chatbot])
    # Voice tuning controls; defaults mirror typical Piper settings.
    with gr.Accordion("Voice Controls",open=False):
        # Status line updated by load_mod() while a voice model loads.
        msg = gr.HTML("""""")
        names=gr.Dropdown(label="Voice", choices=pp.key_list,value="en_US-joe-medium")
        length=gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1)
        noise=gr.Slider(label="Noise", minimum=0.01, maximum=3.0, value=0.5)
        width=gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5)
        sen_pause=gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1)
        upd_btn=gr.Button("Update")
    # Hidden textbox bridging generate() -> TTS: its change event triggers speech.
    with gr.Row(visible=False):
        stt=gr.Textbox(visible=True)
    # Load the default voice at startup and whenever the dropdown changes.
    iface.load(load_mod,names,msg)
    names.change(load_mod,names,msg)
    # generate() streams chat updates; its final yield writes the reply into stt.
    sub_b = submit_b.click(generate, [prompt,chatbot],[chatbot,stt])
    #sub_e = prompt.submit(generate, [prompt, chatbot], [chatbot,stt])
    # NOTE(review): wires pp.stream_tts directly, bypassing the tts() wrapper above.
    stt.change(pp.stream_tts,[stt,names,length,noise,width,sen_pause],aud)
    # Stop cancels the in-flight generation stream.
    stop_b.click(None,None,None, cancels=[sub_b])
iface.queue(default_concurrency_limit=10).launch()
|