Create app.py
app.py
ADDED
@@ -0,0 +1,212 @@
from __future__ import annotations

import os
import time
import uuid

import gradio as gr
import nltk
import numpy as np
import torch
from transformers import pipeline

# Agree to the Coqui TOS non-interactively and fetch the sentence tokenizer
# used later to split answers into sentences.
os.environ["COQUI_TOS_AGREED"] = "1"
nltk.download('punkt')

from TTS.api import TTS
from huggingface_hub import HfApi

HF_TOKEN = os.environ.get("HF_TOKEN")


tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
title = "Voice Chat Mistral"
DESCRIPTION = title
css = """.toast-wrap { display: none !important } """
api = HfApi(token=HF_TOKEN)
repo_id = "ylacombe/voice-chat-with-lama"
system_message = "\nYou are a helpful assistant."
temperature = 0.9
top_p = 0.6
repetition_penalty = 1.2

from gradio_client import Client
from huggingface_hub import InferenceClient


whisper_client = Client("https://sanchit-gandhi-whisper-large-v2.hf.space/")
text_client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.1"
)
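
# Both clients call remote endpoints (a public Whisper Space and the Hugging
# Face Inference API), so only the XTTS model above runs inside this Space.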


def format_prompt(message, history):
    # Wrap each past turn in the Mistral-Instruct template,
    # [INST] user [/INST] answer</s>, then append the new user message.
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
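
# For example, with a hypothetical one-turn history:
#   format_prompt("How are you?", [("Hi", "Hello!")])
#   -> '<s>[INST] Hi [/INST] Hello!</s> [INST] How are you? [/INST]'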

def generate(
    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)

    # Stream tokens from the Inference API, yielding the accumulated text.
    stream = text_client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        output += response.token.text
        yield output
    return output
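
# Usage sketch (hypothetical call): generate() is a generator, so callers see
# the answer grow incrementally:
#   for partial in generate("Tell me a joke", history=[]):
#       ...  # `partial` holds everything generated so far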


def transcribe(wav_path):
    # Delegate speech-to-text to the remote Whisper Space.
    return whisper_client.predict(
        wav_path,      # str (filepath or URL to file) in 'inputs' Audio component
        "transcribe",  # str in 'Task' Radio component
        api_name="/predict"
    )
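
# Minimal usage sketch (assuming a local recording "question.wav" exists):
#   text = transcribe("question.wav")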


# Chatbot demo with multimodal input (typed text or recorded speech) and
# streamed text and audio responses.


def add_text(history, text):
    history = [] if history is None else history
    history = history + [(text, None)]
    # Clear the textbox and lock it until the response finishes streaming.
    return history, gr.update(value="", interactive=False)


def add_file(history, file):
    history = [] if history is None else history
    # Transcribe the recording and treat the text as the user's turn.
    text = transcribe(file)

    history = history + [(text, None)]
    return history


def bot(history, system_prompt=""):
    history = [] if history is None else history

    # Note: system_prompt is accepted here, but format_prompt() never injects
    # it, so the default system_message is effectively unused.
    if system_prompt == "":
        system_prompt = system_message

    history[-1][1] = ""
    for character in generate(history[-1][0], history[:-1]):
        history[-1][1] = character
        yield history
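
# Because bot() yields the updated history on every token, the Chatbot
# component re-renders the growing answer as it streams in.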


def generate_speech(history):
    text_to_generate = history[-1][1]
    text_to_generate = text_to_generate.replace("\n", " ").strip()
    # Split the answer into sentences so each one can be synthesized and
    # streamed to the player as soon as it is ready.
    text_to_generate = nltk.sent_tokenize(text_to_generate)

    filename = f"{uuid.uuid4()}.wav"  # prepared but unused in the streaming path
    sampling_rate = tts.synthesizer.tts_config.audio["sample_rate"]
    silence = [0] * int(0.25 * sampling_rate)

    for sentence in text_to_generate:
        try:
            # Generate speech by cloning a voice using default settings.
            wav = tts.tts(text=sentence,
                          speaker_wav="examples/female.wav",
                          decoder_iterations=25,
                          decoder_sampler="dpm++2m",
                          speed=1.2,
                          language="en")

            yield (sampling_rate, np.array(wav))  # np.array(wav + silence))

        except RuntimeError as e:
            if "device-side assert" in str(e):
                # Cannot recover from a CUDA device-side assert; need to restart.
                print(f"Exit due to: Unrecoverable exception caused by prompt: {sentence}", flush=True)
                gr.Warning("Unhandled exception encountered, please retry in a minute")
                print("CUDA device-side assert runtime error encountered, restart needed")

                # HF Space specific: this error is unrecoverable, restart the Space.
                api.restart_space(repo_id=repo_id)
            else:
                print("RuntimeError: non device-side assert error:", str(e))
                raise e

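# Restoring the commented-out padding variant inside generate_speech (a sketch,
# not the shipped behavior) would separate sentences with 0.25 s of silence:
#   yield (sampling_rate, np.array(wav + silence))
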
with gr.Blocks(title=title) as demo:
    gr.Markdown(DESCRIPTION)

    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        avatar_images=('examples/lama.jpeg', 'examples/lama2.jpeg'),
        bubble_full_width=False,
    )

    with gr.Row():
        txt = gr.Textbox(
            scale=3,
            show_label=False,
            placeholder="Enter text and press enter, or speak to your microphone",
            container=False,
        )
        txt_btn = gr.Button(value="Submit text", scale=1)
        btn = gr.Audio(source="microphone", type="filepath", scale=4)

    with gr.Row():
        audio = gr.Audio(type="numpy", streaming=True, autoplay=True, label="Generated audio response", show_label=True)

    clear_btn = gr.ClearButton([chatbot, audio])

    # All three input paths share the same chain: append the user turn,
    # stream the text reply, then stream the spoken reply.
    txt_msg = txt_btn.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        bot, chatbot, chatbot
    ).then(generate_speech, chatbot, audio)

    txt_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)

    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        bot, chatbot, chatbot
    ).then(generate_speech, chatbot, audio)

    txt_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)

    file_msg = btn.stop_recording(add_file, [chatbot, btn], [chatbot], queue=False).then(
        bot, chatbot, chatbot
    ).then(generate_speech, chatbot, audio)

    gr.Markdown("""
This Space demonstrates how to speak to a chatbot, based solely on open-source models.
It relies on 3 models:
1. [Whisper-large-v2](https://huggingface.co/spaces/sanchit-gandhi/whisper-large-v2) as an ASR model, to transcribe recorded audio to text. It is called through a [gradio client](https://www.gradio.app/docs/client).
2. [Mistral-7b-instruct](https://huggingface.co/spaces/osanseviero/mistral-super-fast) as the chat model, which generates the text responses. It is called from [huggingface_hub](https://huggingface.co/docs/huggingface_hub/guides/inference).
3. [Coqui's XTTS](https://huggingface.co/spaces/coqui/xtts) as a TTS model, to voice the chatbot's answers. This model is hosted locally.

Note:
- By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml""")

demo.queue()
demo.launch(debug=True)