import os
import tempfile

import gradio as gr
import spaces
import torch
import torchaudio

from whisperspeech.languages import LANGUAGES
from whisperspeech.pipeline import Pipeline

title = """#🙋🏻♂️ Welcome to🌟Tonic's🌬️💬📝WhisperSpeech |
|
You can use this ZeroGPU Space to test out the current model [🌬️💬📝collabora/whisperspeech](https://huggingface.co/collabora/whisperspeech). 🌬️💬📝collabora/whisperspeech is An Open Source text-to-speech system built by inverting Whisper. Previously known as spear-tts-pytorch. It's like Stable Diffusion but for speech – both powerful and easily customizable. |
|
You can also use 🌬️💬📝WhisperSpeech by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/laion-whisper?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></h3> |
|
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻 [](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [Poly](https://github.com/tonic-ai/poly) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗 |
|
""" |
|
|
|
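# For reference, the same model can be driven directly from Python without this
# UI. A minimal sketch, assuming the whisperspeech package is installed and that
# Pipeline.generate_to_file works as in the upstream WhisperSpeech examples:
#
#     from whisperspeech.pipeline import Pipeline
#     pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-tiny-en+pl.model')
#     pipe.generate_to_file('output.wav', 'Hello, world!', lang='en')
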
@spaces.GPU
def whisper_speech_demo(text, lang, speaker_audio=None, mix_lang=None, mix_text=None):
    # Load the pipeline inside the call so the models run in the GPU context
    # allocated by the @spaces.GPU decorator (ZeroGPU).
    pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-tiny-en+pl.model')

    # Optional reference voice. Depending on the Gradio version, gr.File hands
    # the function either a tempfile-like object (with .name) or a plain path.
    speaker_url = None
    if speaker_audio is not None:
        speaker_url = getattr(speaker_audio, 'name', speaker_audio)

    if mix_lang and mix_text:
        # Multilingual mode: pair each text fragment with a language code,
        # e.g. texts=["Hello", "Cześć"] with langs=["en", "pl"], then run the
        # remaining stages (semantic -> acoustic -> waveform) by hand. This
        # assumes the Pipeline exposes t2s/s2a/vocoder plus extract_spk_emb()
        # and default_speaker, as in the reference whisperspeech pipeline.
        mixed_langs = lang.split(',') + mix_lang.split(',')
        mixed_texts = [text] + mix_text.split(',')
        stoks = pipe.t2s.generate(mixed_texts, lang=mixed_langs)
        speaker_emb = pipe.extract_spk_emb(speaker_url) if speaker_url else pipe.default_speaker
        atoks = pipe.s2a.generate(stoks, speaker_emb.unsqueeze(0))
        audio_data = pipe.vocoder.decode(atoks)
    else:
        audio_data = pipe.generate(text, speaker_url, lang=lang)

    # The pipeline returns a float waveform tensor (assumed 24 kHz mono), not
    # WAV bytes, so encode it as a WAV file instead of dumping raw tensor bytes.
    audio = audio_data.detach().cpu()
    if audio.dim() == 1:
        audio = audio.unsqueeze(0)
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
        tmp_file_name = tmp_file.name
    torchaudio.save(tmp_file_name, audio, 24000)

    return tmp_file_name

with gr.Blocks() as demo:
    gr.Markdown(title)
    with gr.Row():
        text_input = gr.Textbox(label="Enter text")
        lang_input = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
        # file_types (not "accepts") is the gr.File argument for restricting uploads.
        speaker_input = gr.File(label="Upload Speaker Audio (optional)", file_types=["audio"])
    with gr.Row():
        mix_lang_input = gr.Textbox(label="Mixed Languages (optional, comma-separated)", placeholder="e.g., en,pl")
        mix_text_input = gr.Textbox(label="Mixed Texts (optional, for mixed languages)", placeholder="e.g., Hello, Cześć")
    with gr.Row():
        submit_button = gr.Button("Generate Speech")
        output_audio = gr.Audio(label="Generated Speech")

    submit_button.click(
        whisper_speech_demo,
        inputs=[text_input, lang_input, speaker_input, mix_lang_input, mix_text_input],
        outputs=output_audio,
    )

demo.launch()