Commit bd8dcd1
Parent(s): 64f96e7
feat: use docker space

Files changed:
- Dockerfile +28 -0
- README.md +1 -3
- app.py +17 -227
- build.py +17 -0
- requirements.txt +1 -1
Dockerfile
ADDED
@@ -0,0 +1,28 @@
+FROM python:3.11
+
+# By using XTTS you agree to CPML license https://coqui.ai/cpml
+ENV COQUI_TOS_AGREED=1
+
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+
+# Switch to the "user" user
+USER user
+
+# Set home to the user's home directory
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+# Set the working directory to the user's home directory
+WORKDIR $HOME/app
+
+# Install dependencies
+COPY --chown=user:user requirements.txt .
+RUN pip install -r requirements.txt
+RUN python -m unidic download
+
+# Install model weights
+COPY --chown=user:user . .
+RUN python build.py
+
+CMD ["python", "app.py"]
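To check this Docker setup locally before pushing, the image can be built and run with the Docker CLI. The sketch below drives it from Python; the image tag xtts-space, the published port 7860 (Gradio's default), and forwarding HF_TOKEN into the container are illustrative assumptions, not part of this commit.

# Local smoke test for the Docker-based Space (a sketch; tag, port and
# env forwarding are assumptions, not part of this commit).
import os
import subprocess

# Build the image from the repository root (where this Dockerfile lives).
subprocess.run(["docker", "build", "-t", "xtts-space", "."], check=True)

# Run it, exposing Gradio's default port and passing the Hugging Face token
# that app.py reads from the environment.
subprocess.run(
    [
        "docker", "run", "--rm",
        "-p", "7860:7860",
        "-e", f"HF_TOKEN={os.environ.get('HF_TOKEN', '')}",
        "xtts-space",
    ],
    check=True,
)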
README.md
CHANGED
@@ -3,10 +3,8 @@ title: XTTS
 emoji: 🐸
 colorFrom: green
 colorTo: red
-sdk: gradio
-sdk_version: 3.48.0
-app_file: app.py
 pinned: false
+sdk: docker
 models:
 - coqui/XTTS-v2
 ---
app.py
CHANGED
@@ -1,38 +1,26 @@
-import
-import io, os, stat
+import os
 import subprocess
-import random
-from zipfile import ZipFile
 import uuid
 import time
 import torch
 import torchaudio
 
-
-#download for mecab
-os.system('python -m unidic download')
-
-# By using XTTS you agree to CPML license https://coqui.ai/cpml
-os.environ["COQUI_TOS_AGREED"] = "1"
-
 # langid is used to detect language for longer text
 # Most users expect text to be their own language, there is checkbox to disable it
 import langid
-import base64
 import csv
 from io import StringIO
 import datetime
 import re
 
 import gradio as gr
-from scipy.io.wavfile import write
-from pydub import AudioSegment
 
-from TTS.api import TTS
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts
 from TTS.utils.generic_utils import get_user_data_dir
 
+print("application starting")
+
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
 from huggingface_hub import HfApi
@@ -41,21 +29,10 @@ from huggingface_hub import HfApi
 api = HfApi(token=HF_TOKEN)
 repo_id = "coqui/xtts"
 
-
-print("Export newer ffmpeg binary for denoise filter")
-ZipFile("ffmpeg.zip").extractall()
-print("Make ffmpeg binary executable")
-st = os.stat("ffmpeg")
-os.chmod("ffmpeg", st.st_mode | stat.S_IEXEC)
-
-# This will trigger downloading model
-print("Downloading if not downloaded Coqui XTTS V2")
-from TTS.utils.manage import ModelManager
+print("loading model")
 
 model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
-ModelManager().download_model(model_name)
 model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))
-print("XTTS downloaded")
 
 config = XttsConfig()
 config.load_json(os.path.join(model_path, "config.json"))
@@ -66,9 +43,15 @@ model.load_checkpoint(
     checkpoint_path=os.path.join(model_path, "model.pth"),
     vocab_path=os.path.join(model_path, "vocab.json"),
     eval=True,
-    use_deepspeed=
+    use_deepspeed=False,
 )
-
+
+if torch.cuda.is_available():
+    model.cuda()
+else:
+    model.cpu()
+
+print("Model loaded")
 
 # This is for debugging purposes only
 DEVICE_ASSERT_DETECTED = 0
@@ -81,8 +64,6 @@ def predict(
     prompt,
     language,
     audio_file_pth,
-    mic_file_path,
-    use_mic,
     voice_cleanup,
     no_lang_auto_detect,
     agree,
@@ -130,22 +111,7 @@ def predict(
             None,
         )
 
-
-        if mic_file_path is not None:
-            speaker_wav = mic_file_path
-        else:
-            gr.Warning(
-                "Please record your voice with Microphone, or uncheck Use Microphone to use reference audios"
-            )
-            return (
-                None,
-                None,
-                None,
-                None,
-            )
-
-    else:
-        speaker_wav = audio_file_pth
+    speaker_wav = audio_file_pth
 
     # Filtering for microphone input, as it has BG noise, maybe silence in beginning and end
     # This is fast filtering not perfect
@@ -328,8 +294,6 @@ def predict(
         prompt,
         language,
         audio_file_pth,
-        mic_file_path,
-        use_mic,
         voice_cleanup,
         no_lang_auto_detect,
         agree,
@@ -450,160 +414,6 @@ article = """
 <p>We collect data only for error cases for improvement.</p>
 </div>
 """
-examples = [
-    [
-        "Once when I was six years old I saw a magnificent picture",
-        "en",
-        "examples/female.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
-        "fr",
-        "examples/male.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Als ich sechs war, sah ich einmal ein wunderbares Bild",
-        "de",
-        "examples/female.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Cuando tenía seis años, vi una vez una imagen magnífica",
-        "es",
-        "examples/male.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
-        "pt",
-        "examples/female.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
-        "pl",
-        "examples/male.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
-        "it",
-        "examples/female.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
-        "tr",
-        "examples/female.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Когда мне было шесть лет, я увидел однажды удивительную картинку",
-        "ru",
-        "examples/female.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
-        "nl",
-        "examples/male.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
-        "cs",
-        "examples/female.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "当我还只有六岁的时候, 看到了一副精彩的插画",
-        "zh-cn",
-        "examples/female.wav",
-        None,
-        False,
-        False,
-        False,
-        True,
-    ],
-    [
-        "かつて 六歳のとき、素晴らしい絵を見ました",
-        "ja",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-        False,
-        True,
-    ],
-    [
-        "한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.",
-        "ko",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-        False,
-        True,
-    ],
-    [
-        "Egyszer hat éves koromban láttam egy csodálatos képet",
-        "hu",
-        "examples/male.wav",
-        None,
-        False,
-        True,
-        False,
-        True,
-    ],
-]
-
-
 
 with gr.Blocks(analytics_enabled=False) as demo:
     with gr.Row():
@@ -651,7 +461,6 @@ with gr.Blocks(analytics_enabled=False) as demo:
                 "ko",
                 "hu"
             ],
-            max_choices=1,
             value="en",
         )
         ref_gr = gr.Audio(
@@ -660,17 +469,6 @@ with gr.Blocks(analytics_enabled=False) as demo:
             type="filepath",
             value="examples/female.wav",
         )
-        mic_gr = gr.Audio(
-            source="microphone",
-            type="filepath",
-            info="Use your microphone to record audio",
-            label="Use Microphone for Reference",
-        )
-        use_mic_gr = gr.Checkbox(
-            label="Use Microphone",
-            value=False,
-            info="Notice: Microphone input may not work properly under traffic",
-        )
         clean_ref_gr = gr.Checkbox(
             label="Cleanup Reference Voice",
             value=False,
@@ -696,15 +494,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
         out_text_gr = gr.Text(label="Metrics")
         ref_audio_gr = gr.Audio(label="Reference Audio Used")
 
-
-
-
-
-            outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr],
-            fn=predict,
-            cache_examples=False,)
-
-        tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])
-
-demo.queue()
-demo.launch(debug=True, show_api=True)
+    tts_button.click(predict, [input_text_gr, language_gr, ref_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])
+
+print("Starting server")
+demo.queue().launch(debug=True, show_api=True)
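Pieced together from the hunks above, the start-up path of the new app.py is roughly the sketch below. The model = Xtts.init_from_config(config) line and the leading config argument to load_checkpoint sit above the visible hunk and are assumptions here; everything else is taken directly from the diff.

# Sketch of the new start-up sequence (init_from_config and the config
# argument are assumed; the rest appears in the diff above).
import os
import torch

from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
from TTS.utils.generic_utils import get_user_data_dir

print("loading model")

# build.py already downloaded the weights at image-build time, so this
# cache path exists inside the container.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))

config = XttsConfig()
config.load_json(os.path.join(model_path, "config.json"))
model = Xtts.init_from_config(config)  # assumed; not visible in this diff view
model.load_checkpoint(
    config,  # assumed; not visible in this diff view
    checkpoint_path=os.path.join(model_path, "model.pth"),
    vocab_path=os.path.join(model_path, "vocab.json"),
    eval=True,
    use_deepspeed=False,
)

# deepspeed is dropped from requirements.txt in this commit, so inference
# uses plain PyTorch: GPU if available, otherwise CPU.
if torch.cuda.is_available():
    model.cuda()
else:
    model.cpu()

print("Model loaded")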
build.py
ADDED
@@ -0,0 +1,17 @@
+import os, stat
+from zipfile import ZipFile
+
+# Use newer ffmpeg binary for Ubuntu20 to use denoising for microphone input
+print("Export newer ffmpeg binary for denoise filter")
+ZipFile("ffmpeg.zip").extractall()
+print("Make ffmpeg binary executable")
+st = os.stat("ffmpeg")
+os.chmod("ffmpeg", st.st_mode | stat.S_IEXEC)
+
+# This will trigger downloading model
+print("Downloading if not downloaded Coqui XTTS V2")
+from TTS.utils.manage import ModelManager
+
+model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
+ModelManager().download_model(model_name)
+print("XTTS downloaded")
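Because the Dockerfile runs python build.py during the image build, the ffmpeg binary and the XTTS weights are baked into the image instead of being fetched at container start. A quick sanity check that could be run inside the built image (a sketch, not part of this commit) is to confirm the cache path that app.py reads from is populated:

# Sketch: verify that build.py left the artifacts where app.py expects them.
import os

from TTS.utils.generic_utils import get_user_data_dir

model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))

for required in ("config.json", "model.pth", "vocab.json"):
    assert os.path.isfile(os.path.join(model_path, required)), f"missing {required}"

assert os.access("ffmpeg", os.X_OK), "ffmpeg binary is not executable"
print("build artifacts look good")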
requirements.txt
CHANGED
@@ -8,5 +8,5 @@ mecab-python3==1.0.6
 unidic-lite==1.0.8
 unidic==1.1.0
 langid
-deepspeed
 pydub
+gradio