Spaces:

DeepLearning101
/

GPT-SoVITS_TWMAN

Running

App Files Files Community

DeepLearning101 committed on Apr 13

Commit

52ad4ef

verified ·

1 Parent(s): 0cef237

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -24

app.py CHANGED Viewed

@@ -3,47 +3,59 @@
 @author:XuMing([email protected])
 @description: Re-train by TWMAN
 """
 import hashlib
 import os
 import ssl
 import gradio as gr
 import torch
 from loguru import logger
-import subprocess
 ssl._create_default_https_context = ssl._create_unverified_context
-import nltk
-def upgrade_LangSegment():
     try:
-        subprocess.check_call([os.sys.executable, "-m", "pip", "install", "LangSegment", "-i",  "https://pypi.org/simple", "--upgrade"])
-        print("升級成功")
     except subprocess.CalledProcessError:
-        print("升級失敗")
-# 呼叫升級函數
-upgrade_LangSegment()
-# 檢查是否已下載資源，若未下載則進行下載
 nltk_data_path = os.path.expanduser('~/nltk_data')
 if not os.path.exists(os.path.join(nltk_data_path, 'corpora/cmudict.zip')):
     nltk.download('cmudict', download_dir=nltk_data_path)
 if not os.path.exists(os.path.join(nltk_data_path, 'taggers/averaged_perceptron_tagger.zip')):
     nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_path)
 from parrots import TextToSpeech
-# 設定裝置與模式
 device = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"device: {device}")
 half = True if device == "cuda" else False
-# 初始化語音合成模型
-m = TextToSpeech(speaker_model_path="DeepLearning101/GPT-SoVITS_TWMAN", speaker_name="TWMAN", device=device, half=half)
-# 用於檢查和生成語音的音訊檔案
 def get_text_hash(text: str):
     return hashlib.md5(text.encode('utf-8')).hexdigest()
@@ -54,7 +66,7 @@ def do_tts_wav_predict(text: str, output_path: str = None):
         m.predict(text, text_language="auto", output_path=output_path)
     return output_path
-# 建立 Gradio WebUI
 with gr.Blocks(title="TTS WebUI") as app:
     gr.Markdown("""
     # 線上語音合成 (TWMAN)
@@ -81,22 +93,25 @@ with gr.Blocks(title="TTS WebUI") as app:
     - [用PPOCRLabel來幫PaddleOCR做OCR的微調和標註](https://blog.twman.org/2023/07/wsl.html)
     - [基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析](https://blog.twman.org/2023/07/HugIE.html)
     """)
-    # 設定語音合成輸入與按鈕
     with gr.Group():
-        gr.Markdown("*請在下方輸入要進行語音合成的文字*")
         with gr.Row():
-            text = gr.Textbox(label="想語音合成的文字 (100字以内)", value="床前明月光，疑是地上霜。舉頭望明月，低頭思故鄉。", placeholder="請輸入您想要的文字", lines=3)
-            inference_button = gr.Button("語音合成", variant="primary")
-            output = gr.Audio(label="合成的語音")
-        # 設定按鈕點擊事件
         inference_button.click(
             do_tts_wav_predict,
             [text],
             [output],
         )
-# 啟動 Gradio 應用
 app.queue(max_size=10)
-app.launch(share=True, inbrowser=True)

 @author:XuMing([email protected])
 @description: Re-train by TWMAN
 """
 import hashlib
 import os
 import ssl
+import subprocess
 import gradio as gr
 import torch
 from loguru import logger
+import nltk
+# 設定 HTTPS context 避免證書錯誤
 ssl._create_default_https_context = ssl._create_unverified_context
+# 🛠 安裝相容的 LangSegment（v0.1.5）
+def install_compatible_LangSegment():
     try:
+        subprocess.check_call([
+            os.sys.executable, "-m", "pip",
+            "install", "LangSegment==0.1.5",
+            "-i", "https://pypi.org/simple",
+            "--force-reinstall"
+        ])
+        print("✅ LangSegment 降級成功")
     except subprocess.CalledProcessError:
+        print("❌ LangSegment 降級失敗")
+install_compatible_LangSegment()
+# 🧠 下載 NLTK 所需資源
 nltk_data_path = os.path.expanduser('~/nltk_data')
 if not os.path.exists(os.path.join(nltk_data_path, 'corpora/cmudict.zip')):
     nltk.download('cmudict', download_dir=nltk_data_path)
 if not os.path.exists(os.path.join(nltk_data_path, 'taggers/averaged_perceptron_tagger.zip')):
     nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_path)
+# 📦 匯入 parrots
 from parrots import TextToSpeech
+# 設定裝置與精度
 device = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"device: {device}")
 half = True if device == "cuda" else False
+# 初始化 TTS 模型
+m = TextToSpeech(
+    speaker_model_path="DeepLearning101/GPT-SoVITS_TWMAN",
+    speaker_name="TWMAN",
+    device=device,
+    half=half
+)
+# 🔊 音訊生成邏輯
 def get_text_hash(text: str):
     return hashlib.md5(text.encode('utf-8')).hexdigest()
         m.predict(text, text_language="auto", output_path=output_path)
     return output_path
+# 🌐 Gradio WebUI 設定
 with gr.Blocks(title="TTS WebUI") as app:
     gr.Markdown("""
     # 線上語音合成 (TWMAN)
     - [用PPOCRLabel來幫PaddleOCR做OCR的微調和標註](https://blog.twman.org/2023/07/wsl.html)
     - [基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析](https://blog.twman.org/2023/07/HugIE.html)
     """)
     with gr.Group():
+        gr.Markdown("🔤 請輸入要進行語音合成的文字：")
         with gr.Row():
+            text = gr.Textbox(
+                label="輸入文字（建議 100 字內）",
+                value="床前明月光，疑是地上霜。舉頭望明月，低頭思故鄉。",
+                placeholder="請輸入文字...",
+                lines=3
+            )
+            inference_button = gr.Button("🎤 語音合成", variant="primary")
+            output = gr.Audio(label="🔊 合成的語音")
         inference_button.click(
             do_tts_wav_predict,
             [text],
             [output],
         )
+# 啟動 Gradio App
 app.queue(max_size=10)
+app.launch(share=True, inbrowser=True)