kevinwang676
/

GPT-SoVITS-v3-api

ONNX

Model card Files Files and versions Community

kevinwang676 committed on Mar 29

Commit

a9f4026

verified ·

1 Parent(s): c16b111

Update api_v2.py

Browse files

Files changed (1) hide show

api_v2.py +72 -3

api_v2.py CHANGED Viewed

@@ -126,6 +126,63 @@ from pydantic import BaseModel
 i18n = I18nAuto()
 cut_method_names = get_cut_method_names()
 parser = argparse.ArgumentParser(description="GPT-SoVITS api")
 parser.add_argument("-c", "--tts_config", type=str, default="GPT_SoVITS/configs/tts_infer.yaml", help="tts_infer路径")
 parser.add_argument("-a", "--bind_addr", type=str, default="127.0.0.1", help="default: 127.0.0.1")
@@ -281,8 +338,8 @@ async def tts_handle(req:dict):
             {
                 "text": "",                   # str.(required) text to be synthesized
                 "text_lang": "",              # str.(required) language of the text to be synthesized
-                "ref_audio_path": "",         # str.(required) reference audio path
-                "aux_ref_audio_paths": [],    # list.(optional) auxiliary reference audio paths for multi-speaker synthesis
                 "prompt_text": "",            # str.(optional) prompt text for the reference audio
                 "prompt_lang": "",            # str.(required) language of the prompt text for the reference audio
                 "top_k": 5,                   # int. top k sampling
@@ -318,6 +375,16 @@ async def tts_handle(req:dict):
         req["return_fragment"] = True
     try:
         tts_generator=tts_pipeline.run(req)
         if streaming_mode:
@@ -413,7 +480,9 @@ async def tts_post_endpoint(request: TTS_Request):
 @APP.get("/set_refer_audio")
 async def set_refer_aduio(refer_audio_path: str = None):
     try:
-        tts_pipeline.set_ref_audio(refer_audio_path)
     except Exception as e:
         return JSONResponse(status_code=400, content={"message": f"set refer audio failed", "Exception": str(e)})
     return JSONResponse(status_code=200, content={"message": "success"})

 i18n = I18nAuto()
 cut_method_names = get_cut_method_names()
+import os
+import sys
+import traceback
+from typing import Generator
+import requests
+import tempfile
+import urllib.parse
+from pathlib import Path
def process_audio_path(audio_path, timeout=30):
    """Return a local file path for ``audio_path``, downloading it if it is a URL.

    Values that do not start with ``http://``, ``https://`` or ``s3://``
    (including ``None`` and the empty string) are returned unchanged.
    HTTP(S) URLs are streamed into ``<now_dir>/temp_audio`` and the path of
    the downloaded file is returned.

    Args:
        audio_path: Local path or URL of the audio file.
        timeout: Timeout in seconds passed to ``requests.get`` so that a
            stalled server cannot block the caller indefinitely.

    Returns:
        ``audio_path`` itself for non-URL input, otherwise the path of the
        downloaded local copy.

    Raises:
        Exception: If the download fails for any reason, or if an ``s3://``
            URL is given (S3 download is not implemented). The original
            error is attached as ``__cause__``.
    """
    # Local import so this block does not depend on a file-level import.
    import hashlib

    if not audio_path or not audio_path.startswith(("http://", "https://", "s3://")):
        return audio_path

    # NOTE(review): the URL may come straight from an API request, so the
    # server will fetch whatever host the caller names. Consider restricting
    # the allowed hosts if this API is reachable by untrusted clients.
    try:
        if audio_path.startswith("s3://"):
            # TODO: implement with boto3 (bucket = netloc, key = path without
            # the leading "/"). Fail before touching the file system.
            raise NotImplementedError("S3 download not implemented. Add boto3 library and implementation.")

        temp_dir = os.path.join(now_dir, "temp_audio")
        os.makedirs(temp_dir, exist_ok=True)

        # Prefix the name with a digest of the full URL: two different URLs
        # that end in the same basename must not overwrite each other, and
        # unlike hash() the digest is stable across interpreter runs.
        url_digest = hashlib.sha256(audio_path.encode("utf-8")).hexdigest()[:16]
        basename = os.path.basename(urllib.parse.urlparse(audio_path).path)
        if basename in ("", ".", ".."):
            filename = f"temp_audio_{url_digest}.wav"
        else:
            filename = f"{url_digest}_{basename}"
        local_path = os.path.join(temp_dir, filename)

        print(f"Downloading from URL: {audio_path}")
        # Stream into a unique temporary file and rename it into place only
        # after the download succeeded, so a failed or concurrent download
        # never leaves a truncated file at local_path.
        fd, partial_path = tempfile.mkstemp(dir=temp_dir, suffix=".part")
        try:
            with os.fdopen(fd, "wb") as out_file, requests.get(
                audio_path, stream=True, timeout=timeout
            ) as response:
                response.raise_for_status()
                for chunk in response.iter_content(chunk_size=8192):
                    out_file.write(chunk)
            os.replace(partial_path, local_path)
        finally:
            # Still present only if the download did not complete.
            if os.path.exists(partial_path):
                os.remove(partial_path)

        print(f"Downloaded to: {local_path}")
        return local_path
    except Exception as e:
        print(f"Error downloading audio file: {e}")
        raise Exception(f"Failed to download audio from URL: {e}") from e
 parser = argparse.ArgumentParser(description="GPT-SoVITS api")
 parser.add_argument("-c", "--tts_config", type=str, default="GPT_SoVITS/configs/tts_infer.yaml", help="tts_infer路径")
 parser.add_argument("-a", "--bind_addr", type=str, default="127.0.0.1", help="default: 127.0.0.1")
             {
                 "text": "",                   # str.(required) text to be synthesized
                 "text_lang": "",              # str.(required) language of the text to be synthesized
+                "ref_audio_path": "",         # str.(required) reference audio path or URL
+                "aux_ref_audio_paths": [],    # list.(optional) auxiliary reference audio paths or URLs
                 "prompt_text": "",            # str.(optional) prompt text for the reference audio
                 "prompt_lang": "",            # str.(required) language of the prompt text for the reference audio
                 "top_k": 5,                   # int. top k sampling
         req["return_fragment"] = True
     try:
+        # Process ref_audio_path (download if it's a URL)
+        req["ref_audio_path"] = process_audio_path(req["ref_audio_path"])
+        # Process aux_ref_audio_paths (download if they're URLs)
+        if req.get("aux_ref_audio_paths"):
+            aux_paths = []
+            for aux_path in req["aux_ref_audio_paths"]:
+                aux_paths.append(process_audio_path(aux_path))
+            req["aux_ref_audio_paths"] = aux_paths
         tts_generator=tts_pipeline.run(req)
         if streaming_mode:
 @APP.get("/set_refer_audio")
 async def set_refer_aduio(refer_audio_path: str = None):
     """Set the reference audio used by the TTS pipeline.

     ``refer_audio_path`` is first passed through ``process_audio_path`` and
     the result is handed to ``tts_pipeline.set_ref_audio``. Any exception
     from either step yields a 400 JSON response carrying the error text;
     otherwise a 200 JSON response is returned.
     """
     try:
         tts_pipeline.set_ref_audio(process_audio_path(refer_audio_path))
     except Exception as err:
         failure = {"message": "set refer audio failed", "Exception": str(err)}
         return JSONResponse(status_code=400, content=failure)
     else:
         return JSONResponse(status_code=200, content={"message": "success"})