aiplexdeveloper committed on
Commit
8ec2046
·
verified ·
1 Parent(s): 3670279

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. arjun_das_output_audio.mp3 +3 -0
  3. handler.py +41 -0
  4. requirements.txt +75 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ arjun_das_output_audio.mp3 filter=lfs diff=lfs merge=lfs -text
arjun_das_output_audio.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a2dcb4c1e2e75c316cbfb4fb38a8f4b63641082ee2dfb9613a43274a30e65f
3
+ size 207654
handler.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torchaudio as ta
2
+ from chatterbox.tts import ChatterboxTTS
3
+ from typing import Dict, Any, List
4
+ import soundfile as sf
5
+ import io
6
+ import base64
7
+
8
+
9
class EndpointHandler:
    """Custom inference handler that synthesizes speech with ChatterboxTTS.

    The model is loaded once at construction time. Each call turns the
    request text into audio, using a fixed reference voice clip as the audio
    prompt, and returns the result as a base64-encoded WAV payload.
    """

    # Reference voice clip passed to the model as ``audio_prompt_path``.
    AUDIO_PROMPT_FILENAME = "arjun_das_output_audio.mp3"

    def __init__(self, path: str = ""):
        """Load the TTS model and locate the voice-prompt file.

        Args:
            path: Directory to look for the voice-prompt file in. When the
                file is not found there (or ``path`` is empty), the bare
                filename is used, i.e. it is resolved against the current
                working directory as before.

        Raises:
            RuntimeError: If the model cannot be loaded. The original
                exception is attached as ``__cause__``.
        """
        import os  # local import: only needed to resolve the prompt path

        candidate = os.path.join(path or "", self.AUDIO_PROMPT_FILENAME)
        # Fall back to the bare filename so deployments that relied on the
        # working directory keep working.
        if os.path.isfile(candidate):
            self.audio_prompt_path = candidate
        else:
            self.audio_prompt_path = self.AUDIO_PROMPT_FILENAME

        try:
            self.model = ChatterboxTTS.from_pretrained(device="cuda")
        except Exception as e:
            raise RuntimeError(f"[ERROR] Failed to load model: {e}") from e

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Synthesize speech for one request.

        Args:
            data: Request payload. ``data["inputs"]`` is either a mapping
                with a required ``"text"`` entry and optional
                ``"exaggeration"`` (default 0.3) and ``"cfg_weight"``
                (default 0.5) entries, or a plain string that is used as the
                text with default settings.

        Returns:
            A single-element list: ``[{"audio_base64": <str>}]`` on success,
            where the string is base64-encoded WAV data, or
            ``[{"error": <message>}]`` if anything goes wrong. Exceptions are
            reported in the response rather than propagated.
        """
        try:
            inputs = data.get("inputs", {})
            # Accept a bare string payload as shorthand for {"text": ...}.
            if isinstance(inputs, str):
                inputs = {"text": inputs}

            text = inputs.get("text")
            # Reject missing/blank text up front with a clear message
            # instead of letting the model fail on it.
            if not isinstance(text, str) or not text.strip():
                raise ValueError('"inputs.text" must be a non-empty string')

            exaggeration = inputs.get("exaggeration", 0.3)
            cfg_weight = inputs.get("cfg_weight", 0.5)
            print(exaggeration, cfg_weight)

            # Instances created without __init__ have no resolved path;
            # fall back to the bare filename in that case.
            audio_prompt_path = getattr(
                self, "audio_prompt_path", self.AUDIO_PROMPT_FILENAME
            )
            wav = self.model.generate(
                text,
                audio_prompt_path=audio_prompt_path,
                exaggeration=exaggeration,
                cfg_weight=cfg_weight,
            )

            # Write the generated audio into an in-memory WAV file. The
            # result is moved to CPU, converted to numpy and transposed
            # before being handed to soundfile
            # (presumably to get a frames-first layout — TODO confirm).
            buffer = io.BytesIO()
            sf.write(buffer, wav.cpu().numpy().T, self.model.sr, format='WAV')

            # Encode the WAV bytes as base64 text for the response body.
            audio_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

            return [{"audio_base64": audio_base64}]

        except Exception as e:
            # Request boundary: report the failure to the caller instead of
            # raising.
            print(f"[ERROR] Inference failed: {e}")
            return [{"error": str(e)}]
requirements.txt ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audioread==3.0.1
2
+ certifi==2025.6.15
3
+ cffi==1.17.1
4
+ cfgv==3.4.0
5
+ charset-normalizer==3.4.2
6
+ chatterbox-tts==0.1.2
7
+ conformer==0.3.2
8
+ decorator==5.2.1
9
+ diffusers==0.29.0
10
+ distlib==0.3.9
11
+ einops==0.8.1
12
+ filelock==3.18.0
13
+ fsspec==2025.5.1
14
+ hf-xet==1.1.5
15
+ huggingface-hub==0.33.1
16
+ identify==2.6.12
17
+ idna==3.10
18
+ importlib_metadata==8.7.0
19
+ Jinja2==3.1.6
20
+ joblib==1.5.1
21
+ lazy_loader==0.4
22
+ librosa==0.11.0
23
+ llvmlite==0.44.0
24
+ MarkupSafe==3.0.2
25
+ mpmath==1.3.0
26
+ msgpack==1.1.1
27
+ networkx==3.5
28
+ nodeenv==1.9.1
29
+ numba==0.61.2
30
+ numpy==2.2.6
31
+ nvidia-cublas-cu12==12.4.5.8
32
+ nvidia-cuda-cupti-cu12==12.4.127
33
+ nvidia-cuda-nvrtc-cu12==12.4.127
34
+ nvidia-cuda-runtime-cu12==12.4.127
35
+ nvidia-cudnn-cu12==9.1.0.70
36
+ nvidia-cufft-cu12==11.2.1.3
37
+ nvidia-cufile-cu12==1.11.1.6
38
+ nvidia-curand-cu12==10.3.5.147
39
+ nvidia-cusolver-cu12==11.6.1.9
40
+ nvidia-cusparse-cu12==12.3.1.170
41
+ nvidia-cusparselt-cu12==0.6.2
42
+ nvidia-nccl-cu12==2.21.5
43
+ nvidia-nvjitlink-cu12==12.4.127
44
+ nvidia-nvtx-cu12==12.4.127
45
+ onnx==1.18.0
46
+ packaging==25.0
47
+ pillow==11.2.1
48
+ platformdirs==4.3.8
49
+ pooch==1.8.2
50
+ pre_commit==4.2.0
51
+ protobuf==6.31.1
52
+ pycparser==2.22
53
+ PyYAML==6.0.2
54
+ regex==2024.11.6
55
+ requests==2.32.4
56
+ resemble-perth==1.0.1
57
+ s3tokenizer==0.1.7
58
+ safetensors==0.5.3
59
+ scikit-learn==1.7.0
60
+ scipy==1.16.0
61
+ setuptools==80.9.0
62
+ soundfile==0.13.1
63
+ soxr==0.5.0.post1
64
+ sympy==1.13.1
65
+ threadpoolctl==3.6.0
66
+ tokenizers==0.20.3
67
+ torch==2.6.0
68
+ torchaudio==2.6.0
69
+ tqdm==4.67.1
70
+ transformers==4.46.3
71
+ triton==3.2.0
72
+ typing_extensions==4.14.0
73
+ urllib3==2.5.0
74
+ virtualenv==20.31.2
75
+ zipp==3.23.0