muhtasham committed on
Commit
0bb0b13
·
verified ·
1 Parent(s): 21f182e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +203 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import httpx
3
+ import os
4
+ import atexit
5
+ from loguru import logger
6
+
7
# Base URL of the FastAPI TTS backend, taken from the API_URL environment
# variable. The trailing slash is stripped so that paths can be appended
# with a single "/" (e.g. f"{API_URL}/tts").
_raw_api_url = os.getenv("API_URL")
if not _raw_api_url or not _raw_api_url.strip():
    # Fail fast with an actionable message instead of the opaque
    # "'NoneType' object has no attribute 'rstrip'" that an unset variable
    # would otherwise produce at import time.
    raise RuntimeError(
        "The API_URL environment variable is not set; "
        "set it to the base URL of the TTS API before starting the app."
    )
API_URL = _raw_api_url.strip().rstrip("/")

# Shared httpx client reused by every request made from this module.
client = httpx.Client(
    timeout=httpx.Timeout(
        connect=10.0,  # seconds allowed to establish a connection
        read=120.0,    # seconds allowed while waiting for response data
        write=10.0,    # seconds allowed to send request data
        pool=None,     # no timeout while waiting for a pooled connection
    ),
    limits=httpx.Limits(
        max_keepalive_connections=5,
        max_connections=10,
        keepalive_expiry=30.0,
    ),
    transport=httpx.HTTPTransport(
        # NOTE(review): per the httpx docs, transport-level retries cover
        # connection failures only, not error responses - confirm this is
        # the intended retry policy.
        retries=3,
    ),
)
27
+
28
def check_api_health():
    """Probe the API root and report whether it answered successfully.

    Issues a GET to ``{API_URL}/`` through the shared ``client`` and treats
    any error status (via ``raise_for_status``) as a failure.

    Returns:
        True when the request completed without an httpx error, False when
        it timed out or failed with any other ``httpx.HTTPError``. Failures
        are logged, not raised.
    """
    try:
        client.get(f"{API_URL}/").raise_for_status()
    except httpx.TimeoutException as exc:
        # Handled before the generic HTTPError clause so timeouts get
        # their own log line.
        logger.error(f"API health check timed out: {exc!s}")
        healthy = False
    except httpx.HTTPError as exc:
        logger.error(f"API health check failed: {exc!s}")
        healthy = False
    else:
        logger.info("API health check passed")
        healthy = True
    return healthy
41
+
42
def generate_speech(text, temperature, top_p, repetition_penalty, max_new_tokens, progress=gr.Progress()):
    """Request synthesized speech for ``text`` from the TTS API.

    Args:
        text: Text to speak. ``None`` or whitespace-only input is treated
            as empty.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus sampling threshold forwarded to the API.
        repetition_penalty: Repetition penalty forwarded to the API.
        max_new_tokens: Generation length limit forwarded to the API.
        progress: Gradio progress tracker, updated before the request is sent.

    Returns:
        The raw response body (requested as WAV) on HTTP 200, or ``None``
        when the input text is empty.

    Raises:
        gr.Error: If the health check fails, the API answers with a non-200
            status, the request times out or fails at the network level, or
            any other unexpected error occurs.
    """
    # The Clear button in this file writes None into the textbox, so guard
    # against None before calling .strip().
    cleaned_text = (text or "").strip()
    if not cleaned_text:
        logger.warning("Empty text input received")
        return None

    try:
        # Check API health first
        if not check_api_health():
            logger.error("API is not healthy, aborting request")
            raise gr.Error("The API service is currently unavailable. Please try again later.")

        # Log input parameters
        logger.info(f"Generating speech for text: {text[:50]}... with params: temp={temperature}, top_p={top_p}, rep_penalty={repetition_penalty}, max_tokens={max_new_tokens}")

        # Prepare the request payload
        payload = {
            "text": cleaned_text,
            "return_type": "wav",  # Request WAV format directly
            "temperature": temperature,
            "top_p": top_p,
            "repetition_penalty": repetition_penalty,
            "max_new_tokens": max_new_tokens,
        }

        # Update progress
        progress(0.3, "Sending request to server ...")

        # Make request to FastAPI endpoint
        response = client.post(
            f"{API_URL}/tts",
            json=payload,
            headers={"Content-Type": "application/json"},
        )

        # Log response status
        logger.debug(f"Received response with status {response.status_code} and content-type {response.headers.get('content-type')}")

        if response.status_code != 200:
            error_msg = f"API returned error status {response.status_code}"
            logger.error(error_msg)
            raise gr.Error(error_msg)

        # Return the WAV bytes directly
        logger.info("Successfully generated speech in WAV format")
        return response.content

    except gr.Error:
        # Already a user-facing error raised above; let it through untouched
        # so the generic handler below does not re-wrap its message.
        raise
    except httpx.TimeoutException as e:
        error_msg = "Request timed out. The server took too long to respond."
        logger.error(f"{error_msg}: {str(e)}")
        raise gr.Error(error_msg) from e
    except httpx.HTTPError as e:
        error_msg = f"Network error while generating speech: {str(e)}"
        logger.error(error_msg)
        raise gr.Error(error_msg) from e
    except Exception as e:
        error_msg = f"Error generating speech: {str(e)}"
        # loguru has no ``exc_info`` keyword (it would be used as a
        # str.format argument); logger.exception records the traceback.
        logger.exception(error_msg)
        raise gr.Error(error_msg) from e
100
+
101
# Close the shared httpx client when the interpreter exits.
atexit.register(client.close)
103
+
104
# Example rows for the UI. Column order matches the inputs wired to
# gr.Examples: text, temperature, top_p, repetition_penalty, max_new_tokens.
# Every row shares the same sampling settings; only the text and the token
# budget vary.
_EXAMPLE_SAMPLING = (0.6, 0.95, 1.1)

examples = [
    [sentence, *_EXAMPLE_SAMPLING, token_budget]
    for sentence, token_budget in (
        (
            "Салом, номи ман Али аст ва ман имрӯз мехоҳам ба шумо дар бораи забони тоҷикӣ ва аҳамияти он дар фарҳанги мо нақл кунам.",
            1800,
        ),
        (
            "Имрӯз ҳаво хеле хуб аст ва ман қарор додам, ки бо дӯстонам ба боғ равам ва якҷоя вақт гузаронем.",
            1200,
        ),
        (
            "Ман забони тоҷикӣ меомӯзам, зеро мехоҳам бо мардумони гуногун сӯҳбат кунам ва фарҳанги онҳоро беҳтар фаҳмам.",
            1200,
        ),
        (
            "Лутфан як пиёла чой диҳед, зеро ман имрӯз хеле хаста шудам ва мехоҳам каме истироҳат кунам.",
            1200,
        ),
        (
            "Шумо аз куҷо ҳастед ва чӣ гуна ба омӯзиши забони тоҷикӣ шурӯъ кардед?",
            1200,
        ),
    )
]
127
+
128
def _reset_fields():
    """Return empty values for the text box and the audio player."""
    return (None, None)


# Build the Gradio page: prompt and sampling controls on the left, the
# generated audio on the right, followed by the example table and the
# button wiring.
with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
    gr.Markdown("""
    # 🎵 [Tajik Orpheus Text-to-Speech](https://github.com/canopyai/Orpheus-TTS)
    Enter your text below and hear it converted to natural-sounding speech with the Orpheus TTS model.

    ## Tips for better prompts:
    - Short text prompts generally work better than very long phrases
    - Increasing `repetition_penalty` and `temperature` makes the model speak faster.
    """)
    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                lines=5,
                label="Text to speak",
                placeholder="Enter your text here...",
            )

            # Sampling controls, collapsed by default.
            with gr.Accordion("Advanced Settings", open=False):
                temperature = gr.Slider(
                    label="Temperature",
                    info="Higher values (0.7-1.0) create more expressive but less stable speech",
                    minimum=0.1,
                    maximum=1.5,
                    step=0.05,
                    value=0.6,
                )
                top_p = gr.Slider(
                    label="Top P",
                    info="Nucleus sampling threshold",
                    minimum=0.1,
                    maximum=1.0,
                    step=0.05,
                    value=0.95,
                )
                repetition_penalty = gr.Slider(
                    label="Repetition Penalty",
                    info="Higher values discourage repetitive patterns",
                    minimum=1.0,
                    maximum=2.0,
                    step=0.05,
                    value=1.1,
                )
                max_new_tokens = gr.Slider(
                    label="Max Length",
                    info="Maximum length of generated audio (in tokens)",
                    minimum=100,
                    maximum=2000,
                    step=100,
                    value=1200,
                )

            with gr.Row():
                submit_btn = gr.Button("Generate Speech", variant="primary")
                clear_btn = gr.Button("Clear")

        with gr.Column(scale=2):
            # Receives the WAV bytes returned by generate_speech.
            # NOTE(review): `type` looks relevant only when the component is
            # used as an input - confirm against the Gradio Audio docs.
            audio_output = gr.Audio(
                type="filepath",
                label="Generated Speech",
            )

    # The example table and the submit button feed the same inputs, in the
    # order of generate_speech's parameters.
    speech_inputs = [text_input, temperature, top_p, repetition_penalty, max_new_tokens]

    gr.Examples(
        examples=examples,
        inputs=speech_inputs,
        outputs=audio_output,
        fn=generate_speech,
        cache_examples=False,
    )

    submit_btn.click(
        fn=generate_speech,
        inputs=speech_inputs,
        outputs=audio_output,
    )

    # Clear both the prompt and the audio player.
    clear_btn.click(
        fn=_reset_fields,
        inputs=[],
        outputs=[text_input, audio_output],
    )
200
+
201
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    queued_app = demo.queue()
    queued_app.launch(share=False, ssr_mode=False)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ httpx
3
+ loguru