suprimedev committed on
Commit
c296f8a
·
verified ·
1 Parent(s): 4fbf2be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -128
app.py CHANGED
@@ -1,151 +1,186 @@
1
- # -*- coding: utf-8 -*-
2
  import gradio as gr
3
  import requests
4
  import json
5
- from pydub import AudioSegment
6
  import os
 
 
 
 
 
 
 
 
 
7
 
8
# API endpoints and credentials.
TTS_URL = "https://talkbot.ir/TTS-tkun"
TEXT_GEN_URL = "https://talkbot.ir/api/v1/chat/completions"
# The key is read from the environment so the secret is never committed to
# source control. Set TEXT_GEN_API_KEY before launching the app; with an empty
# key the text-generation request is rejected by the API and surfaces as an
# ordinary request error.
TEXT_GEN_API_KEY = os.environ.get("TEXT_GEN_API_KEY", "")
TEXT_GEN_MODEL = "deepseek-v3-0324"
13
 
14
def generate_podcast(topic):
    """Generate a two-voice podcast (script and MP3 file) about ``topic``.

    The function asks the chat-completions endpoint for a dialogue script,
    splits it between two speakers using the "صدای اول:" / "صدای دوم:" line
    prefixes, requests one TTS rendering per speaker, downloads both WAV
    files, interleaves them in fixed 5-second chunks and exports the result
    to ``podcast.mp3`` in the working directory.

    Args:
        topic: Subject of the podcast, inserted into the user message.

    Returns:
        A ``(script, error, audio_path)`` tuple. On success ``error`` is
        ``None``; on failure ``script`` and ``audio_path`` are ``None`` and
        ``error`` holds a user-facing message.
    """
    # Without a timeout a stalled endpoint would block the UI handler forever.
    request_timeout = 120  # seconds
    voice1_audio_path = "voice1.wav"
    voice2_audio_path = "voice2.wav"

    try:
        # 1. Generate the podcast script with the text-generation API.
        print("Generating podcast script...")
        headers = {
            "Authorization": f"Bearer {TEXT_GEN_API_KEY}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": TEXT_GEN_MODEL,
            "messages": [
                {"role": "system", "content": "به عنوان یک تولیدکننده پادکست، یک متن جذاب برای پادکست با دو گوینده (صدای اول و صدای دوم) در مورد موضوع زیر ایجاد کنید. متن باید شامل گفتگو بین دو گوینده باشد."},
                {"role": "user", "content": f"موضوع: {topic}"},
            ],
            "stream": False,
        }
        response = requests.post(
            TEXT_GEN_URL, headers=headers, json=payload, timeout=request_timeout
        )
        response.raise_for_status()
        podcast_script = response.json()['choices'][0]['message']['content']
        print("Podcast script generated.")

        # 2. Assign each non-empty line to a speaker. Lines without an
        #    explicit prefix belong to whoever spoke last (voice 1 initially).
        voice_lines = {1: [], 2: []}
        current_voice = 1
        for raw_line in podcast_script.split('\n'):
            line = raw_line.strip()
            if line.startswith("صدای اول:"):
                current_voice = 1
                line = line.replace("صدای اول:", "", 1).strip()
            elif line.startswith("صدای دوم:"):
                current_voice = 2
                line = line.replace("صدای دوم:", "", 1).strip()
            if line:
                voice_lines[current_voice].append(line)

        voice1_text = " ".join(voice_lines[1])
        voice2_text = " ".join(voice_lines[2])
        if not voice1_text and not voice2_text:
            return None, "خطا در تجزیه متن پادکست. لطفاً متن تولید شده را بررسی کنید.", None

        # 3. Ask the TTS service for one audio link per voice. The service
        #    answers with the link in the response body.
        audio_urls = []
        for number, text in ((1, voice1_text), (2, voice2_text)):
            print(f"Generating audio for voice {number}...")
            audio_url = requests.get(
                TTS_URL, params={"text": text}, timeout=request_timeout
            ).text.strip()
            print(f"Voice {number} audio URL: {audio_url}")
            audio_urls.append(audio_url)

        if not all(url.startswith("http") for url in audio_urls):
            return None, "خطا در تولید صدای TTS. لطفاً متن را بررسی کنید.", None

        # 4. Download both audio files.
        print("Downloading audio files...")
        for audio_url, path in zip(audio_urls, (voice1_audio_path, voice2_audio_path)):
            audio_response = requests.get(audio_url, timeout=request_timeout)
            audio_response.raise_for_status()
            with open(path, "wb") as f:
                f.write(audio_response.content)
        print("Audio files downloaded.")

        # 5. Interleave the two recordings in fixed-length chunks. This is a
        #    simple demonstration merge, not a turn-accurate dialogue mix.
        print("Combining audio files...")
        audio1 = AudioSegment.from_wav(voice1_audio_path)
        audio2 = AudioSegment.from_wav(voice2_audio_path)
        combined_audio = AudioSegment.empty()
        segment_length = 5000  # milliseconds
        for start in range(0, max(len(audio1), len(audio2)), segment_length):
            for track in (audio1, audio2):
                segment = track[start:start + segment_length]
                if segment:  # the shorter track yields empty slices at the end
                    combined_audio += segment

        output_mp3_path = "podcast.mp3"
        combined_audio.export(output_mp3_path, format="mp3")
        print(f"Podcast saved as {output_mp3_path}")

        return podcast_script, None, output_mp3_path

    except requests.exceptions.RequestException as e:
        return None, f"خطا در اتصال به API ها: {e}", None
    except Exception as e:
        return None, f"خطای نامشخص: {e}", None
    finally:
        # 6. Remove the temporary WAV files on every exit path, not only on
        #    success, so failed runs do not leave stale audio behind.
        for path in (voice1_audio_path, voice2_audio_path):
            if os.path.exists(path):
                os.remove(path)
129
 
130
- # Gradio Interface
131
def gradio_interface(topic):
    """Adapt ``generate_podcast`` to the two outputs shown in the UI.

    Returns ``(text, audio_path)``: the error message with no audio when
    generation failed, otherwise the script and the path of the MP3 file.
    """
    text, failure, mp3_path = generate_podcast(topic)
    return (failure, None) if failure else (text, mp3_path)
137
 
138
with gr.Blocks() as demo:
    # Page title.
    gr.Markdown("## ساخت پادکست با هوش مصنوعی")

    # Input: the podcast topic and the button that starts generation.
    topic_input = gr.Textbox(label="موضوع پادکست خود را وارد کنید")
    generate_button = gr.Button("شروع تولید پادکست")

    # Outputs: the generated script (read-only) and the final audio file.
    script_output = gr.Textbox(interactive=False, label="متن پادکست")
    audio_output = gr.Audio(type="filepath", label="پادکست تولید شده")

    # Clicking the button runs the pipeline and fills both outputs.
    generate_button.click(
        fn=gradio_interface,
        inputs=topic_input,
        outputs=[script_output, audio_output],
    )
150
 
151
  if __name__ == "__main__":
 
 
1
  import gradio as gr
2
  import requests
3
  import json
 
4
  import os
5
+ from pydub import AudioSegment
6
+ from pydub.playback import play
7
+
8
# --- Configuration ---
TALKBOT_TTS_URL = "https://talkbot.ir/TTS-tkun"
TALKBOT_API_BASE_URL = "https://talkbot.ir/api/v1/chat/completions"
# The API key is read from the environment so the secret is never committed
# to source control. Set TALKBOT_API_KEY before launching the app; an empty
# value is reported to the user by create_podcast.
TALKBOT_API_KEY = os.environ.get("TALKBOT_API_KEY", "")
MODEL_NAME = "deepseek-v3-0324"
14
 
15
+ # --- Functions ---
 
 
 
 
16
 
17
def get_tts_audio_link(text: str) -> str:
    """Return a URL from which the TTS audio for ``text`` can be downloaded.

    Args:
        text: The text to synthesize.

    Returns:
        The audio link. When the service answers with a link in the response
        body, that link is returned; otherwise the final request URL is
        returned, as before.

    Raises:
        requests.exceptions.HTTPError: If the service answers with an error
            status.
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    # A timeout keeps a stalled TTS service from blocking the UI forever.
    response = requests.get(TALKBOT_TTS_URL, params={"text": text}, timeout=60)
    response.raise_for_status()  # Raise an exception for HTTP errors

    # NOTE(review): the previous version of this app read the audio link from
    # the response body; confirm against the TTS service which form it uses.
    # Audio payloads are not decoded as text.
    content_type = response.headers.get("Content-Type", "").lower()
    if not content_type.startswith("audio/"):
        link = response.text.strip()
        if link.startswith(("http://", "https://")):
            return link
    return response.url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
def generate_podcast_script_ai(prompt: str) -> str:
    """Generate a podcast script with the TalkBot chat-completions API.

    Args:
        prompt: The user message sent to the model.

    Returns:
        The generated script with surrounding whitespace removed. Failures
        are not raised: they are returned as a string starting with
        "Error generating script:" or "Error parsing AI response:".
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {TALKBOT_API_KEY}",
    }
    data = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "شما یک هوش مصنوعی برای تولید متن پادکست هستید. خروجی شما باید متن پادکست باشد."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.7,
        "max_tokens": 1000,
    }

    try:
        # A timeout keeps a stalled API from blocking the UI forever.
        response = requests.post(
            TALKBOT_API_BASE_URL, headers=headers, json=data, timeout=120
        )
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        return f"Error generating script: {e}"
    except ValueError as e:
        # Older versions of requests raise a plain ValueError for a non-JSON
        # body, which RequestException does not cover.
        return f"Error parsing AI response: {e}. Full response: {response.text}"

    try:
        return result['choices'][0]['message']['content'].strip()
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # TypeError/AttributeError cover an unexpected payload shape or a
        # null "content" field; report the payload that was already parsed.
        return f"Error parsing AI response: {e}. Full response: {result}"
54
 
55
def create_podcast(podcast_topic: str) -> tuple[str, str | None, gr.Audio | None]:
    """Create a two-voice podcast MP3 about ``podcast_topic``.

    Generates a script with the AI helper, splits it between two speakers
    using the 'صدای اول:' / 'صدای دوم:' markers, fetches one TTS recording per
    speaker and concatenates them (voice 1, then voice 2) into
    ``podcast_output.mp3`` in the working directory.

    Args:
        podcast_topic: Subject of the podcast, inserted into the AI prompt.

    Returns:
        ``(text, audio_path, audio_component)``. On success ``text`` is the
        generated script, ``audio_path`` the MP3 path and ``audio_component``
        a ``gr.Audio`` for that file. On failure ``text`` is a user-facing
        error message and the other two items are ``None``.
    """
    if not TALKBOT_API_KEY or TALKBOT_API_KEY == "YOUR_DEFAULT_API_KEY_HERE":
        return "خطا: کلید API Talkbot تنظیم نشده است. لطفاً آن را در کد وارد کنید.", None, None

    # 1. Generate the podcast script.
    gr.Info("در حال تولید متن پادکست توسط هوش مصنوعی...")
    ai_prompt = f"یک متن پادکست کوتاه و جذاب در مورد '{podcast_topic}' با دو بخش مجزا برای دو گوینده (صدای اول و صدای دوم) بنویسید. هر بخش را با عنوان 'صدای اول:' و 'صدای دوم:' مشخص کنید. متن پادکست باید حدود 150-250 کلمه باشد."
    generated_script = generate_podcast_script_ai(ai_prompt)

    # The helper reports failures as strings with these exact prefixes. A
    # prefix match (rather than a substring search for "Error") keeps a valid
    # script that merely contains the word "Error" from being rejected.
    if generated_script.startswith(("Error generating script:", "Error parsing AI response:")):
        return generated_script, None, None

    # 2. Split the script between the two voices. Lines before the first
    #    marker are ignored; unmarked lines belong to the last marked speaker.
    gr.Info("در حال تفکیک و تولید صداها...")
    voice_parts = {1: [], 2: []}
    current_voice = None
    for line in generated_script.split('\n'):
        if "صدای اول:" in line:
            current_voice = 1
            line = line.replace("صدای اول:", "")
        elif "صدای دوم:" in line:
            current_voice = 2
            line = line.replace("صدای دوم:", "")
        line = line.strip()
        if current_voice is not None and line:
            voice_parts[current_voice].append(line)

    voice1_text = " ".join(voice_parts[1])
    voice2_text = " ".join(voice_parts[2])
    if not voice1_text or not voice2_text:
        return f"خطا: متن پادکست تولید شده شامل 'صدای اول:' یا 'صدای دوم:' استاندارد نیست. متن کامل: \n{generated_script}", None, None

    voice_jobs = (
        ("در حال دریافت صدای اول...", voice1_text, "voice1.wav"),
        ("در حال دریافت صدای دوم...", voice2_text, "voice2.wav"),
    )

    try:
        # 3. Fetch the audio for each voice into a temporary WAV file.
        try:
            for progress_message, voice_text, wav_path in voice_jobs:
                gr.Info(progress_message)
                wav_link = get_tts_audio_link(voice_text)
                audio_response = requests.get(wav_link, timeout=60)
                audio_response.raise_for_status()
                with open(wav_path, "wb") as f:
                    f.write(audio_response.content)
        except requests.exceptions.HTTPError as e:
            return f"خطا در دریافت صدا از TTS: {e}. URL: {e.request.url}", None, None
        except Exception as e:
            return f"خطای unexpected در دریافت صدا: {e}", None, None

        # 4. Merge the audio files: voice 1's full speech, then voice 2's.
        gr.Info("در حال ترکیب صداها و تولید فایل نهایی MP3...")
        try:
            audio1 = AudioSegment.from_wav("voice1.wav")
            audio2 = AudioSegment.from_wav("voice2.wav")
            final_podcast_audio = audio1 + audio2

            output_mp3_path = "podcast_output.mp3"
            final_podcast_audio.export(output_mp3_path, format="mp3")

            gr.Info("تولید پادکست با موفقیت انجام شد!")
            return generated_script, output_mp3_path, gr.Audio(output_mp3_path, type="filepath", label="پادکست نهایی")
        except Exception as e:
            return f"خطا در ترکیب فایل‌های صوتی: {e}", None, None
    finally:
        # Remove the temporary WAV files on every exit path so a failed run
        # does not leave stale audio behind.
        for _, _, wav_path in voice_jobs:
            if os.path.exists(wav_path):
                os.remove(wav_path)
149
 
150
+ # --- Gradio Interface ---
 
 
 
 
 
 
151
 
152
with gr.Blocks() as demo:
    # Page header and a short description of what the app does.
    gr.Markdown(
        """
        # تولیدکننده پادکست هوشمند 🎙️
        با وارد کردن یک موضوع، هوش مصنوعی ما یک متن پادکست تولید می‌کند و سپس آن را با دو صدای مجزا به یک فایل MP3 پادکست تبدیل می‌کند.
        """
    )

    # Input row: the topic box (pre-filled with an example) and the button.
    with gr.Row():
        topic_input = gr.Textbox(
            label="موضوع پادکست",
            placeholder="مثال: تاریخچه هوش مصنوعی، فواید مدیتیشن، آینده سفر فضایی",
            value="مزایای یادگیری زبان پایتون",
        )
        generate_button = gr.Button("شروع تولید پادکست 🚀")

    # Output column: script text, audio player, and a download link that
    # stays hidden until a file exists.
    with gr.Column():
        script_output = gr.Textbox(label="متن پادکست تولید شده", interactive=False, lines=10)
        audio_output = gr.Audio(None, type="filepath", label="پادکست نهایی (MP3)", format="mp3")
        download_link = gr.File(label="دانلود فایل MP3", file_count="single", visible=False)

    def on_generate_button_click(topic):
        """Run the pipeline and map its result onto the three outputs."""
        script_result, audio_path, audio_component = create_podcast(topic)
        if not audio_path:
            # Failure: show the message, clear the player, hide the download.
            return script_result, None, gr.File(visible=False)
        return script_result, audio_component, gr.File(value=audio_path, visible=True)

    generate_button.click(
        fn=on_generate_button_click,
        inputs=topic_input,
        outputs=[script_output, audio_output, download_link],
    )
185
 
186
  if __name__ == "__main__":