Spaces:

suprimedev
/

talk-dial

Running

App Files Files Community

suprimedev committed on Jun 4

Commit

c296f8a

verified ·

1 Parent(s): 4fbf2be

Update app.py

Browse files

Files changed (1) hide show

app.py +163 -128

app.py CHANGED Viewed

@@ -1,151 +1,186 @@
-# -*- coding: utf-8 -*-
 import gradio as gr
 import requests
 import json
-from pydub import AudioSegment
 import os
-# API Keys and Endpoints
-TTS_URL = "https://talkbot.ir/TTS-tkun"
-TEXT_GEN_URL = "https://talkbot.ir/api/v1/chat/completions"
-TEXT_GEN_API_KEY = "sk-4fb613f56acfccf731e801b904cd89f5"
-TEXT_GEN_MODEL = "deepseek-v3-0324"
-def generate_podcast(topic):
     """
-    Generates a podcast based on a given topic.
     """
-    try:
-        # 1. Generate podcast script using the text generation API
-        print("Generating podcast script...")
-        headers = {
-            "Authorization": f"Bearer {TEXT_GEN_API_KEY}",
-            "Content-Type": "application/json"
-        }
-        payload = {
-            "model": TEXT_GEN_MODEL,
-            "messages": [
-                {"role": "system", "content": "به عنوان یک تولیدکننده پادکست، یک متن جذاب برای پادکست با دو گوینده (صدای اول و صدای دوم) در مورد موضوع زیر ایجاد کنید. متن باید شامل گفتگو بین دو گوینده باشد."},
-                {"role": "user", "content": f"موضوع: {topic}"}
-            ],
-             "stream": False
-        }
-        response = requests.post(TEXT_GEN_URL, headers=headers, json=payload)
-        response.raise_for_status()  # Raise an exception for bad status codes
-        script_data = response.json()
-        podcast_script = script_data['choices'][0]['message']['content']
-        print("Podcast script generated.")
-        # 2. Split the script into sections for each voice (This is a simplified approach)
-        # You might need a more sophisticated parsing method for real-world scenarios
-        lines = podcast_script.split('\n')
-        voice1_lines = []
-        voice2_lines = []
-        current_voice = 1 # Assuming the first line is for voice 1
-        for line in lines:
-            if line.strip().startswith("صدای اول:"):
-                voice1_lines.append(line.replace("صدای اول:", "", 1).strip())
-                current_voice = 1
-            elif line.strip().startswith("صدای دوم:"):
-                voice2_lines.append(line.replace("صدای دوم:", "", 1).strip())
-                current_voice = 2
-            elif line.strip(): # Lines withoutexplicit voice indicators
-                 if current_voice == 1:
-                     voice1_lines.append(line.strip())
-                 else:
-                     voice2_lines.append(line.strip())
-        voice1_text = " ".join(voice1_lines)
-        voice2_text = " ".join(voice2_lines)
-        if not voice1_text.strip() and not voice2_text.strip():
-            return None, "خطا در تجزیه متن پادکست. لطفاً متن generado شده را بررسی کنید.", None
-        # 3. Generate audio for each voice
-        print("Generating audio for voice 1...")
-        voice1_audio_url = requests.get(TTS_URL, params={"text": voice1_text}).text.strip()
-        print(f"Voice 1 audio URL: {voice1_audio_url}")
-        print("Generating audio for voice 2...")
-        voice2_audio_url = requests.get(TTS_URL, params={"text": voice2_text}).text.strip()
-        print(f"Voice 2 audio URL: {voice2_audio_url}")
-        if not voice1_audio_url.startswith("http") or not voice2_audio_url.startswith("http"):
-             return None, "خطا در تولید صدای TTS. لطفاً متن را بررسی کنید.", None
-        # 4. Download audio files
-        print("Downloading audio files...")
-        voice1_audio_response = requests.get(voice1_audio_url)
-        voice1_audio_response.raise_for_status()
-        voice1_audio_path = "voice1.wav"
-        with open(voice1_audio_path, "wb") as f:
-            f.write(voice1_audio_response.content)
-        voice2_audio_response = requests.get(voice2_audio_url)
-        voice2_audio_response.raise_for_status()
-        voice2_audio_path = "voice2.wav"
-        with open(voice2_audio_path, "wb") as f:
-             f.write(voice2_audio_response.content)
-        print("Audio files downloaded.")
-        # 5. Combine audio files (This is a simple concatenation for demonstration)
-        # For a proper podcast with alternating speakers, you would need more complex pydub manipulation
-        print("Combining audio files...")
-        audio1 = AudioSegment.from_wav(voice1_audio_path)
-        audio2 = AudioSegment.from_wav(voice2_audio_path)
-        # A simple way to interleave: This assumes segments are short and alternate
-        # For longer narrations, you'd need to split the audio files further
-        combined_audio = AudioSegment.empty()
-        max_len = max(len(audio1), len(audio2))
-        segment_length = 5000 # Example segment length in milliseconds
-        for i in range(0, max_len, segment_length):
-            segment1 = audio1[i : i + segment_length]
-            segment2 = audio2[i : i + segment_length]
-            if segment1:
-                combined_audio += segment1
-            if segment2:
-                combined_audio += segment2
-        output_mp3_path = "podcast.mp3"
-        combined_audio.export(output_mp3_path, format="mp3")
-        print(f"Podcast saved as {output_mp3_path}")
-        # 6. Clean up temporary files
-        os.remove(voice1_audio_path)
-        os.remove(voice2_audio_path)
-        return podcast_script, None, output_mp3_path
-    except requests.exceptions.RequestException as e:
-        return None, f"خطا در اتصال به API ها: {e}", None
     except Exception as e:
-        return None, f"خطای نامشخص: {e}", None
-# Gradio Interface
-def gradio_interface(topic):
-    script, error, audio_path = generate_podcast(topic)
-    if error:
-        return error, None
-    else:
-        return script, audio_path
 with gr.Blocks() as demo:
-    gr.Markdown("## ساخت پادکست با هوش مصنوعی")
-    topic_input = gr.Textbox(label="موضوع پادکست خود را وارد کنید")
-    generate_button = gr.Button("شروع تولید پادکست")
-    script_output = gr.Textbox(label="متن پادکست", interactive=False)
-    audio_output = gr.Audio(label="پادکست تولید شده", type="filepath")
     generate_button.click(
-        fn=gradio_interface,
         inputs=topic_input,
-        outputs=[script_output, audio_output]
     )
 if __name__ == "__main__":

 import gradio as gr
 import requests
 import json
 import os
+from pydub import AudioSegment
+from pydub.playback import play
+# --- Configuration ---
+# Endpoint that turns text into speech; queried with a `text` parameter.
+TALKBOT_TTS_URL = "https://talkbot.ir/TTS-tkun"
+# Chat-completions endpoint used to generate the podcast script.
+TALKBOT_API_BASE_URL = "https://talkbot.ir/api/v1/chat/completions"
+# Prefer the TALKBOT_API_KEY environment variable (e.g. a Space secret) so the
+# key does not have to live in the repository. The literal below is kept only
+# as a backward-compatible fallback.
+# SECURITY: a key committed to source control should be treated as leaked --
+# rotate it and drop the fallback once the environment variable is configured.
+TALKBOT_API_KEY = os.environ.get("TALKBOT_API_KEY") or "sk-4fb613f510bdcf731e801b904cd890"
+# Model identifier sent in the chat-completions request body.
+MODEL_NAME = "deepseek-v3-0324"
+# --- Functions ---
+def get_tts_audio_link(text: str, timeout: float = 60.0) -> str:
     """
+    Request TalkBot TTS for ``text`` and return the URL of the response.
+
+    Args:
+        text: Text to synthesise; sent as the ``text`` query parameter.
+        timeout: Seconds to wait for the TTS service before giving up.
+
+    Returns:
+        ``response.url`` -- the final URL of the GET request (after any
+        redirects), which the caller then downloads as the WAV file.
+
+    Raises:
+        requests.exceptions.HTTPError: If the service answers with an error status.
+        requests.exceptions.Timeout: If no answer arrives within ``timeout``.
     """
+    # Without a timeout requests waits indefinitely, so a stalled TTS service
+    # would hang the request that triggered it.
+    response = requests.get(TALKBOT_TTS_URL, params={"text": text}, timeout=timeout)
+    response.raise_for_status()
+    # NOTE(review): this is the request's own URL, not a link read from the
+    # response body -- confirm the service redirects to the audio file.
+    return response.url
+def generate_podcast_script_ai(prompt: str, timeout: float = 120.0) -> str:
+    """
+    Generate a podcast script with the TalkBot chat-completions API.
+
+    Args:
+        prompt: User message sent to the model.
+        timeout: Seconds to wait for the API before giving up.
+
+    Returns:
+        The stripped text of the first choice on success. On failure no
+        exception is raised; instead a message starting with
+        ``"Error generating script:"`` or ``"Error parsing AI response:"``
+        is returned.
+    """
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {TALKBOT_API_KEY}"
+    }
+    data = {
+        "model": MODEL_NAME,
+        "messages": [
+            {"role": "system", "content": "شما یک هوش مصنوعی برای تولید متن پادکست هستید. خروجی شما باید متن پادکست باشد."},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": 0.7,
+        "max_tokens": 1000
+    }
+    try:
+        # A timeout keeps a stalled API from blocking the caller forever.
+        response = requests.post(TALKBOT_API_BASE_URL, headers=headers, json=data, timeout=timeout)
+        response.raise_for_status()
+        result = response.json()
+    except requests.exceptions.RequestException as e:
+        return f"Error generating script: {e}"
+    except ValueError as e:
+        # Older requests versions raise a plain ValueError for a non-JSON body.
+        return f"Error parsing AI response: {e}"
+    try:
+        return result['choices'][0]['message']['content'].strip()
+    except (KeyError, IndexError, TypeError, AttributeError) as e:
+        # TypeError/AttributeError cover a payload that is not a dict or whose
+        # content is null. Reuse the already-parsed body rather than parsing
+        # the response a second time.
+        return f"Error parsing AI response: {e}. Full response: {result}"
+def _remove_temp_wavs(paths: tuple[str, ...] = ("voice1.wav", "voice2.wav")) -> None:
+    """Best-effort removal of the intermediate WAV files."""
+    for wav_path in paths:
+        try:
+            os.remove(wav_path)
+        except OSError:
+            # Missing or locked file: cleanup must never mask the real result.
+            pass
+def _download_voice_wav(text: str, wav_path: str) -> None:
+    """Fetch the TTS audio for ``text`` and write it to ``wav_path``."""
+    audio_response = requests.get(get_tts_audio_link(text), timeout=60)
+    audio_response.raise_for_status()
+    with open(wav_path, "wb") as f:
+        f.write(audio_response.content)
+def create_podcast(podcast_topic: str) -> tuple[str, str | None, gr.Audio | None]:
+    """
+    Build a two-voice podcast MP3 for ``podcast_topic``.
+
+    Steps: ask the AI for a script, split it into the text of the two speaker
+    markers, fetch TTS audio for each voice, then concatenate voice 1 followed
+    by voice 2 and export the result as an MP3 file.
+
+    Args:
+        podcast_topic: Topic inserted into the script prompt.
+
+    Returns:
+        ``(script, mp3_path, audio_component)`` on success, or
+        ``(error_message, None, None)`` when any step fails.
+    """
+    if not TALKBOT_API_KEY or TALKBOT_API_KEY == "YOUR_DEFAULT_API_KEY_HERE":
+        return "خطا: کلید API Talkbot تنظیم نشده است. لطفاً آن را در کد وارد کنید.", None, None
+    # 1. Generate Podcast Script
+    gr.Info("در حال تولید متن پادکست توسط هوش مصنوعی...")
+    ai_prompt = f"یک متن پادکست کوتاه و جذاب در مورد '{podcast_topic}' با دو بخش مجزا برای دو گوینده (صدای اول و صدای دوم) بنویسید. هر بخش را با عنوان 'صدای اول:' و 'صدای دوم:' مشخص کنید. متن پادکست باید حدود 150-250 کلمه باشد."
+    generated_script = generate_podcast_script_ai(ai_prompt)
+    # Match only the exact prefixes generate_podcast_script_ai uses for failures;
+    # a plain `"Error" in ...` test would reject any valid script that merely
+    # mentions the word "Error".
+    if generated_script.startswith(("Error generating script:", "Error parsing AI response:")):
+        return generated_script, None, None
+    # 2. Extract Voices (simple split for demonstration)
+    gr.Info("در حال تفکیک و تولید صداها...")
+    voice_lines: dict[int, list[str]] = {1: [], 2: []}
+    current_voice = None
+    for line in generated_script.split('\n'):
+        if "صدای اول:" in line:
+            current_voice = 1
+            line = line.replace("صدای اول:", "")
+        elif "صدای دوم:" in line:
+            current_voice = 2
+            line = line.replace("صدای دوم:", "")
+        # Lines before the first speaker marker belong to nobody and are dropped.
+        if current_voice is not None:
+            voice_lines[current_voice].append(line.strip())
+    # Strip before the emptiness check so whitespace-only text is rejected
+    # instead of being sent to the TTS service.
+    voice1_text = " ".join(voice_lines[1]).strip()
+    voice2_text = " ".join(voice_lines[2]).strip()
+    if not voice1_text or not voice2_text:
+        return f"خطا: متن پادکست تولید شده شامل 'صدای اول:' یا 'صدای دوم:' استاندارد نیست. متن کامل: \n{generated_script}", None, None
+    # 3. Generate Audio for each voice
+    try:
+        gr.Info("در حال دریافت صدای اول...")
+        _download_voice_wav(voice1_text, "voice1.wav")
+        gr.Info("در حال دریافت صدای دوم...")
+        # Same helper for both voices: the second download previously called a
+        # misspelled `raise_or_status`, which made this step fail every time.
+        _download_voice_wav(voice2_text, "voice2.wav")
+    except requests.exceptions.HTTPError as e:
+        _remove_temp_wavs()
+        return f"خطا در دریافت صدا از TTS: {e}. URL: {e.request.url}", None, None
+    except Exception as e:
+        _remove_temp_wavs()
+        return f"خطای unexpected در دریافت صدا: {e}", None, None
+    # 4. Merge Audio files
+    gr.Info("در حال ترکیب صداها و تولید فایل نهایی MP3...")
+    try:
+        audio1 = AudioSegment.from_wav("voice1.wav")
+        audio2 = AudioSegment.from_wav("voice2.wav")
+        # Plain concatenation: all of voice 1, then all of voice 2. Interleaving
+        # individual turns would require keeping the script's turn order.
+        final_podcast_audio = audio1 + audio2
+        output_mp3_path = "podcast_output.mp3"
+        final_podcast_audio.export(output_mp3_path, format="mp3")
+        gr.Info("تولید پادکست با موفقیت انجام شد!")
+        return generated_script, output_mp3_path, gr.Audio(output_mp3_path, type="filepath", label="پادکست نهایی")
     except Exception as e:
+        return f"خطا در ترکیب فایل‌های صوتی: {e}", None, None
+    finally:
+        # Remove the temporary WAV files whether or not the merge succeeded.
+        _remove_temp_wavs()
+# --- Gradio Interface ---
 with gr.Blocks() as demo:
+    # Page header and short description rendered above the controls.
+    gr.Markdown(
+        """
+        # تولیدکننده پادکست هوشمند 🎙️
+        با وارد کردن یک موضوع، هوش مصنوعی ما یک متن پادکست تولید می‌کند و سپس آن را با دو صدای مجزا به یک فایل MP3 پادکست تبدیل می‌کند.
+        """
+    )
+    # Input row: topic textbox (pre-filled with a sample topic) and the trigger button.
+    with gr.Row():
+        topic_input = gr.Textbox(
+            label="موضوع پادکست",
+            placeholder="مثال: تاریخچه هوش مصنوعی، فواید مدیتیشن، آینده سفر فضایی",
+            value="مزایای یادگیری زبان پایتون"
+        )
+        generate_button = gr.Button("شروع تولید پادکست 🚀")
+    # Output column: generated script, audio player, and a download widget
+    # that starts hidden.
+    with gr.Column():
+        script_output = gr.Textbox(label="متن پادکست تولید شده", interactive=False, lines=10)
+        audio_output = gr.Audio(None, type="filepath", label="پادکست نهایی (MP3)", format="mp3")
+        download_link = gr.File(label="دانلود فایل MP3", file_count="single", visible=False)
+    def on_generate_button_click(topic):
+        """Run create_podcast and map its result onto the three output components."""
+        script_result, audio_path, audio_component = create_podcast(topic)
+        if audio_path:
+            # A path was returned: show the player and reveal the download widget.
+            return script_result, audio_component, gr.File(value=audio_path, visible=True)
+        else:
+            # No path: pass the first element through as-is, clear the player,
+            # and keep the download widget hidden.
+            return script_result, None, gr.File(visible=False)
+    # Wire the button: one input, three outputs in the order the handler returns them.
     generate_button.click(
+        fn=on_generate_button_click,
         inputs=topic_input,
+        outputs=[script_output, audio_output, download_link]
     )
 if __name__ == "__main__":