Live-Podcast

Running on Zero

App Files Files Community

openfree committed 17 days ago

Commit

adf8558

verified ·

1 Parent(s): 707248b

Update app.py

Browse files

Files changed (1) hide show

app.py +448 -0

app.py CHANGED Viewed

	@@ -1638,3 +1638,451 @@ class UnifiedAudioConverter:
1638
1639
1640

+conversation_json["conversation"])
+        )
+        return final_audio_path, conversation_text
+    def _create_output_directory(self) -> str:
+        """Create a unique output directory"""
+        random_bytes = os.urandom(8)
+        folder_name = base64.urlsafe_b64encode(random_bytes).decode("utf-8")
+        os.makedirs(folder_name, exist_ok=True)
+        return folder_name
+    def _combine_audio_files(self, filenames: List[str], output_file: str) -> None:
+        """Combine multiple audio files into one"""
+        if not filenames:
+            raise ValueError("No input files provided")
+        try:
+            audio_segments = []
+            for filename in filenames:
+                if os.path.exists(filename):
+                    audio_segment = AudioSegment.from_file(filename)
+                    audio_segments.append(audio_segment)
+            if audio_segments:
+                combined = sum(audio_segments)
+                combined.export(output_file, format="wav")
+            # Clean up temporary files
+            for filename in filenames:
+                if os.path.exists(filename):
+                    os.remove(filename)
+        except Exception as e:
+            raise RuntimeError(f"Failed to combine audio files: {e}")
+# Global converter instance, built with a default-constructed ConversationConfig.
+# The module-level functions below call its methods and read its `config`.
+converter = UnifiedAudioConverter(ConversationConfig())
+async def synthesize(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
+    """Main synthesis function - handles URL, PDF, and Keyword inputs"""
+    try:
+        # Extract text based on input type
+        if input_type == "URL":
+            if not article_input or not isinstance(article_input, str):
+                return "Please provide a valid URL.", None
+            text = converter.fetch_text(article_input)
+        elif input_type == "PDF":
+            if not article_input:
+                return "Please upload a PDF file.", None
+            text = converter.extract_text_from_pdf(article_input)
+        else:  # Keyword
+            if not article_input or not isinstance(article_input, str):
+                return "Please provide a keyword or topic.", None
+            text = search_and_compile_content(article_input, language)
+            text = f"Keyword-based content:\n{text}"
+        # Limit text to max words
+        words = text.split()
+        if len(words) > converter.config.max_words:
+            text = " ".join(words[:converter.config.max_words])
+        # Extract conversation based on mode
+        if mode == "Local":
+            try:
+                conversation_json = converter.extract_conversation_local(text, language)
+            except Exception as e:
+                print(f"Local mode failed: {e}, trying API fallback")
+                api_key = os.environ.get("TOGETHER_API_KEY")
+                if api_key:
+                    converter.initialize_api_mode(api_key)
+                    conversation_json = converter.extract_conversation_api(text, language)
+                else:
+                    raise RuntimeError("Local mode failed and no API key available for fallback")
+        else:  # API mode
+            api_key = os.environ.get("TOGETHER_API_KEY")
+            if not api_key:
+                print("API key not found, falling back to local mode")
+                conversation_json = converter.extract_conversation_local(text, language)
+            else:
+                try:
+                    converter.initialize_api_mode(api_key)
+                    conversation_json = converter.extract_conversation_api(text, language)
+                except Exception as e:
+                    print(f"API mode failed: {e}, falling back to local mode")
+                    conversation_json = converter.extract_conversation_local(text, language)
+        # Generate conversation text
+        conversation_text = "\n".join(
+            f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
+            for i, turn in enumerate(conversation_json["conversation"])
+        )
+        return conversation_text, None
+    except Exception as e:
+        return f"Error: {str(e)}", None
+async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
+    """Regenerate audio from edited conversation text"""
+    if not conversation_text.strip():
+        return "Please provide conversation text.", None
+    try:
+        conversation_json = converter.parse_conversation_text(conversation_text)
+        if not conversation_json["conversation"]:
+            return "No valid conversation found in the text.", None
+        # Edge TTS 전용 언어는 자동으로 Edge-TTS 사용
+        if language in EDGE_TTS_ONLY_LANGUAGES and tts_engine != "Edge-TTS":
+            tts_engine = "Edge-TTS"
+        # Generate audio based on TTS engine
+        if tts_engine == "Edge-TTS":
+            output_file, _ = await converter.text_to_speech_edge(conversation_json, language)
+        elif tts_engine == "Spark-TTS":
+            if not SPARK_AVAILABLE:
+                return "Spark TTS not available. Please install required dependencies and clone the Spark-TTS repository.", None
+            converter.initialize_spark_tts()
+            output_file, _ = converter.text_to_speech_spark(conversation_json, language)
+        else:  # MeloTTS
+            if not MELO_AVAILABLE:
+                return "MeloTTS not available. Please install required dependencies.", None
+            if language in EDGE_TTS_ONLY_LANGUAGES:
+                return f"MeloTTS does not support {language}. Please use Edge-TTS for this language.", None
+            converter.initialize_melo_tts()
+            output_file, _ = converter.text_to_speech_melo(conversation_json)
+        return "Audio generated successfully!", output_file
+    except Exception as e:
+        return f"Error generating audio: {str(e)}", None
+def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
+    """Synchronous wrapper for async synthesis"""
+    return asyncio.run(synthesize(article_input, input_type, mode, tts_engine, language))
+def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
+    """Synchronous wrapper for async audio regeneration"""
+    return asyncio.run(regenerate_audio(conversation_text, tts_engine, language))
+def update_tts_engine_for_language(language):
+    """언어별 TTS 엔진 옵션 업데이트"""
+    if language in EDGE_TTS_ONLY_LANGUAGES:
+        language_info = {
+            "Korean": "한국어는 Edge-TTS만 지원됩니다",
+            "Japanese": "日本語はEdge-TTSのみサポートされています",
+            "French": "Le français n'est pris en charge que par Edge-TTS",
+            "German": "Deutsch wird nur von Edge-TTS unterstützt",
+            "Spanish": "El español solo es compatible con Edge-TTS",
+            "Italian": "L'italiano è supportato solo da Edge-TTS",
+            "Portuguese": "O português é suportado apenas pelo Edge-TTS",
+            "Dutch": "Nederlands wordt alleen ondersteund door Edge-TTS",
+            "Thai": "ภาษาไทยรองรับเฉพาะ Edge-TTS เท่านั้น",
+            "Vietnamese": "Tiếng Việt chỉ được hỗ trợ bởi Edge-TTS",
+            "Arabic": "العربية مدعومة فقط من Edge-TTS",
+            "Hebrew": "עברית נתמכת רק על ידי Edge-TTS",
+            "Indonesian": "Bahasa Indonesia hanya didukung oleh Edge-TTS",
+            "Hindi": "हिंदी केवल Edge-TTS द्वारा समर्थित है",
+            "Russian": "Русский поддерживается только Edge-TTS",
+            "Chinese": "中文仅支持Edge-TTS"
+        }
+        info_text = language_info.get(language, f"{language} is only supported by Edge-TTS")
+        return gr.Radio(
+            choices=["Edge-TTS"],
+            value="Edge-TTS",
+            label="TTS Engine",
+            info=info_text,
+            interactive=False
+        )
+    else:
+        return gr.Radio(
+            choices=["Edge-TTS", "Spark-TTS", "MeloTTS"],
+            value="Edge-TTS",
+            label="TTS Engine",
+            info="Edge-TTS: Cloud-based, natural voices | Spark-TTS: Local AI model | MeloTTS: Local, requires GPU",
+            interactive=True
+        )
+def toggle_input_visibility(input_type):
+    """Toggle visibility of URL input, file upload, and keyword input based on input type"""
+    if input_type == "URL":
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
+    elif input_type == "PDF":
+        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
+    else:  # Keyword
+        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
+# 모델 초기화 (앱 시작 시)
+if LLAMA_CPP_AVAILABLE:
+    try:
+        model_path = hf_hub_download(
+            repo_id=converter.config.local_model_repo,
+            filename=converter.config.local_model_name,
+            local_dir="./models"
+        )
+        print(f"Model downloaded to: {model_path}")
+    except Exception as e:
+        print(f"Failed to download model at startup: {e}")
+# Gradio interface - improved multilingual layout.
+# Builds the whole UI as `demo`: header, status panel, input controls,
+# output area and examples, followed by the event wiring for the widgets.
+with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
+    .container {max-width: 1200px; margin: auto; padding: 20px;}
+    .header-text {text-align: center; margin-bottom: 30px;}
+    .input-group {background: #f7f7f7; padding: 20px; border-radius: 10px; margin-bottom: 20px;}
+    .output-group {background: #f0f0f0; padding: 20px; border-radius: 10px;}
+    .status-box {background: #e8f4f8; padding: 15px; border-radius: 8px; margin-top: 10px;}
+""") as demo:
+    with gr.Column(elem_classes="container"):
+        # Header
+        with gr.Row(elem_classes="header-text"):
+            gr.Markdown("""
+            # 🎙️ AI Podcast Generator - Professional Multi-Language Edition
+            ### Convert any article, blog, PDF document, or topic into an engaging professional podcast conversation in 24+ languages!
+            """)
+        with gr.Row(elem_classes="discord-badge"):
+            gr.HTML("""
+            <p style="text-align: center;">
+                <a href="https://discord.gg/openfreeai" target="_blank">
+                    <img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="badge">
+                </a>
+            </p>
+            """)
+        # Status display section (values are read once, when the UI is built)
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown(f"""
+                #### 🤖 System Status
+                - **LLM**: {converter.config.local_model_name.split('.')[0]}
+                - **Fallback**: {converter.config.api_model_name.split('/')[-1]}
+                - **Llama CPP**: {"✅ Ready" if LLAMA_CPP_AVAILABLE else "❌ Not Available"}
+                - **Search**: {"✅ Brave API" if BRAVE_KEY else "❌ No API"}
+                """)
+            with gr.Column(scale=1):
+                gr.Markdown("""
+                #### 🌍 Multi-Language Support
+                - **24+ Languages**: Korean, Japanese, French, German, Spanish, Italian, etc.
+                - **Native Voices**: Optimized for each language
+                - **Professional Style**: Expert discussions with data & insights
+                - **Auto-TTS Selection**: Best engine per language
+                """)
+        # Main input section
+        with gr.Group(elem_classes="input-group"):
+            with gr.Row():
+                # Left: input options
+                with gr.Column(scale=2):
+                    # Input type selection
+                    input_type_selector = gr.Radio(
+                        choices=["URL", "PDF", "Keyword"],
+                        value="URL",
+                        label="📥 Input Type",
+                        info="Choose your content source"
+                    )
+                    # URL input (the only input widget visible initially)
+                    url_input = gr.Textbox(
+                        label="🔗 Article URL",
+                        placeholder="Enter the article URL here...",
+                        value="",
+                        visible=True,
+                        lines=2
+                    )
+                    # PDF upload
+                    pdf_input = gr.File(
+                        label="📄 Upload PDF",
+                        file_types=[".pdf"],
+                        visible=False
+                    )
+                    # Keyword input
+                    keyword_input = gr.Textbox(
+                        label="🔍 Topic/Keyword",
+                        placeholder="Enter a topic (e.g., 'AI trends 2024', '인공지능', 'IA tendances', 'KI Trends')",
+                        value="",
+                        visible=False,
+                        info="System will search and compile latest information",
+                        lines=2
+                    )
+                # Right: settings
+                with gr.Column(scale=1):
+                    # Language selection
+                    language_selector = gr.Radio(
+                        choices=[
+                            "English", "Korean", "Japanese", "French", "German",
+                            "Spanish", "Italian", "Portuguese", "Dutch", "Thai",
+                            "Vietnamese", "Arabic", "Hebrew", "Indonesian", "Hindi",
+                            "Russian", "Chinese", "Norwegian", "Swedish", "Finnish",
+                            "Danish", "Polish", "Turkish", "Greek", "Czech"
+                        ],
+                        value="English",
+                        label="🌐 Language / 언어 / 语言",
+                        info="Select podcast language"
+                    )
+                    # Processing mode
+                    mode_selector = gr.Radio(
+                        choices=["Local", "API"],
+                        value="Local",
+                        label="⚙️ Processing Mode",
+                        info="Local: On-device | API: Cloud"
+                    )
+                    # TTS engine
+                    tts_selector = gr.Radio(
+                        choices=["Edge-TTS", "Spark-TTS", "MeloTTS"],
+                        value="Edge-TTS",
+                        label="🔊 TTS Engine",
+                        info="Voice synthesis engine"
+                    )
+            # Generate button
+            with gr.Row():
+                convert_btn = gr.Button(
+                    "🎯 Generate Professional Conversation",
+                    variant="primary",
+                    size="lg",
+                    scale=1
+                )
+        # Output section
+        with gr.Group(elem_classes="output-group"):
+            with gr.Row():
+                # Left: conversation text
+                with gr.Column(scale=3):
+                    conversation_output = gr.Textbox(
+                        label="💬 Generated Professional Conversation (Editable)",
+                        lines=25,
+                        max_lines=50,
+                        interactive=True,
+                        placeholder="Professional podcast conversation will appear here...\n전문 팟캐스트 대화가 여기에 표시됩니다...\nLa conversation professionnelle du podcast apparaîtra ici...",
+                        info="Edit the conversation as needed. Format: 'Speaker Name: Text'"
+                    )
+                    # Audio generation button
+                    with gr.Row():
+                        generate_audio_btn = gr.Button(
+                            "🎙️ Generate Audio from Text",
+                            variant="secondary",
+                            size="lg"
+                        )
+                # Right: audio output and status
+                with gr.Column(scale=2):
+                    audio_output = gr.Audio(
+                        label="🎧 Professional Podcast Audio",
+                        type="filepath",
+                        interactive=False
+                    )
+                    status_output = gr.Textbox(
+                        label="📊 Status",
+                        interactive=False,
+                        lines=3,
+                        elem_classes="status-box"
+                    )
+                    # Help
+                    gr.Markdown("""
+                    #### 💡 Quick Tips:
+                    - **URL**: Paste any article link
+                    - **PDF**: Upload documents directly
+                    - **Keyword**: Enter topics for AI research
+                    - **24+ Languages** fully supported
+                    - Edit conversation before audio generation
+                    - Auto TTS engine selection per language
+                    """)
+        # Examples section
+        with gr.Accordion("📚 Multi-Language Examples", open=False):
+            # NOTE(review): the first value of every row is bound to url_input,
+            # including the "Keyword" rows, while the convert button reads
+            # keyword_input for that input type - confirm that clicking a
+            # keyword example and then the convert button actually works.
+            gr.Examples(
+                examples=[
+                    ["https://huggingface.co/blog/openfreeai/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
+                    ["quantum computing breakthroughs", "Keyword", "Local", "Edge-TTS", "English"],
+                    ["인공지능 윤리와 규제", "Keyword", "Local", "Edge-TTS", "Korean"],
+                    ["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Japanese"],
+                    ["intelligence artificielle tendances", "Keyword", "Local", "Edge-TTS", "French"],
+                    ["künstliche intelligenz entwicklung", "Keyword", "Local", "Edge-TTS", "German"],
+                    ["inteligencia artificial avances", "Keyword", "Local", "Edge-TTS", "Spanish"],
+                ],
+                inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
+                outputs=[conversation_output, status_output],
+                fn=synthesize_sync,
+                cache_examples=False,
+            )
+    # Input type change handler
+    input_type_selector.change(
+        fn=toggle_input_visibility,
+        inputs=[input_type_selector],
+        outputs=[url_input, pdf_input, keyword_input]
+    )
+    # Update the TTS engine options when the language changes
+    language_selector.change(
+        fn=update_tts_engine_for_language,
+        inputs=[language_selector],
+        outputs=[tts_selector]
+    )
+    # Event wiring
+    def get_article_input(input_type, url_input, pdf_input, keyword_input):
+        """Return the widget value that belongs to the selected input type.
+
+        "URL" selects ``url_input``, "PDF" selects ``pdf_input``, and any other
+        value selects ``keyword_input``.
+        """
+        if input_type == "URL":
+            return url_input
+        elif input_type == "PDF":
+            return pdf_input
+        else:  # Keyword
+            return keyword_input
+    # The two values returned by synthesize_sync are shown in the conversation
+    # box and the status box, in that order.
+    # NOTE(review): synthesize puts validation and error messages in the first
+    # value, so they appear in the conversation box - confirm this is intended.
+    convert_btn.click(
+        fn=lambda input_type, url_input, pdf_input, keyword_input, mode, tts, lang: synthesize_sync(
+            get_article_input(input_type, url_input, pdf_input, keyword_input), input_type, mode, tts, lang
+        ),
+        inputs=[input_type_selector, url_input, pdf_input, keyword_input, mode_selector, tts_selector, language_selector],
+        outputs=[conversation_output, status_output]
+    )
+    # Audio is generated from whatever text is currently in the (editable)
+    # conversation box; the result fills the status box and the audio player.
+    generate_audio_btn.click(
+        fn=regenerate_audio_sync,
+        inputs=[conversation_output, tts_selector, language_selector],
+        outputs=[status_output, audio_output]
+    )
+# Launch the app
+if __name__ == "__main__":
+    demo.queue(api_open=True, default_concurrency_limit=10).launch(
+        show_api=True,
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860
+    )