Live-Podcast

Running on Zero

App Files Files Community

openfree committed 25 days ago

Commit

3233647

verified ·

1 Parent(s): 2298a96

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -23

app.py CHANGED Viewed

@@ -20,6 +20,9 @@ from pathlib import Path
 from threading import Thread
 from dotenv import load_dotenv
 # Edge TTS imports
 import edge_tts
 from pydub import AudioSegment
@@ -198,6 +201,28 @@ class UnifiedAudioConverter:
         except httpx.HTTPError as e:
             raise RuntimeError(f"Failed to fetch URL: {e}")
     def _get_messages_formatter_type(self, model_name):
         """Get appropriate message formatter for the model"""
         if "Mistral" in model_name or "BitSix" in model_name:
@@ -431,8 +456,9 @@ class UnifiedAudioConverter:
             # 언어별 음성 설정
             if language == "Korean":
                 voices = [
-                    "ko-KR-SunHiNeural",  # 여성 음성 (자연스러운 한국어)
-                    "ko-KR-HyunsuNeural"  # 남성 음성 (자연스러운 한국어)
                 ]
             else:
                 voices = [
@@ -489,7 +515,7 @@ class UnifiedAudioConverter:
             # Create different voice characteristics for different speakers
             if language == "Korean":
                 voice_configs = [
-                    {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 진행자입니다.", "gender": "female"},
                     {"prompt_text": "안녕하세요, 오늘 게스트로 참여하게 되어 기쁩니다.", "gender": "male"}
                 ]
             else:
@@ -643,14 +669,18 @@ class UnifiedAudioConverter:
 converter = UnifiedAudioConverter(ConversationConfig())
-async def synthesize(article_url: str, mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
-    """Main synthesis function - Local is now primary, API is fallback"""
-    if not article_url:
-        return "Please provide a valid URL.", None
     try:
-        # Fetch text from URL
-        text = converter.fetch_text(article_url)
         # Limit text to max words
         words = text.split()
@@ -734,9 +764,9 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
         return f"Error generating audio: {str(e)}", None
-def synthesize_sync(article_url: str, mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
     """Synchronous wrapper for async synthesis"""
-    return asyncio.run(synthesize(article_url, mode, tts_engine, language))
 def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
@@ -764,6 +794,14 @@ def update_tts_engine_for_korean(language):
         )
 # 모델 초기화 (앱 시작 시)
 if LLAMA_CPP_AVAILABLE:
     try:
@@ -778,9 +816,9 @@ if LLAMA_CPP_AVAILABLE:
 # Gradio Interface
-with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
-    gr.Markdown("# 🎙️ URL to Podcast Converter")
-    gr.Markdown("Convert any article, blog, or news into an engaging podcast conversation!")
     # 상단에 로컬 LLM 상태 표시
     with gr.Row():
@@ -793,11 +831,29 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
     with gr.Row():
         with gr.Column(scale=3):
             url_input = gr.Textbox(
                 label="Article URL",
                 placeholder="Enter the article URL here...",
-                value=""
             )
         with gr.Column(scale=1):
             # 언어 선택 추가
             language_selector = gr.Radio(
@@ -871,16 +927,23 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
     gr.Examples(
         examples=[
-            ["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
-            ["https://www.bbc.com/news/technology-67988517", "Local", "Spark-TTS", "English"],
-            ["https://huggingface.co/papers/2505.14810", "Local", "Edge-TTS", "Korean"],
         ],
-        inputs=[url_input, mode_selector, tts_selector, language_selector],
         outputs=[conversation_output, status_output],
         fn=synthesize_sync,
         cache_examples=False,
     )
     # 언어 변경 시 TTS 엔진 옵션 업데이트
     language_selector.change(
         fn=update_tts_engine_for_korean,
@@ -888,10 +951,19 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
         outputs=[tts_selector]
     )
-    # 이벤트 연결
     convert_btn.click(
-        fn=synthesize_sync,
-        inputs=[url_input, mode_selector, tts_selector, language_selector],
         outputs=[conversation_output, status_output]
     )

 from threading import Thread
 from dotenv import load_dotenv
+# PDF processing imports
+from langchain_community.document_loaders import PyPDFLoader
 # Edge TTS imports
 import edge_tts
 from pydub import AudioSegment
         except httpx.HTTPError as e:
             raise RuntimeError(f"Failed to fetch URL: {e}")
+    def extract_text_from_pdf(self, pdf_file) -> str:
+        """Extract text content from PDF file"""
+        try:
+            # 임시 파일로 저장
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+                tmp_file.write(pdf_file.read())
+                tmp_path = tmp_file.name
+            # PDF 로드 및 텍스트 추출
+            loader = PyPDFLoader(tmp_path)
+            pages = loader.load()
+            # 모든 페이지의 텍스트를 결합
+            text = "\n".join([page.page_content for page in pages])
+            # 임시 파일 삭제
+            os.unlink(tmp_path)
+            return text
+        except Exception as e:
+            raise RuntimeError(f"Failed to extract text from PDF: {e}")
     def _get_messages_formatter_type(self, model_name):
         """Get appropriate message formatter for the model"""
         if "Mistral" in model_name or "BitSix" in model_name:
             # 언어별 음성 설정
             if language == "Korean":
                 voices = [
+                    "ko-KR-HyunsuNeural",  # 남성 음성 (자연스러운 한국어)
+                    "ko-KR-InJoonNeural"  # 남성 음성 (자연스러운 한국어)
                 ]
             else:
                 voices = [
             # Create different voice characteristics for different speakers
             if language == "Korean":
                 voice_configs = [
+                    {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 진행자입니다.", "gender": "male"},
                     {"prompt_text": "안녕하세요, 오늘 게스트로 참여하게 되어 기쁩니다.", "gender": "male"}
                 ]
             else:
 converter = UnifiedAudioConverter(ConversationConfig())
+async def synthesize(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
+    """Main synthesis function - handles both URL and PDF inputs"""
     try:
+        # Extract text based on input type
+        if input_type == "URL":
+            if not article_input or not isinstance(article_input, str):
+                return "Please provide a valid URL.", None
+            text = converter.fetch_text(article_input)
+        else:  # PDF
+            if not article_input:
+                return "Please upload a PDF file.", None
+            text = converter.extract_text_from_pdf(article_input)
         # Limit text to max words
         words = text.split()
         return f"Error generating audio: {str(e)}", None
+def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
     """Synchronous wrapper for async synthesis"""
+    return asyncio.run(synthesize(article_input, input_type, mode, tts_engine, language))
 def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
         )
+def toggle_input_visibility(input_type):
+    """Toggle visibility of URL input and file upload based on input type"""
+    if input_type == "URL":
+        return gr.update(visible=True), gr.update(visible=False)
+    else:  # PDF
+        return gr.update(visible=False), gr.update(visible=True)
 # 모델 초기화 (앱 시작 시)
 if LLAMA_CPP_AVAILABLE:
     try:
 # Gradio Interface
+with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
+    gr.Markdown("# 🎙️ URL/PDF to Podcast Converter")
+    gr.Markdown("Convert any article, blog, news, or PDF document into an engaging podcast conversation!")
     # 상단에 로컬 LLM 상태 표시
     with gr.Row():
     with gr.Row():
         with gr.Column(scale=3):
+            # Input type selector
+            input_type_selector = gr.Radio(
+                choices=["URL", "PDF"],
+                value="URL",
+                label="Input Type",
+                info="Choose between URL or PDF file upload"
+            )
+            # URL input
             url_input = gr.Textbox(
                 label="Article URL",
                 placeholder="Enter the article URL here...",
+                value="",
+                visible=True
             )
+            # PDF upload
+            pdf_input = gr.File(
+                label="Upload PDF",
+                file_types=[".pdf"],
+                visible=False
+            )
         with gr.Column(scale=1):
             # 언어 선택 추가
             language_selector = gr.Radio(
     gr.Examples(
         examples=[
+            ["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
+            ["https://www.bbc.com/news/technology-67988517", "URL", "Local", "Spark-TTS", "English"],
+            ["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
         ],
+        inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
         outputs=[conversation_output, status_output],
         fn=synthesize_sync,
         cache_examples=False,
     )
+    # Input type change handler
+    input_type_selector.change(
+        fn=toggle_input_visibility,
+        inputs=[input_type_selector],
+        outputs=[url_input, pdf_input]
+    )
     # 언어 변경 시 TTS 엔진 옵션 업데이트
     language_selector.change(
         fn=update_tts_engine_for_korean,
         outputs=[tts_selector]
     )
+    # 이벤트 연결 - 수정된 부분
+    def get_article_input(input_type, url_input, pdf_input):
+        """Get the appropriate input based on input type"""
+        if input_type == "URL":
+            return url_input
+        else:
+            return pdf_input
     convert_btn.click(
+        fn=lambda input_type, url_input, pdf_input, mode, tts, lang: synthesize_sync(
+            get_article_input(input_type, url_input, pdf_input), input_type, mode, tts, lang
+        ),
+        inputs=[input_type_selector, url_input, pdf_input, mode_selector, tts_selector, language_selector],
         outputs=[conversation_output, status_output]
     )