Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -20,6 +20,9 @@ from pathlib import Path
|
|
20 |
from threading import Thread
|
21 |
from dotenv import load_dotenv
|
22 |
|
|
|
|
|
|
|
23 |
# Edge TTS imports
|
24 |
import edge_tts
|
25 |
from pydub import AudioSegment
|
@@ -198,6 +201,28 @@ class UnifiedAudioConverter:
|
|
198 |
except httpx.HTTPError as e:
|
199 |
raise RuntimeError(f"Failed to fetch URL: {e}")
|
200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
def _get_messages_formatter_type(self, model_name):
|
202 |
"""Get appropriate message formatter for the model"""
|
203 |
if "Mistral" in model_name or "BitSix" in model_name:
|
@@ -431,8 +456,9 @@ class UnifiedAudioConverter:
|
|
431 |
# 언어별 음성 설정 / Per-language voice settings
|
432 |
if language == "Korean":
|
433 |
voices = [
|
434 |
-
"ko-KR-
|
435 |
-
"ko-KR-
|
|
|
436 |
]
|
437 |
else:
|
438 |
voices = [
|
@@ -489,7 +515,7 @@ class UnifiedAudioConverter:
|
|
489 |
# Create different voice characteristics for different speakers
|
490 |
if language == "Korean":
|
491 |
voice_configs = [
|
492 |
-
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ํ์บ์คํธ ์งํ์ ๋งก์ ์งํ์์
๋๋ค.", "gender": "
|
493 |
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ๊ฒ์คํธ๋ก ์ฐธ์ฌํ๊ฒ ๋์ด ๊ธฐ์ฉ๋๋ค.", "gender": "male"}
|
494 |
]
|
495 |
else:
|
@@ -643,14 +669,18 @@ class UnifiedAudioConverter:
|
|
643 |
converter = UnifiedAudioConverter(ConversationConfig())
|
644 |
|
645 |
|
646 |
-
async def synthesize(
|
647 |
-
"""Main synthesis function -
|
648 |
-
if not article_url:
|
649 |
-
return "Please provide a valid URL.", None
|
650 |
-
|
651 |
try:
|
652 |
-
#
|
653 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
654 |
|
655 |
# Limit text to max words
|
656 |
words = text.split()
|
@@ -734,9 +764,9 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
|
|
734 |
return f"Error generating audio: {str(e)}", None
|
735 |
|
736 |
|
737 |
-
def synthesize_sync(
|
738 |
"""Synchronous wrapper for async synthesis"""
|
739 |
-
return asyncio.run(synthesize(
|
740 |
|
741 |
|
742 |
def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
|
@@ -764,6 +794,14 @@ def update_tts_engine_for_korean(language):
|
|
764 |
)
|
765 |
|
766 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
767 |
# 모델 초기화 (앱 시작 시) / Model initialization (at app startup)
|
768 |
if LLAMA_CPP_AVAILABLE:
|
769 |
try:
|
@@ -778,9 +816,9 @@ if LLAMA_CPP_AVAILABLE:
|
|
778 |
|
779 |
|
780 |
# Gradio Interface
|
781 |
-
with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
782 |
-
gr.Markdown("# ๐๏ธ URL to Podcast Converter")
|
783 |
-
gr.Markdown("Convert any article, blog, or
|
784 |
|
785 |
# 상단에 로컬 LLM 상태 표시 / Show local LLM status at the top
|
786 |
with gr.Row():
|
@@ -793,11 +831,29 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
793 |
|
794 |
with gr.Row():
|
795 |
with gr.Column(scale=3):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
796 |
url_input = gr.Textbox(
|
797 |
label="Article URL",
|
798 |
placeholder="Enter the article URL here...",
|
799 |
-
value=""
|
|
|
800 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
801 |
with gr.Column(scale=1):
|
802 |
# 언어 선택 추가 / Add language selection
|
803 |
language_selector = gr.Radio(
|
@@ -871,16 +927,23 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
871 |
|
872 |
gr.Examples(
|
873 |
examples=[
|
874 |
-
["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
|
875 |
-
["https://www.bbc.com/news/technology-67988517", "Local", "Spark-TTS", "English"],
|
876 |
-
["https://huggingface.co/papers/2505.14810", "Local", "Edge-TTS", "Korean"],
|
877 |
],
|
878 |
-
inputs=[url_input, mode_selector, tts_selector, language_selector],
|
879 |
outputs=[conversation_output, status_output],
|
880 |
fn=synthesize_sync,
|
881 |
cache_examples=False,
|
882 |
)
|
883 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
884 |
# 언어 변경 시 TTS 엔진 옵션 업데이트 / Update TTS engine options when the language changes
|
885 |
language_selector.change(
|
886 |
fn=update_tts_engine_for_korean,
|
@@ -888,10 +951,19 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
888 |
outputs=[tts_selector]
|
889 |
)
|
890 |
|
891 |
-
# 이벤트 연결 / Event wiring
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
892 |
convert_btn.click(
|
893 |
-
fn=synthesize_sync
|
894 |
-
|
|
|
|
|
895 |
outputs=[conversation_output, status_output]
|
896 |
)
|
897 |
|
|
|
20 |
from threading import Thread
|
21 |
from dotenv import load_dotenv
|
22 |
|
23 |
+
# PDF processing imports
|
24 |
+
from langchain_community.document_loaders import PyPDFLoader
|
25 |
+
|
26 |
# Edge TTS imports
|
27 |
import edge_tts
|
28 |
from pydub import AudioSegment
|
|
|
201 |
except httpx.HTTPError as e:
|
202 |
raise RuntimeError(f"Failed to fetch URL: {e}")
|
203 |
|
204 |
+
def extract_text_from_pdf(self, pdf_file) -> str:
    """Extract the text content of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) to an
            existing PDF, or a binary file-like object exposing ``read()``.
            NOTE(review): some Gradio versions hand the ``File`` value over
            as a path instead of a file object -- confirm against the
            installed version.

    Returns:
        The ``page_content`` of every loaded page, joined with newlines.

    Raises:
        RuntimeError: If reading, loading or parsing fails. The original
            exception is chained as the cause.
    """
    tmp_path = None
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            # Already on disk: load it in place, never copy or delete it.
            pdf_path = os.fspath(pdf_file)
        else:
            # PyPDFLoader takes a path, so spill the uploaded bytes to disk.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                # Record the name first so cleanup still runs if read()/write() fails.
                tmp_path = tmp_file.name
                tmp_file.write(pdf_file.read())
            pdf_path = tmp_path

        pages = PyPDFLoader(pdf_path).load()

        # Combine the text of all pages.
        return "\n".join(page.page_content for page in pages)
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from PDF: {e}") from e
    finally:
        # Remove only the temp file we created, on success and failure alike.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best effort: a cleanup failure must not mask the real outcome.
                pass
|
225 |
+
|
226 |
def _get_messages_formatter_type(self, model_name):
|
227 |
"""Get appropriate message formatter for the model"""
|
228 |
if "Mistral" in model_name or "BitSix" in model_name:
|
|
|
456 |
# 언어별 음성 설정 / Per-language voice settings
|
457 |
if language == "Korean":
|
458 |
voices = [
|
459 |
+
"ko-KR-HyunsuNeural", # ๋จ์ฑ ์์ฑ (์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด)
|
460 |
+
"ko-KR-InJoonNeural" # ๋จ๋จ์ฑ ์์ฑ (์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด)
|
461 |
+
|
462 |
]
|
463 |
else:
|
464 |
voices = [
|
|
|
515 |
# Create different voice characteristics for different speakers
|
516 |
if language == "Korean":
|
517 |
voice_configs = [
|
518 |
+
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ํ์บ์คํธ ์งํ์ ๋งก์ ์งํ์์
๋๋ค.", "gender": "male"},
|
519 |
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ๊ฒ์คํธ๋ก ์ฐธ์ฌํ๊ฒ ๋์ด ๊ธฐ์ฉ๋๋ค.", "gender": "male"}
|
520 |
]
|
521 |
else:
|
|
|
669 |
converter = UnifiedAudioConverter(ConversationConfig())
|
670 |
|
671 |
|
672 |
+
async def synthesize(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
|
673 |
+
"""Main synthesis function - handles both URL and PDF inputs"""
|
|
|
|
|
|
|
674 |
try:
|
675 |
+
# Extract text based on input type
|
676 |
+
if input_type == "URL":
|
677 |
+
if not article_input or not isinstance(article_input, str):
|
678 |
+
return "Please provide a valid URL.", None
|
679 |
+
text = converter.fetch_text(article_input)
|
680 |
+
else: # PDF
|
681 |
+
if not article_input:
|
682 |
+
return "Please upload a PDF file.", None
|
683 |
+
text = converter.extract_text_from_pdf(article_input)
|
684 |
|
685 |
# Limit text to max words
|
686 |
words = text.split()
|
|
|
764 |
return f"Error generating audio: {str(e)}", None
|
765 |
|
766 |
|
767 |
+
def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
    """Blocking entry point: run the async ``synthesize`` coroutine to completion.

    All arguments are forwarded positionally and unchanged; the return value
    is whatever ``synthesize`` returns.
    """
    pending = synthesize(
        article_input,
        input_type,
        mode,
        tts_engine,
        language,
    )
    return asyncio.run(pending)
|
770 |
|
771 |
|
772 |
def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
|
|
|
794 |
)
|
795 |
|
796 |
|
797 |
+
def toggle_input_visibility(input_type):
    """Return a pair of visibility updates for the given input type.

    The first update is visible only when ``input_type`` is ``"URL"``; the
    second is visible for every other value (the PDF case).
    """
    url_selected = input_type == "URL"
    return gr.update(visible=url_selected), gr.update(visible=not url_selected)
|
803 |
+
|
804 |
+
|
805 |
# 모델 초기화 (앱 시작 시) / Model initialization (at app startup)
|
806 |
if LLAMA_CPP_AVAILABLE:
|
807 |
try:
|
|
|
816 |
|
817 |
|
818 |
# Gradio Interface
|
819 |
+
with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
|
820 |
+
gr.Markdown("# ๐๏ธ URL/PDF to Podcast Converter")
|
821 |
+
gr.Markdown("Convert any article, blog, news, or PDF document into an engaging podcast conversation!")
|
822 |
|
823 |
# 상단에 로컬 LLM 상태 표시 / Show local LLM status at the top
|
824 |
with gr.Row():
|
|
|
831 |
|
832 |
with gr.Row():
|
833 |
with gr.Column(scale=3):
|
834 |
+
# Input type selector
|
835 |
+
input_type_selector = gr.Radio(
|
836 |
+
choices=["URL", "PDF"],
|
837 |
+
value="URL",
|
838 |
+
label="Input Type",
|
839 |
+
info="Choose between URL or PDF file upload"
|
840 |
+
)
|
841 |
+
|
842 |
+
# URL input
|
843 |
url_input = gr.Textbox(
|
844 |
label="Article URL",
|
845 |
placeholder="Enter the article URL here...",
|
846 |
+
value="",
|
847 |
+
visible=True
|
848 |
)
|
849 |
+
|
850 |
+
# PDF upload
|
851 |
+
pdf_input = gr.File(
|
852 |
+
label="Upload PDF",
|
853 |
+
file_types=[".pdf"],
|
854 |
+
visible=False
|
855 |
+
)
|
856 |
+
|
857 |
with gr.Column(scale=1):
|
858 |
# 언어 선택 추가 / Add language selection
|
859 |
language_selector = gr.Radio(
|
|
|
927 |
|
928 |
gr.Examples(
|
929 |
examples=[
|
930 |
+
["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
|
931 |
+
["https://www.bbc.com/news/technology-67988517", "URL", "Local", "Spark-TTS", "English"],
|
932 |
+
["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
|
933 |
],
|
934 |
+
inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
|
935 |
outputs=[conversation_output, status_output],
|
936 |
fn=synthesize_sync,
|
937 |
cache_examples=False,
|
938 |
)
|
939 |
|
940 |
+
# Input type change handler
|
941 |
+
input_type_selector.change(
|
942 |
+
fn=toggle_input_visibility,
|
943 |
+
inputs=[input_type_selector],
|
944 |
+
outputs=[url_input, pdf_input]
|
945 |
+
)
|
946 |
+
|
947 |
# 언어 변경 시 TTS 엔진 옵션 업데이트 / Update TTS engine options when the language changes
|
948 |
language_selector.change(
|
949 |
fn=update_tts_engine_for_korean,
|
|
|
951 |
outputs=[tts_selector]
|
952 |
)
|
953 |
|
954 |
+
# 이벤트 연결 - 수정된 부분 / Event wiring - modified section
|
955 |
+
def get_article_input(input_type, url_input, pdf_input):
    """Pick the value that matches the selected input type.

    Returns ``url_input`` when ``input_type`` is ``"URL"``; any other value
    selects ``pdf_input``.
    """
    return url_input if input_type == "URL" else pdf_input
|
961 |
+
|
962 |
convert_btn.click(
|
963 |
+
fn=lambda input_type, url_input, pdf_input, mode, tts, lang: synthesize_sync(
|
964 |
+
get_article_input(input_type, url_input, pdf_input), input_type, mode, tts, lang
|
965 |
+
),
|
966 |
+
inputs=[input_type_selector, url_input, pdf_input, mode_selector, tts_selector, language_selector],
|
967 |
outputs=[conversation_output, status_output]
|
968 |
)
|
969 |
|