openfree committed on
Commit
3233647
·
verified ·
1 Parent(s): 2298a96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -23
app.py CHANGED
@@ -20,6 +20,9 @@ from pathlib import Path
20
  from threading import Thread
21
  from dotenv import load_dotenv
22
 
 
 
 
23
  # Edge TTS imports
24
  import edge_tts
25
  from pydub import AudioSegment
@@ -198,6 +201,28 @@ class UnifiedAudioConverter:
198
  except httpx.HTTPError as e:
199
  raise RuntimeError(f"Failed to fetch URL: {e}")
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  def _get_messages_formatter_type(self, model_name):
202
  """Get appropriate message formatter for the model"""
203
  if "Mistral" in model_name or "BitSix" in model_name:
@@ -431,8 +456,9 @@ class UnifiedAudioConverter:
431
  # 언어별 음성 설정
432
  if language == "Korean":
433
  voices = [
434
- "ko-KR-SunHiNeural", # 여성 음성 (자연스러운 한국어)
435
- "ko-KR-HyunsuNeural" # 남성 음성 (자연스러운 한국어)
 
436
  ]
437
  else:
438
  voices = [
@@ -489,7 +515,7 @@ class UnifiedAudioConverter:
489
  # Create different voice characteristics for different speakers
490
  if language == "Korean":
491
  voice_configs = [
492
- {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 진행자입니다.", "gender": "female"},
493
  {"prompt_text": "안녕하세요, 오늘 게스트로 참여하게 되어 기쁩니다.", "gender": "male"}
494
  ]
495
  else:
@@ -643,14 +669,18 @@ class UnifiedAudioConverter:
643
  converter = UnifiedAudioConverter(ConversationConfig())
644
 
645
 
646
- async def synthesize(article_url: str, mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
647
- """Main synthesis function - Local is now primary, API is fallback"""
648
- if not article_url:
649
- return "Please provide a valid URL.", None
650
-
651
  try:
652
- # Fetch text from URL
653
- text = converter.fetch_text(article_url)
 
 
 
 
 
 
 
654
 
655
  # Limit text to max words
656
  words = text.split()
@@ -734,9 +764,9 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
734
  return f"Error generating audio: {str(e)}", None
735
 
736
 
737
- def synthesize_sync(article_url: str, mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
738
  """Synchronous wrapper for async synthesis"""
739
- return asyncio.run(synthesize(article_url, mode, tts_engine, language))
740
 
741
 
742
  def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
@@ -764,6 +794,14 @@ def update_tts_engine_for_korean(language):
764
  )
765
 
766
 
 
 
 
 
 
 
 
 
767
  # 모델 초기화 (앱 시작 시)
768
  if LLAMA_CPP_AVAILABLE:
769
  try:
@@ -778,9 +816,9 @@ if LLAMA_CPP_AVAILABLE:
778
 
779
 
780
  # Gradio Interface
781
- with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
782
- gr.Markdown("# 🎙️ URL to Podcast Converter")
783
- gr.Markdown("Convert any article, blog, or news into an engaging podcast conversation!")
784
 
785
  # 상단에 로컬 LLM 상태 표시
786
  with gr.Row():
@@ -793,11 +831,29 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
793
 
794
  with gr.Row():
795
  with gr.Column(scale=3):
 
 
 
 
 
 
 
 
 
796
  url_input = gr.Textbox(
797
  label="Article URL",
798
  placeholder="Enter the article URL here...",
799
- value=""
 
800
  )
 
 
 
 
 
 
 
 
801
  with gr.Column(scale=1):
802
  # 언어 선택 추가
803
  language_selector = gr.Radio(
@@ -871,16 +927,23 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
871
 
872
  gr.Examples(
873
  examples=[
874
- ["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
875
- ["https://www.bbc.com/news/technology-67988517", "Local", "Spark-TTS", "English"],
876
- ["https://huggingface.co/papers/2505.14810", "Local", "Edge-TTS", "Korean"],
877
  ],
878
- inputs=[url_input, mode_selector, tts_selector, language_selector],
879
  outputs=[conversation_output, status_output],
880
  fn=synthesize_sync,
881
  cache_examples=False,
882
  )
883
 
 
 
 
 
 
 
 
884
  # 언어 변경 시 TTS 엔진 옵션 업데이트
885
  language_selector.change(
886
  fn=update_tts_engine_for_korean,
@@ -888,10 +951,19 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
888
  outputs=[tts_selector]
889
  )
890
 
891
- # 이벤트 연결
 
 
 
 
 
 
 
892
  convert_btn.click(
893
- fn=synthesize_sync,
894
- inputs=[url_input, mode_selector, tts_selector, language_selector],
 
 
895
  outputs=[conversation_output, status_output]
896
  )
897
 
 
20
  from threading import Thread
21
  from dotenv import load_dotenv
22
 
23
+ # PDF processing imports
24
+ from langchain_community.document_loaders import PyPDFLoader
25
+
26
  # Edge TTS imports
27
  import edge_tts
28
  from pydub import AudioSegment
 
201
  except httpx.HTTPError as e:
202
  raise RuntimeError(f"Failed to fetch URL: {e}")
203
 
204
+ def extract_text_from_pdf(self, pdf_file) -> str:
205
+ """Extract text content from PDF file"""
206
+ try:
207
+ # 임시 파일로 저장
208
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
209
+ tmp_file.write(pdf_file.read())
210
+ tmp_path = tmp_file.name
211
+
212
+ # PDF 로드 및 텍스트 추출
213
+ loader = PyPDFLoader(tmp_path)
214
+ pages = loader.load()
215
+
216
+ # 모든 페이지의 텍스트를 결합
217
+ text = "\n".join([page.page_content for page in pages])
218
+
219
+ # 임시 파일 삭제
220
+ os.unlink(tmp_path)
221
+
222
+ return text
223
+ except Exception as e:
224
+ raise RuntimeError(f"Failed to extract text from PDF: {e}")
225
+
226
  def _get_messages_formatter_type(self, model_name):
227
  """Get appropriate message formatter for the model"""
228
  if "Mistral" in model_name or "BitSix" in model_name:
 
456
  # 언어별 음성 설정
457
  if language == "Korean":
458
  voices = [
459
+ "ko-KR-HyunsuNeural", # 남성 음성 (자연스러운 한국어)
460
+ "ko-KR-InJoonNeural" # 남남성 음성 (자연스러운 한국어)
461
+
462
  ]
463
  else:
464
  voices = [
 
515
  # Create different voice characteristics for different speakers
516
  if language == "Korean":
517
  voice_configs = [
518
+ {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 진행자입니다.", "gender": "male"},
519
  {"prompt_text": "안녕하세요, 오늘 게스트로 참여하게 되어 기쁩니다.", "gender": "male"}
520
  ]
521
  else:
 
669
  converter = UnifiedAudioConverter(ConversationConfig())
670
 
671
 
672
+ async def synthesize(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
673
+ """Main synthesis function - handles both URL and PDF inputs"""
 
 
 
674
  try:
675
+ # Extract text based on input type
676
+ if input_type == "URL":
677
+ if not article_input or not isinstance(article_input, str):
678
+ return "Please provide a valid URL.", None
679
+ text = converter.fetch_text(article_input)
680
+ else: # PDF
681
+ if not article_input:
682
+ return "Please upload a PDF file.", None
683
+ text = converter.extract_text_from_pdf(article_input)
684
 
685
  # Limit text to max words
686
  words = text.split()
 
764
  return f"Error generating audio: {str(e)}", None
765
 
766
 
767
+ def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
768
  """Synchronous wrapper for async synthesis"""
769
+ return asyncio.run(synthesize(article_input, input_type, mode, tts_engine, language))
770
 
771
 
772
  def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
 
794
  )
795
 
796
 
797
+ def toggle_input_visibility(input_type):
798
+ """Toggle visibility of URL input and file upload based on input type"""
799
+ if input_type == "URL":
800
+ return gr.update(visible=True), gr.update(visible=False)
801
+ else: # PDF
802
+ return gr.update(visible=False), gr.update(visible=True)
803
+
804
+
805
  # 모델 초기화 (앱 시작 시)
806
  if LLAMA_CPP_AVAILABLE:
807
  try:
 
816
 
817
 
818
  # Gradio Interface
819
+ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
820
+ gr.Markdown("# 🎙️ URL/PDF to Podcast Converter")
821
+ gr.Markdown("Convert any article, blog, news, or PDF document into an engaging podcast conversation!")
822
 
823
  # 상단에 로컬 LLM 상태 표시
824
  with gr.Row():
 
831
 
832
  with gr.Row():
833
  with gr.Column(scale=3):
834
+ # Input type selector
835
+ input_type_selector = gr.Radio(
836
+ choices=["URL", "PDF"],
837
+ value="URL",
838
+ label="Input Type",
839
+ info="Choose between URL or PDF file upload"
840
+ )
841
+
842
+ # URL input
843
  url_input = gr.Textbox(
844
  label="Article URL",
845
  placeholder="Enter the article URL here...",
846
+ value="",
847
+ visible=True
848
  )
849
+
850
+ # PDF upload
851
+ pdf_input = gr.File(
852
+ label="Upload PDF",
853
+ file_types=[".pdf"],
854
+ visible=False
855
+ )
856
+
857
  with gr.Column(scale=1):
858
  # 언어 선택 추가
859
  language_selector = gr.Radio(
 
927
 
928
  gr.Examples(
929
  examples=[
930
+ ["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
931
+ ["https://www.bbc.com/news/technology-67988517", "URL", "Local", "Spark-TTS", "English"],
932
+ ["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
933
  ],
934
+ inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
935
  outputs=[conversation_output, status_output],
936
  fn=synthesize_sync,
937
  cache_examples=False,
938
  )
939
 
940
+ # Input type change handler
941
+ input_type_selector.change(
942
+ fn=toggle_input_visibility,
943
+ inputs=[input_type_selector],
944
+ outputs=[url_input, pdf_input]
945
+ )
946
+
947
  # 언어 변경 시 TTS 엔진 옵션 업데이트
948
  language_selector.change(
949
  fn=update_tts_engine_for_korean,
 
951
  outputs=[tts_selector]
952
  )
953
 
954
+ # 이벤트 연결 - 수정된 부분
955
+ def get_article_input(input_type, url_input, pdf_input):
956
+ """Get the appropriate input based on input type"""
957
+ if input_type == "URL":
958
+ return url_input
959
+ else:
960
+ return pdf_input
961
+
962
  convert_btn.click(
963
+ fn=lambda input_type, url_input, pdf_input, mode, tts, lang: synthesize_sync(
964
+ get_article_input(input_type, url_input, pdf_input), input_type, mode, tts, lang
965
+ ),
966
+ inputs=[input_type_selector, url_input, pdf_input, mode_selector, tts_selector, language_selector],
967
  outputs=[conversation_output, status_output]
968
  )
969