Spaces: Running on Zero
Update app.py
app.py CHANGED
Removed lines, by hunk:

@@ -81,7 +81,7 @@ BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
-    max_words: int = 4000

@@ -89,8 +89,10 @@ class ConversationConfig:
-    max_tokens: int = 3000
-    max_new_tokens: int = 6000

@@ -119,20 +121,28 @@ def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
-def format_search_results(query: str) -> str:
-    """๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ํฌ๋งทํํ์ฌ ๋ฐํ …
-        return ""
-    # ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ ๊ฐ๋ตํ๊ฒ ์์ฝ
-    return "\n".join(results) + "\n"

@@ -157,6 +167,47 @@ def extract_keywords_for_search(text: str, language: str = "English") -> List[str]:

@@ -310,16 +361,25 @@ class UnifiedAudioConverter:
-        """Build prompt for conversation generation with …
-        max_text_length = …

@@ -328,7 +388,6 @@ class UnifiedAudioConverter:
-        # ๊ฒ์ ์ปจํ์คํธ๊ฐ ์์ ๋๋ง ํฌํจ

@@ -336,22 +395,38 @@ class UnifiedAudioConverter:
-                f"์ ๋ด์ฉ์ผ๋ก …

@@ -367,13 +442,20 @@ class UnifiedAudioConverter:
-                f"Create a …
-                f"Guidelines:\n"

@@ -382,27 +464,30 @@ class UnifiedAudioConverter:
-        """Build messages for local LLM with enhanced …
-                "๋น์ ์ ํ๊ตญ ์ต๊ณ ์ …
-                "๋งค๋ ฅ์ ์ด๊ณ ์ ์ตํ ๋ํ๋ฅผ ๋ง๋ค์ด๋๋๋ค.\n\n"
-                "You are an expert …
-                "natural conversations that …

@@ -412,15 +497,14 @@ class UnifiedAudioConverter:
-        """Extract conversation using new local LLM with search …
-            # ๊ฒ์ ์ปจํ์คํธ ์์ฑ
-            if BRAVE_KEY:
-                    # ์ฒซ ๋ฒ์งธ ํค์๋๋ก ๊ฒ์

@@ -433,26 +517,31 @@ class UnifiedAudioConverter:

@@ -463,7 +552,7 @@ class UnifiedAudioConverter:
-            settings.temperature = 0.…

@@ -486,7 +575,12 @@ class UnifiedAudioConverter:

@@ -496,24 +590,24 @@ class UnifiedAudioConverter:
-        """Extract conversation using legacy local model …
-                    "์ต์ ์ ๋ณด๋ ์์ฐ์ค๋ฝ๊ฒ ํฌํจ์์ผ์ฃผ์ธ์."

@@ -540,7 +634,7 @@ class UnifiedAudioConverter:
-                temperature=0.…

@@ -561,35 +655,59 @@ class UnifiedAudioConverter:
-            # Return default template
-                    "conversation": [
-                        {"speaker": "์ค์", "text": "์๋ํ์ธ์, ์ฌ๋ฌ๋ถ! ์ค๋๋ ์ ํฌ ํ์บ์คํธ๋ฅผ ์ฐพ์์ฃผ์์ ์ ๋ง ๊ฐ์ฌํฉ๋๋ค. ๋ฏผํธ์จ, ์ค๋ ์ ๋ง ํฅ๋ฏธ๋ก์ด ์ฃผ์ ๋ฅผ ์ค๋นํด์ฃผ์จ๋ค๊ณ ๋ค์์ด์."},
-                        {"speaker": "๋ฏผํธ", "text": "๋ค, ์๋ํ์ธ์! ์ค์์จ ๋ง์์ฒ๋ผ ์ค๋์ ์ ๋ง ํฅ๋ฏธ๋ก์ด ์ด์ผ๊ธฐ๋ฅผ ์ค๋นํ์ต๋๋ค. ์ฒญ์ทจ์ ์ฌ๋ฌ๋ถ๋ค๊ป์๋ ๋ง์ ๊ด์ฌ์ ๊ฐ์ง๊ณ ๊ณ์ค ์ฃผ์ ์ธ ๊ฒ ๊ฐ์์."},
-                        {"speaker": "์ค์", "text": "์ ๋ง ๊ธฐ๋๋๋๋ฐ์. ๊ทธ๋ฐ๋ฐ ๋ฏผํธ์จ, ์ด ์ฃผ์ ๊ฐ ์์ฆ ์ ์ด๋ ๊ฒ ํ์ ๊ฐ ๋๊ณ ์๋ ๊ฑด๊ฐ์? ์ฒญ์ทจ์ ์ฌ๋ฌ๋ถ๋ค๋ ๊ถ๊ธํดํ์ค ๊ฒ ๊ฐ์์."},
-                        {"speaker": "๋ฏผํธ", "text": "์ข์ ์ง๋ฌธ์ด์ธ์, ์ค์์จ. ์ฌ์ค ์ต๊ทผ์ ์ด ๋ถ์ผ์ ๋ง์ ๋ณํ๊ฐ ์์๊ฑฐ๋ ์. ๊ทธ๋ผ ๋ณธ๊ฒฉ์ ์ผ๋ก ํ๋์ฉ ์ฐจ๊ทผ์ฐจ๊ทผ ์ค๋ชํด๋๋ฆด๊ฒ์."}
-                    ]
-                }
-        """Extract conversation using API with …
-            if BRAVE_KEY:

@@ -599,21 +717,24 @@ class UnifiedAudioConverter:

@@ -622,6 +743,7 @@ class UnifiedAudioConverter:

@@ -871,18 +993,23 @@ converter = UnifiedAudioConverter(ConversationConfig())
-    """Main synthesis function - handles …
-            # Gradio returns the file path as a string

@@ -942,7 +1069,7 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",

@@ -997,11 +1124,13 @@ def update_tts_engine_for_korean(language):
-    """Toggle visibility of URL input …
-        return gr.update(visible=True), gr.update(visible=False)
-        return gr.update(visible=False), gr.update(visible=True)

@@ -1018,29 +1147,30 @@ if LLAMA_CPP_AVAILABLE:
-with gr.Blocks(theme='soft', title="…
-    gr.Markdown("# ๐๏ธ …
-    gr.Markdown("Convert any article, blog, …
-        ### ๐ค …
-            # Input type selector
-                choices=["URL", "PDF"],
-                info="Choose between URL …

@@ -1058,8 +1188,17 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
-            # ์ธ์ด ์ ํ

@@ -1085,17 +1224,22 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:

@@ -1104,10 +1248,10 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
-                lines=…
-                max_lines=…
-                placeholder="Generated conversation will appear here. You can edit it before generating audio.\n์์ฑ๋ ๋ํ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค. ์ค๋์ค ์์ฑ ์ ์ ํธ์งํ ์ ์์ต๋๋ค.\n\n…

@@ -1134,8 +1278,9 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:

@@ -1143,11 +1288,11 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
-    # Input type change handler
-        outputs=[url_input, pdf_input]

@@ -1158,18 +1303,20 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
-    def get_article_input(input_type, url_input, pdf_input):
-        fn=lambda input_type, url_input, pdf_input, mode, tts, lang: synthesize_sync(
-            get_article_input(input_type, url_input, pdf_input), input_type, mode, tts, lang
-        inputs=[input_type_selector, url_input, pdf_input, mode_selector, tts_selector, language_selector],
Updated hunks (added lines are marked with +, unchanged context lines are unmarked):

 @dataclass
 class ConversationConfig:
+    max_words: int = 6000  # 4000์์ 6000์ผ๋ก ์ฆ๊ฐ (1.5๋ฐฐ)
     prefix_url: str = "https://r.jina.ai/"
     api_model_name: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
     legacy_local_model_name: str = "NousResearch/Hermes-2-Pro-Llama-3-8B"
     local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
     local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
     # ํ ํฐ ์ ์ฆ๊ฐ
+    max_tokens: int = 4500  # 3000์์ 4500์ผ๋ก ์ฆ๊ฐ (1.5๋ฐฐ)
+    max_new_tokens: int = 9000  # 6000์์ 9000์ผ๋ก ์ฆ๊ฐ (1.5๋ฐฐ)
+    min_conversation_turns: int = 12  # ์ต์ ๋ํ ํด ์
+    max_conversation_turns: int = 15  # ์ต๋ ๋ํ ํด ์


 def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
         logging.error(f"Brave search error: {e}")
         return []

+def format_search_results(query: str, for_keyword: bool = False) -> str:
+    """๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ํฌ๋งทํํ์ฌ ๋ฐํ"""
+    # ํค์๋ ๊ฒ์์ ๊ฒฝ์ฐ ๋ ๋ง์ ๊ฒฐ๊ณผ ์ฌ์ฉ
+    count = 5 if for_keyword else 3
+    rows = brave_search(query, count, freshness_days=7 if not for_keyword else None)
     if not rows:
+        return ""

     results = []
+    # ํค์๋ ๊ฒ์์ ๊ฒฝ์ฐ ๋ ์์ธํ ์ ๋ณด ํฌํจ
+    max_results = 4 if for_keyword else 2
+    for r in rows[:max_results]:
+        if for_keyword:
+            # ํค์๋ ๊ฒ์์ ๋ ๊ธด ์ค๋ํซ ์ฌ์ฉ
+            snippet = r['snippet'][:200] + "..." if len(r['snippet']) > 200 else r['snippet']
+            results.append(f"**{r['title']}**\n{snippet}\nSource: {r['host']}")
+        else:
+            # ์ผ๋ฐ ๊ฒ์์ ์งง์ ์ค๋ํซ
+            snippet = r['snippet'][:100] + "..." if len(r['snippet']) > 100 else r['snippet']
+            results.append(f"- {r['title']}: {snippet}")

+    return "\n\n".join(results) + "\n"

 def extract_keywords_for_search(text: str, language: str = "English") -> List[str]:
     """ํ์คํธ์์ ๊ฒ์ํ ํค์๋ ์ถ์ถ (๊ฐ์ )"""

         return [max(keywords, key=len)]  # ๊ฐ์ฅ ๊ธด ๋จ์ด 1๊ฐ
     return []
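A quick sketch of how the new for_keyword flag changes the formatting above (the query strings are made up for illustration; results depend on BSEARCH_API being set so brave_search returns rows):

# Illustrative only: example queries, not values used by the app.
# Regular mode: up to 2 results, ~100-char snippets, 7-day freshness filter.
context_snippet = format_search_results("Mistral Small 3.1 release")

# Keyword mode: up to 4 results, ~200-char snippets with title and host,
# and no freshness restriction (freshness_days=None).
keyword_snippet = format_search_results("AI trends", for_keyword=True)

print(context_snippet)
print(keyword_snippet)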
+def search_and_compile_content(keyword: str, language: str = "English") -> str:
+    """ํค์๋๋ก ๊ฒ์ํ์ฌ ์ฝํ์ธ ์ปดํ์ผ"""
+    if not BRAVE_KEY:
+        return f"Search API not available. Using keyword: {keyword}"
+
+    # ์ธ์ด์ ๋ฐ๋ฅธ ๊ฒ์ ์ฟผ๋ฆฌ ์กฐ์
+    if language == "Korean":
+        queries = [
+            f"{keyword} ์ต์ ๋ด์ค",
+            f"{keyword} ์ ๋ณด",
+            f"{keyword} ํธ๋ ๋ 2024"
+        ]
+    else:
+        queries = [
+            f"{keyword} latest news",
+            f"{keyword} explained",
+            f"{keyword} trends 2024"
+        ]
+
+    all_content = []
+
+    for query in queries:
+        results = brave_search(query, count=3)
+        for r in results[:2]:  # ๊ฐ ์ฟผ๋ฆฌ๋น ์์ 2๊ฐ ๊ฒฐ๊ณผ
+            content = f"**{r['title']}**\n{r['snippet']}\n"
+            all_content.append(content)
+
+    if not all_content:
+        return f"No search results found for: {keyword}"
+
+    # ์ปดํ์ผ๋ ์ฝํ์ธ ๋ฐํ
+    compiled = "\n\n".join(all_content)
+
+    # ํค์๋ ๊ธฐ๋ฐ ์๊ฐ ์ถ๊ฐ
+    if language == "Korean":
+        intro = f"'{keyword}'์ ๋ํ ์ต์ ์ ๋ณด์ ํธ๋ ๋:\n\n"
+    else:
+        intro = f"Latest information and trends about '{keyword}':\n\n"
+
+    return intro + compiled
+
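A minimal sketch of how the new keyword path is meant to fit together, based only on the functions in this commit: search_and_compile_content builds the source text, synthesize then prefixes it with the "Keyword-based content:" marker, and the extractors skip their own Brave lookup when they see that marker. The keyword below is hypothetical:

# Hypothetical, illustrative use of the keyword-based pipeline.
keyword = "on-device LLMs"          # example topic, not from the app
language = "English"

compiled = search_and_compile_content(keyword, language)
text = f"Keyword-based content:\n{compiled}"   # same marker synthesize() adds

# Because of the marker, extract_conversation_local()/extract_conversation_api()
# will not run a second Brave search on this text:
skip_search = text.startswith("Keyword-based content:")
print(skip_search)  # True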
 class UnifiedAudioConverter:
     def __init__(self, config: ConversationConfig):
         self.config = config


     def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
+        """Build prompt for conversation generation with enhanced radio talk show style"""
+        # ํ์คํธ ๊ธธ์ด ์ ํ
+        max_text_length = 4500 if search_context else 6000
         if len(text) > max_text_length:
             text = text[:max_text_length] + "..."

         if language == "Korean":
+            # ๋ํ ํํ๋ฆฟ์ ๋ ๋ง์ ํด์ผ๋ก ํ์ฅ
             template = """
             {
                 "conversation": [
+                    {"speaker": "์ค์", "text": ""},
+                    {"speaker": "๋ฏผํธ", "text": ""},
+                    {"speaker": "์ค์", "text": ""},
+                    {"speaker": "๋ฏผํธ", "text": ""},
+                    {"speaker": "์ค์", "text": ""},
+                    {"speaker": "๋ฏผํธ", "text": ""},
+                    {"speaker": "์ค์", "text": ""},
+                    {"speaker": "๋ฏผํธ", "text": ""},
                     {"speaker": "์ค์", "text": ""},
                     {"speaker": "๋ฏผํธ", "text": ""},
                     {"speaker": "์ค์", "text": ""},

             }
             """

             context_part = ""
             if search_context:
                 context_part = f"# ์ต์ ๊ด๋ จ ์ ๋ณด:\n{search_context}\n"

             base_prompt = (
                 f"# ์๋ณธ ์ฝํ์ธ :\n{text}\n\n"
                 f"{context_part}"
+                f"์ ๋ด์ฉ์ผ๋ก ๋ผ๋์ค ๋๋ด ํ๋ก๊ทธ๋จ ๋๋ณธ์ ์์ฑํด์ฃผ์ธ์.\n\n"
                 f"## ํต์ฌ ์ง์นจ:\n"
+                f"1. **๋ํ ์คํ์ผ**: ์ค์ ๋ผ๋์ค ๋๋ด์ฒ๋ผ ์์ฃผ ์์ฐ์ค๋ฝ๊ณ ํธ์ํ ๊ตฌ์ด์ฒด ์ฌ์ฉ\n"
+                f"2. **ํ์ ์ญํ **:\n"
+                f"   - ์ค์: ์งํ์/ํธ์คํธ (์ฃผ๋ก ์ง๋ฌธํ๊ณ ๋ํ๋ฅผ ์ด๋์ด๊ฐ)\n"
+                f"   - ๋ฏผํธ: ์ ๋ฌธ๊ฐ (์ง๋ฌธ์ ๋ตํ๊ณ ์ค๋ชํจ)\n"
+                f"3. **๋ํ ํจํด**:\n"
+                f"   - ์ค์๋ ์ฃผ๋ก ์งง์ ์ง๋ฌธ์ด๋ ๋ฆฌ์ก์ (\"์, ๊ทธ๋ ๊ตฐ์\", \"ํฅ๋ฏธ๋กญ๋ค์\", \"๊ทธ๋ผ ~๋ ์ด๋ค๊ฐ์?\")\n"
+                f"   - ๋ฏผํธ๋ 1-2๋ฌธ์ฅ์ผ๋ก ๊ฐ๊ฒฐํ๊ฒ ๋ต๋ณ\n"
+                f"   - ์ ๋ ํ ์ฌ๋์ด 3๋ฌธ์ฅ ์ด์ ์ฐ์์ผ๋ก ๋งํ์ง ์์\n"
+                f"4. **์์ฐ์ค๋ฌ์**:\n"
+                f"   - \"์...\", \"์...\", \"๋ค,\" ๊ฐ์ ์ถ์์ ์ฌ์ฉ\n"
+                f"   - ๋๋ก๋ ์๋๋ฐฉ ๋ง์ ์งง๊ฒ ๋ฐ์ (\"๋ง์์\", \"๊ทธ๋ ์ฃ \")\n"
+                f"5. **ํ์ ๊ท์น**: ์๋ก ์กด๋๋ง ์ฌ์ฉ, 12-15ํ ๋ํ ๊ตํ\n\n"
                 f"JSON ํ์์ผ๋ก๋ง ๋ฐํ:\n{template}"
             )

             return base_prompt

         else:
+            # ์์ด ํํ๋ฆฟ๋ ํ์ฅ
             template = """
             {
                 "conversation": [
+                    {"speaker": "Alex", "text": ""},
+                    {"speaker": "Jordan", "text": ""},
+                    {"speaker": "Alex", "text": ""},
+                    {"speaker": "Jordan", "text": ""},
+                    {"speaker": "Alex", "text": ""},
+                    {"speaker": "Jordan", "text": ""},
+                    {"speaker": "Alex", "text": ""},
+                    {"speaker": "Jordan", "text": ""},
                     {"speaker": "Alex", "text": ""},
                     {"speaker": "Jordan", "text": ""},
                     {"speaker": "Alex", "text": ""},

             base_prompt = (
                 f"# Content:\n{text}\n\n"
                 f"{context_part}"
+                f"Create a natural radio talk show conversation.\n\n"
+                f"## Key Guidelines:\n"
+                f"1. **Style**: Natural, conversational English like a real radio show\n"
+                f"2. **Roles**:\n"
+                f"   - Alex: Host (asks questions, guides conversation)\n"
+                f"   - Jordan: Expert (answers, explains)\n"
+                f"3. **Pattern**:\n"
+                f"   - Alex mostly asks short questions or reacts (\"I see\", \"Interesting\", \"What about...?\")\n"
+                f"   - Jordan gives brief 1-2 sentence answers\n"
+                f"   - Never more than 2-3 sentences per turn\n"
+                f"4. **Natural flow**:\n"
+                f"   - Use fillers like \"Well,\" \"You know,\" \"Actually,\"\n"
+                f"   - Short reactions (\"Right\", \"Exactly\")\n"
+                f"5. **Length**: 12-15 exchanges total\n\n"
                 f"Return JSON only:\n{template}"
             )

     def _build_messages_for_local(self, text: str, language: str = "English", search_context: str = "") -> List[Dict]:
+        """Build messages for local LLM with enhanced radio talk show style"""
         if language == "Korean":
             system_message = (
+                "๋น์ ์ ํ๊ตญ ์ต๊ณ ์ ๋ผ๋์ค ๋๋ด ํ๋ก๊ทธ๋จ ์๊ฐ์๋๋ค. "
+                "์ค์ ๋ผ๋์ค ๋ฐฉ์ก์ฒ๋ผ ์์ฐ์ค๋ฝ๊ณ ์๋๊ฐ ์๋ ๋ํ๋ฅผ ๋ง๋ค์ด๋๋๋ค.\n\n"
                 "ํต์ฌ ์์น:\n"
+                "1. ๋ผ๋์ค ์งํ์(์ค์)๋ ์ฃผ๋ก ์งง์ ์ง๋ฌธ๊ณผ ๋ฆฌ์ก์์ผ๋ก ๋ํ๋ฅผ ์ด๋์ด๊ฐ๋๋ค\n"
+                "2. ์ ๋ฌธ๊ฐ(๋ฏผํธ)๋ ์ง๋ฌธ์ ๊ฐ๊ฒฐํ๊ณ ์ดํดํ๊ธฐ ์ฝ๊ฒ ๋ตํฉ๋๋ค\n"
+                "3. ํ ๋ฒ์ ๋๋ฌด ๋ง์ ์ ๋ณด๋ฅผ ์ ๋ฌํ์ง ์๊ณ , ๋ํ๋ฅผ ํตํด ์ ์ง์ ์ผ๋ก ํ์ด๊ฐ๋๋ค\n"
+                "4. \"์...\", \"์...\", \"๋ค,\" ๋ฑ ์์ฐ์ค๋ฌ์ด ๊ตฌ์ด์ฒด ํํ์ ์ฌ์ฉํฉ๋๋ค\n"
+                "5. ์ฒญ์ทจ์๊ฐ ๋ผ๋์ค๋ฅผ ๋ฃ๋ ๊ฒ์ฒ๋ผ ๋ชฐ์ํ ์ ์๋๋ก ์์ํ๊ฒ ์์ฑํฉ๋๋ค\n"
+                "6. ๋ฐ๋์ ์๋ก ์กด๋๋ง์ ์ฌ์ฉํ๋ฉฐ, ์ ์คํ๋ฉด์๋ ์น๊ทผํ ํค์ ์ ์งํฉ๋๋ค"
             )
         else:
             system_message = (
+                "You are an expert radio talk show scriptwriter who creates engaging, "
+                "natural conversations that sound like real radio broadcasts.\n\n"
+                "Key principles:\n"
+                "1. The host (Alex) mainly asks short questions and gives reactions to guide the conversation\n"
+                "2. The expert (Jordan) answers concisely and clearly\n"
+                "3. Information is revealed gradually through dialogue, not in long monologues\n"
+                "4. Use natural speech patterns with fillers like 'Well,' 'You know,' etc.\n"
+                "5. Make it sound like an actual radio show that listeners would enjoy\n"
+                "6. Keep each turn brief - no more than 2-3 sentences"
             )

         return [

     @spaces.GPU(duration=120)
     def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
+        """Extract conversation using new local LLM with enhanced search and style"""
         try:
+            # ๊ฒ์ ์ปจํ์คํธ ์์ฑ (ํค์๋ ๊ธฐ๋ฐ์ด ์๋ ๊ฒฝ์ฐ)
             search_context = ""
+            if BRAVE_KEY and not text.startswith("Keyword-based content:"):
                 try:
                     keywords = extract_keywords_for_search(text, language)
                     if keywords:
                         search_query = keywords[0] if language == "Korean" else f"{keywords[0]} latest news"
                         search_context = format_search_results(search_query)
                         print(f"Search context added for: {search_query}")

             chat_template = self._get_messages_formatter_type(self.config.local_model_name)
             provider = LlamaCppPythonProvider(self.local_llm)

+            # ๊ฐํ๋ ๋ผ๋์ค ์คํ์ผ ์์คํ ๋ฉ์์ง
             if language == "Korean":
                 system_message = (
+                    "๋น์ ์ ํ๊ตญ์ ์ธ๊ธฐ ๋ผ๋์ค ๋๋ด ํ๋ก๊ทธ๋จ ์ ๋ฌธ ์๊ฐ์๋๋ค. "
+                    "์ฒญ์ทจ์๋ค์ด ์ค์ ๋ผ๋์ค๋ฅผ ๋ฃ๋ ๊ฒ์ฒ๋ผ ๋ชฐ์ํ ์ ์๋ ์์ฐ์ค๋ฌ์ด ๋ํ๋ฅผ ๋ง๋ญ๋๋ค.\n\n"
+                    "์์ฑ ๊ท์น:\n"
+                    "1. ์งํ์(์ค์)๋ ์ฃผ๋ก ์งง์ ์ง๋ฌธ์ผ๋ก ๋ํ๋ฅผ ์ด๋์ด๊ฐ์ธ์ (\"๊ทธ๋ ๊ตฐ์\", \"์ด๋ค ์ ์ด ํน๋ณํ๊ฐ์?\", \"์ฒญ์ทจ์๋ถ๋ค์ด ๊ถ๊ธํดํ์ค ๊ฒ ๊ฐ์๋ฐ์\")\n"
+                    "2. ์ ๋ฌธ๊ฐ(๋ฏผํธ)๋ 1-2๋ฌธ์ฅ์ผ๋ก ๊ฐ๊ฒฐํ๊ฒ ๋ต๋ณํ์ธ์\n"
+                    "3. ์ ๋ ํ ์ฌ๋์ด 3๋ฌธ์ฅ ์ด์ ์ฐ์์ผ๋ก ๋งํ์ง ๋ง์ธ์\n"
+                    "4. ๊ตฌ์ด์ฒด์ ์ถ์์๋ฅผ ์์ฐ์ค๋ฝ๊ฒ ์ฌ์ฉํ์ธ์\n"
+                    "5. ๋ฐ๋์ ์๋ก ์กด๋๋ง์ ์ฌ์ฉํ์ธ์\n"
+                    "6. 12-15ํ์ ๋ํ ๊ตํ์ผ๋ก ๊ตฌ์ฑํ์ธ์\n"
+                    "7. JSON ํ์์ผ๋ก๋ง ์๋ตํ์ธ์"
                 )
             else:
                 system_message = (
+                    "You are a professional radio talk show scriptwriter creating engaging, "
+                    "natural conversations that sound like real radio broadcasts.\n\n"
+                    "Writing rules:\n"
+                    "1. Host (Alex) mainly asks short questions to guide the conversation (\"I see\", \"What makes it special?\", \"Our listeners might wonder...\")\n"
+                    "2. Expert (Jordan) answers in 1-2 concise sentences\n"
+                    "3. Never have one person speak more than 2-3 sentences at once\n"
+                    "4. Use natural speech patterns and fillers\n"
+                    "5. Create 12-15 conversation exchanges\n"
+                    "6. Respond only in JSON format"
                 )

             agent = LlamaCppAgent(

             )

             settings = provider.get_provider_default_settings()
+            settings.temperature = 0.85  # ์ฝ๊ฐ ๋์ฌ์ ๋ ์์ฐ์ค๋ฌ์ด ๋ํ ์์ฑ
             settings.top_k = 40
             settings.top_p = 0.95
             settings.max_tokens = self.config.max_tokens  # ์ฆ๊ฐ๋ ํ ํฐ ์ ์ฌ์ฉ

             json_match = re.search(pattern, response)

             if json_match:
+                conversation_data = json.loads(json_match.group())
+                # ๋ํ ๊ธธ์ด ํ์ธ ๋ฐ ์กฐ์
+                if len(conversation_data["conversation"]) < self.config.min_conversation_turns:
+                    print(f"Conversation too short ({len(conversation_data['conversation'])} turns), regenerating...")
+                    # ์ฌ์๋ ๋ก์ง ์ถ๊ฐ ๊ฐ๋ฅ
+                return conversation_data
             else:
                 raise ValueError("No valid JSON found in local LLM response")

     @spaces.GPU(duration=120)
     def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
+        """Extract conversation using legacy local model with enhanced style"""
         try:
             self.initialize_legacy_local_mode()

+            # ๊ฐํ๋ ๋ผ๋์ค ์คํ์ผ ์์คํ ๋ฉ์์ง
             if language == "Korean":
                 system_message = (
+                    "๋น์ ์ ๋ผ๋์ค ๋๋ด ํ๋ก๊ทธ๋จ ์๊ฐ์๋๋ค. "
+                    "์งํ์(์ค์)๋ ์งง์ ์ง๋ฌธ์ผ๋ก, ์ ๋ฌธ๊ฐ(๋ฏผํธ)๋ ๊ฐ๊ฒฐํ ๋ต๋ณ์ผ๋ก "
+                    "์์ฐ์ค๋ฌ์ด ๋ํ๋ฅผ ๋ง๋์ธ์. ์๋ก ์กด๋๋ง์ ์ฌ์ฉํ๊ณ , "
+                    "ํ ๋ฒ์ 2-3๋ฌธ์ฅ ์ด๋ด๋ก ๋งํ์ธ์. 12-15ํ ๋ํ ๊ตํ์ผ๋ก ๊ตฌ์ฑํ์ธ์."
                 )
             else:
                 system_message = (
+                    "You are a radio talk show scriptwriter. "
+                    "Create natural dialogue where the host (Alex) asks short questions "
+                    "and the expert (Jordan) gives brief answers. "
+                    "Keep each turn to 2-3 sentences max. Create 12-15 exchanges."
                 )

             chat = [

                 streamer=streamer,
                 max_new_tokens=self.config.max_new_tokens,  # ์ฆ๊ฐ๋ ํ ํฐ ์ ์ฌ์ฉ
                 do_sample=True,
+                temperature=0.85,
                 eos_token_id=terminators,
             )

         except Exception as e:
             print(f"Legacy local model also failed: {e}")
+            # Return enhanced default template
             if language == "Korean":
+                return self._get_default_korean_conversation()
             else:
+                return self._get_default_english_conversation()
+
+    def _get_default_korean_conversation(self) -> Dict:
+        """๋ ๊ธด ๊ธฐ๋ณธ ํ๊ตญ์ด ๋ํ ํํ๋ฆฟ"""
+        return {
+            "conversation": [
+                {"speaker": "์ค์", "text": "์๋ํ์ธ์, ์ฌ๋ฌ๋ถ! ์ค๋๋ ์ ํฌ ํ์บ์คํธ๋ฅผ ์ฐพ์์ฃผ์์ ์ ๋ง ๊ฐ์ฌํฉ๋๋ค."},
+                {"speaker": "๋ฏผํธ", "text": "๋ค, ์๋ํ์ธ์! ์ค๋ ์ ๋ง ํฅ๋ฏธ๋ก์ด ์ฃผ์ ๋ฅผ ์ค๋นํ์ต๋๋ค."},
+                {"speaker": "์ค์", "text": "์, ๊ทธ๋์? ์ด๋ค ๋ด์ฉ์ธ์ง ์ ๋ง ๊ถ๊ธํ๋ฐ์?"},
+                {"speaker": "๋ฏผํธ", "text": "์ค๋์ ์ต๊ทผ ๋ง์ ๋ถ๋ค์ด ๊ด์ฌ์ ๊ฐ์ง๊ณ ๊ณ์ ์ฃผ์ ์ ๋ํด ์ด์ผ๊ธฐํด๋ณผ๊น ํด์."},
+                {"speaker": "์ค์", "text": "์, ์์ฆ ์ ๋ง ํ์ ๊ฐ ๋๊ณ ์์ฃ . ๊ตฌ์ฒด์ ์ผ๋ก ์ด๋ค ์ธก๋ฉด์ ๋ค๋ฃฐ ์์ ์ด์ ๊ฐ์?"},
+                {"speaker": "๋ฏผํธ", "text": "๋ค, ๋จผ์ ๊ธฐ๋ณธ์ ์ธ ๊ฐ๋ ๋ถํฐ ์ฐจ๊ทผ์ฐจ๊ทผ ์ค๋ช ๋๋ฆฌ๊ณ , ์ค์ํ์ ์ด๋ป๊ฒ ์ ์ฉํ ์ ์๋์ง ์์๋ณผ๊ฒ์."},
+                {"speaker": "์ค์", "text": "์ข์์! ์ฒญ์ทจ์๋ถ๋ค๋ ์ดํดํ๊ธฐ ์ฝ๊ฒ ์ค๋ช ํด์ฃผ์ค ๊ฑฐ์ฃ ?"},
+                {"speaker": "๋ฏผํธ", "text": "๋ฌผ๋ก ์ด์ฃ . ์ต๋ํ ์ฝ๊ณ ์ฌ๋ฏธ์๊ฒ ํ์ด์ ์ค๋ช ๋๋ฆด๊ฒ์."},
+                {"speaker": "์ค์", "text": "๊ทธ๋ผ ๋ณธ๊ฒฉ์ ์ผ๋ก ์์ํด๋ณผ๊น์?"},
+                {"speaker": "๋ฏผํธ", "text": "๋ค, ์ข์ต๋๋ค. ์ฐ์ ์ด ์ฃผ์ ๊ฐ ์ ์ค์ํ์ง๋ถํฐ ๋ง์๋๋ฆด๊ฒ์."},
+                {"speaker": "์ค์", "text": "์, ๋ง์์. ๊ทธ ๋ถ๋ถ์ด ์ ๋ง ์ค์ํ์ฃ ."},
+                {"speaker": "๋ฏผํธ", "text": "์ต๊ทผ ์ฐ๊ตฌ ๊ฒฐ๊ณผ๋ฅผ ๋ณด๋ฉด ์ ๋ง ๋๋ผ์ด ๋ฐ๊ฒฌ๋ค์ด ๋ง์์ด์."}
+            ]
+        }
+
+    def _get_default_english_conversation(self) -> Dict:
+        """Enhanced default English conversation template"""
+        return {
+            "conversation": [
+                {"speaker": "Alex", "text": "Welcome everyone to our podcast! We have a fascinating topic today."},
+                {"speaker": "Jordan", "text": "Thanks, Alex. I'm excited to dive into this subject with our listeners."},
+                {"speaker": "Alex", "text": "So, what makes this topic particularly relevant right now?"},
+                {"speaker": "Jordan", "text": "Well, there have been some significant developments recently that everyone should know about."},
+                {"speaker": "Alex", "text": "Interesting! Can you break it down for us?"},
+                {"speaker": "Jordan", "text": "Absolutely. Let me start with the basics and build from there."},
+                {"speaker": "Alex", "text": "That sounds perfect. Our listeners will appreciate that approach."},
+                {"speaker": "Jordan", "text": "So, first, let's understand what we're really talking about here."},
+                {"speaker": "Alex", "text": "Right, the fundamentals are crucial."},
+                {"speaker": "Jordan", "text": "Exactly. And once we grasp that, the rest becomes much clearer."},
+                {"speaker": "Alex", "text": "I'm already learning something new! What's next?"},
+                {"speaker": "Jordan", "text": "Now, here's where it gets really interesting..."}
+            ]
+        }

     def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
+        """Extract conversation using API with enhanced radio style"""
         if not self.llm_client:
             raise RuntimeError("API mode not initialized")

         try:
             # ๊ฒ์ ์ปจํ์คํธ ์์ฑ
             search_context = ""
+            if BRAVE_KEY and not text.startswith("Keyword-based content:"):
                 try:
                     keywords = extract_keywords_for_search(text, language)
                     if keywords:

                 except Exception as e:
                     print(f"Search failed, continuing without context: {e}")

+            # ๊ฐํ๋ ๋ผ๋์ค ์คํ์ผ ํ๋กฌํํธ
             if language == "Korean":
                 system_message = (
+                    "๋น์ ์ ํ๊ตญ์ ์ธ๊ธฐ ๋ผ๋์ค ๋๋ด ํ๋ก๊ทธ๋จ ์๊ฐ์๋๋ค. "
+                    "์ค์ ๋ผ๋์ค ๋ฐฉ์ก์ฒ๋ผ ์์ฐ์ค๋ฝ๊ณ ํธ์ํ ๋ํ๋ฅผ ๋ง๋์ธ์.\n"
+                    "์ค์(์งํ์)๋ ์ฃผ๋ก ์งง์ ์ง๋ฌธ๊ณผ ๋ฆฌ์ก์์ผ๋ก ๋ํ๋ฅผ ์ด๋๊ณ , "
+                    "๋ฏผํธ(์ ๋ฌธ๊ฐ)๋ 1-2๋ฌธ์ฅ์ผ๋ก ๊ฐ๊ฒฐํ๊ฒ ๋ต๋ณํฉ๋๋ค. "
+                    "๊ตฌ์ด์ฒด์ ์ถ์์๋ฅผ ์ฌ์ฉํ๊ณ , ๋ฐ๋์ ์๋ก ์กด๋๋ง์ ์ฌ์ฉํ์ธ์. "
+                    "12-15ํ์ ๋ํ ๊ตํ์ผ๋ก ๊ตฌ์ฑํ์ธ์."
                 )
             else:
                 system_message = (
+                    "You are a professional radio talk show scriptwriter. "
+                    "Create natural, engaging dialogue like a real radio broadcast. "
+                    "Alex (host) mainly asks short questions and gives reactions, "
+                    "while Jordan (expert) answers in 1-2 concise sentences. "
+                    "Use conversational language with natural fillers. "
+                    "Create 12-15 conversation exchanges."
                 )

             chat_completion = self.llm_client.chat.completions.create(

                     {"role": "user", "content": self._build_prompt(text, language, search_context)}
                 ],
                 model=self.config.api_model_name,
+                temperature=0.85,
             )

             pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"


 async def synthesize(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
+    """Main synthesis function - handles URL, PDF, and Keyword inputs"""
     try:
         # Extract text based on input type
         if input_type == "URL":
             if not article_input or not isinstance(article_input, str):
                 return "Please provide a valid URL.", None
             text = converter.fetch_text(article_input)
+        elif input_type == "PDF":
             if not article_input:
                 return "Please upload a PDF file.", None
             text = converter.extract_text_from_pdf(article_input)
+        else:  # Keyword
+            if not article_input or not isinstance(article_input, str):
+                return "Please provide a keyword or topic.", None
+            # ํค์๋๋ก ๊ฒ์ํ์ฌ ์ฝํ์ธ ์์ฑ
+            text = search_and_compile_content(article_input, language)
+            text = f"Keyword-based content:\n{text}"  # ๋ง์ปค ์ถ๊ฐ

         # Limit text to max words
         words = text.split()

     # ํ๊ตญ์ด์ธ ๊ฒฝ์ฐ Edge-TTS๋ง ์ฌ์ฉ (๋ค๋ฅธ TTS๋ ํ๊ตญ์ด ์ง์์ด ์ ํ์ )
     if language == "Korean" and tts_engine != "Edge-TTS":
+        tts_engine = "Edge-TTS"  # ์๋์ผ๋ก Edge-TTS๋ก ๋ณ๊ฒฝ

     # Generate audio based on TTS engine
     if tts_engine == "Edge-TTS":


 def toggle_input_visibility(input_type):
+    """Toggle visibility of URL input, file upload, and keyword input based on input type"""
     if input_type == "URL":
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
+    elif input_type == "PDF":
+        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
+    else:  # Keyword
+        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)


 # ๋ชจ๋ธ ์ด๊ธฐํ (์ฑ ์์ ์)

 # Gradio Interface
+with gr.Blocks(theme='soft', title="AI Podcast Generator") as demo:
+    gr.Markdown("# ๐๏ธ AI Podcast Generator")
+    gr.Markdown("Convert any article, blog, PDF document, or topic into an engaging podcast conversation!")

     # ์๋จ์ ๋ก์ปฌ LLM ์ํ ํ์
     with gr.Row():
         gr.Markdown(f"""
+        ### ๐ค Enhanced Configuration:
         - **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
         - **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
         - **Status**: {"โ Llama CPP Available" if LLAMA_CPP_AVAILABLE else "โ Llama CPP Not Available - Install llama-cpp-python"}
+        - **Conversation Length**: {converter.config.min_conversation_turns}-{converter.config.max_conversation_turns} exchanges (1.5x longer)
         - **Search**: {"โ Brave Search Enabled" if BRAVE_KEY else "โ Brave Search Not Available - Set BSEARCH_API"}
+        - **New**: ๐ฏ Keyword input for topic-based podcast generation
         """)

     with gr.Row():
         with gr.Column(scale=3):
+            # Input type selector - ํค์๋ ์ต์ ์ถ๊ฐ
             input_type_selector = gr.Radio(
+                choices=["URL", "PDF", "Keyword"],
                 value="URL",
                 label="Input Type",
+                info="Choose between URL, PDF file upload, or keyword/topic"
             )

             # URL input

                 visible=False
             )

+            # Keyword input (์๋ก ์ถ๊ฐ)
+            keyword_input = gr.Textbox(
+                label="Topic/Keyword",
+                placeholder="Enter a topic or keyword (e.g., 'AI trends', '์ธ๊ณต์ง๋ฅ ์ต์ ๋ํฅ')",
+                value="",
+                visible=False,
+                info="The system will search for latest information about this topic"
+            )
+
         with gr.Column(scale=1):
+            # ์ธ์ด ์ ํ
             language_selector = gr.Radio(
                 choices=["English", "Korean"],
                 value="English",

             )

             gr.Markdown("""
+            **๐ป Radio Talk Show Style:**
+            - Natural, conversational dialogue
+            - Host asks short questions
+            - Expert gives brief, clear answers
+            - 12-15 conversation exchanges

+            **๐ Keyword Feature:**
+            - Enter any topic to generate a podcast
+            - Automatically searches latest information
+            - Creates engaging discussion from search results

+            **๐ฐ๐ท ํ๊ตญ์ด ์ง์:**
+            - ์์ฐ์ค๋ฌ์ด ๋ผ๋์ค ๋๋ด ์คํ์ผ
+            - ์งํ์(์ค์)๊ฐ ์งง์ ์ง๋ฌธ์ผ๋ก ๋ํ ์ ๋
+            - ์ ๋ฌธ๊ฐ(๋ฏผํธ)๊ฐ ๊ฐ๊ฒฐํ๊ฒ ๋ต๋ณ
+            - ์ต์ ์ ๋ณด ์๋ ๊ฒ์ ๋ฐ ๋ฐ์
             """)

     convert_btn = gr.Button("๐ฏ Generate Conversation / ๋ํ ์์ฑ", variant="primary", size="lg")

     with gr.Column():
         conversation_output = gr.Textbox(
             label="Generated Conversation (Editable) / ์์ฑ๋ ๋ํ (ํธ์ง ๊ฐ๋ฅ)",
+            lines=30,  # ๋ ๊ธด ๋ํ๋ฅผ ์ํด ์ฆ๊ฐ
+            max_lines=60,
             interactive=True,
+            placeholder="Generated conversation will appear here. You can edit it before generating audio.\n์์ฑ๋ ๋ํ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค. ์ค๋์ค ์์ฑ ์ ์ ํธ์งํ ์ ์์ต๋๋ค.\n\n๋ผ๋์ค ๋๋ด ์คํ์ผ๋ก ์์ฐ์ค๋ฝ๊ฒ ์งํ๋ฉ๋๋ค.",
             info="Edit the conversation as needed. Format: 'Speaker Name: Text' / ํ์์ ๋ฐ๋ผ ๋ํ๋ฅผ ํธ์งํ์ธ์. ํ์: 'ํ์ ์ด๋ฆ: ํ์คํธ'"
         )

     gr.Examples(
         examples=[
             ["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
+            ["", "Keyword", "Local", "Edge-TTS", "English"],  # Keyword example
             ["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
+            ["", "Keyword", "Local", "Edge-TTS", "Korean"],  # Korean keyword example
         ],
         inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
         outputs=[conversation_output, status_output],
         cache_examples=False,
     )

+    # Input type change handler - ์์ ๋จ
     input_type_selector.change(
         fn=toggle_input_visibility,
         inputs=[input_type_selector],
+        outputs=[url_input, pdf_input, keyword_input]
     )

     # ์ธ์ด ๋ณ๊ฒฝ ์ TTS ์์ง ์ต์ ์๋ฐ์ดํธ
     )

     # ์ด๋ฒคํธ ์ฐ๊ฒฐ - ์์ ๋ ๋ถ๋ถ
+    def get_article_input(input_type, url_input, pdf_input, keyword_input):
         """Get the appropriate input based on input type"""
         if input_type == "URL":
             return url_input
+        elif input_type == "PDF":
             return pdf_input
+        else:  # Keyword
+            return keyword_input

     convert_btn.click(
+        fn=lambda input_type, url_input, pdf_input, keyword_input, mode, tts, lang: synthesize_sync(
+            get_article_input(input_type, url_input, pdf_input, keyword_input), input_type, mode, tts, lang
         ),
+        inputs=[input_type_selector, url_input, pdf_input, keyword_input, mode_selector, tts_selector, language_selector],
         outputs=[conversation_output, status_output]
     )
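The turn-count check in extract_conversation_local only prints a warning where the in-line comment notes that retry logic could be added. A minimal sketch of what such a retry wrapper might look like, assuming it is acceptable to simply call the extractor again; the helper name and variables below are hypothetical and not part of this commit:

# Hypothetical helper, not in app.py: retries extraction until the conversation
# reaches config.min_conversation_turns, up to max_attempts tries in total.
def extract_with_min_turns(converter, text, language="English", max_attempts=2):
    result = converter.extract_conversation_local(text, language)
    for _ in range(max_attempts - 1):
        turns = len(result.get("conversation", []))
        if turns >= converter.config.min_conversation_turns:
            break
        # Too short (the same condition the commit logs) -> try once more.
        result = converter.extract_conversation_local(text, language)
    return result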