openfree committed on
Commit
1236aa5
·
verified ·
1 Parent(s): f87c30a

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +192 -65
app-backup.py CHANGED
@@ -14,6 +14,9 @@ import numpy as np
14
  import soundfile as sf
15
  import subprocess
16
  import shutil
 
 
 
17
  from dataclasses import dataclass
18
  from typing import List, Tuple, Dict, Optional
19
  from pathlib import Path
@@ -72,10 +75,13 @@ except:
72
 
73
  load_dotenv()
74
 
 
 
 
75
 
76
  @dataclass
77
  class ConversationConfig:
78
- max_words: int = 6000
79
  prefix_url: str = "https://r.jina.ai/"
80
  api_model_name: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
81
  legacy_local_model_name: str = "NousResearch/Hermes-2-Pro-Llama-3-8B"
@@ -83,9 +89,73 @@ class ConversationConfig:
83
  local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
84
  local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
85
  # ํ† ํฐ ์ˆ˜ ์ฆ๊ฐ€
86
- max_tokens: int = 6000 # 2048์—์„œ 6000์œผ๋กœ ์ฆ๊ฐ€
87
- max_new_tokens: int = 8000 # 4000์—์„œ 8000์œผ๋กœ ์ฆ๊ฐ€
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  class UnifiedAudioConverter:
91
  def __init__(self, config: ConversationConfig):
@@ -130,7 +200,7 @@ class UnifiedAudioConverter:
130
  flash_attn=True,
131
  n_gpu_layers=81 if torch.cuda.is_available() else 0,
132
  n_batch=1024,
133
- n_ctx=8192,
134
  )
135
  self.local_llm_model = self.config.local_model_name
136
  print(f"Local LLM initialized: {model_path_local}")
@@ -238,10 +308,15 @@ class UnifiedAudioConverter:
238
  else:
239
  return MessagesFormatterType.LLAMA_3
240
 
241
- def _build_prompt(self, text: str, language: str = "English") -> str:
242
- """Build prompt for conversation generation"""
 
 
 
 
 
 
243
  if language == "Korean":
244
- # ๊ฐ•ํ™”๋œ ํ•œ๊ตญ์–ด ํ”„๋กฌํ”„ํŠธ
245
  template = """
246
  {
247
  "conversation": [
@@ -252,22 +327,27 @@ class UnifiedAudioConverter:
252
  ]
253
  }
254
  """
255
- return (
256
- f"{text}\n\n"
257
- f"์œ„ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ 30๋Œ€ ํ•œ๊ตญ์ธ ๋‘ ๋ช…์ด ์ง„ํ–‰ํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ํฅ๋ฏธ๋กœ์šด ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”.\n\n"
258
- f"ํ•„์ˆ˜ ์ง€์นจ:\n"
259
- f"1. ์ค€์ˆ˜(์ง„ํ–‰์ž): ์นœ๊ทผํ•˜๊ณ  ํ˜ธ๊ธฐ์‹ฌ ๋งŽ์€ ์„ฑ๊ฒฉ, ์ฒญ์ทจ์ž์˜ ๊ถ๊ธˆ์ฆ์„ ๋Œ€๋ณ€\n"
260
- f"2. ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€): ํ•ด๋‹น ์ฃผ์ œ์— ๋Œ€ํ•œ ๊นŠ์€ ์ง€์‹์„ ๊ฐ€์ง„ ์ „๋ฌธ๊ฐ€, ์‰ฝ๊ฒŒ ์„ค๋ช…ํ•˜๋Š” ๋Šฅ๋ ฅ\n"
261
- f"3. ํ•œ๊ตญ์ธ์ด ์‹ค์ œ๋กœ ์‚ฌ์šฉํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„๊ณผ ๊ฐํƒ„์‚ฌ ์‚ฌ์šฉ ('์•„~', '๊ทธ๋ ‡๊ตฌ๋‚˜', '์™€~', '์ง„์งœ์š”?')\n"
262
- f"4. ์ ์ ˆํ•œ ์กด๋Œ“๋ง๊ณผ ํŽธ์•ˆํ•œ ๋ฐ˜๋ง์„ ์„ž์–ด ์นœ๋ฐ€๊ฐ ์กฐ์„ฑ\n"
263
- f"5. ํ•œ๊ตญ ๋ฌธํ™”์™€ ์ผ์ƒ์— ๋งž๋Š” ๊ตฌ์ฒด์ ์ธ ์˜ˆ์‹œ์™€ ๋น„์œ  ์‚ฌ์šฉ\n"
264
- f"6. ๊ฐ ๋Œ€ํ™”๋Š” ์ถฉ๋ถ„ํžˆ ๊ธธ๊ณ  ์ƒ์„ธํ•˜๊ฒŒ (์ตœ์†Œ 3-4๋ฌธ์žฅ ์ด์ƒ)\n"
265
- f"7. ์ „์ฒด ๋Œ€ํ™”๋Š” ์ตœ์†Œ 10ํšŒ ์ด์ƒ์˜ ์ฃผ๊ณ ๋ฐ›๊ธฐ๋กœ ๊ตฌ์„ฑ\n"
266
- f"8. ์ฒญ์ทจ์ž๊ฐ€ '๋‚˜๋„ ๊ถ๊ธˆํ–ˆ๋˜ ๋‚ด์šฉ์ด์•ผ'๋ผ๊ณ  ๊ณต๊ฐํ•  ์ˆ˜ ์žˆ๋Š” ์งˆ๋ฌธ ํฌํ•จ\n"
267
- f"9. ํ•ต์‹ฌ ์ •๋ณด๋ฅผ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์ „๋‹ฌํ•˜๋ฉด์„œ๋„ ์ง€๋ฃจํ•˜์ง€ ์•Š๊ฒŒ ๊ตฌ์„ฑ\n"
268
- f"10. ๋งˆ๋ฌด๋ฆฌ๋Š” ํ•ต์‹ฌ ๋‚ด์šฉ ์š”์•ฝ๊ณผ ์ฒญ์ทจ์ž์—๊ฒŒ ๋„์›€์ด ๋˜๋Š” ์‹ค์šฉ์  ์กฐ์–ธ\n\n"
269
- f"๋‹ค์Œ JSON ํ˜•์‹์œผ๋กœ๋งŒ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”:\n{template}"
 
 
270
  )
 
 
 
271
  else:
272
  template = """
273
  {
@@ -279,49 +359,74 @@ class UnifiedAudioConverter:
279
  ]
280
  }
281
  """
282
- return (
283
- f"{text}\n\n"
284
- f"Convert the provided text into an engaging, natural podcast conversation between two experts.\n\n"
 
 
 
 
 
 
285
  f"Guidelines:\n"
286
- f"1. Alex (Host): Curious, engaging personality representing audience questions\n"
287
- f"2. Jordan (Expert): Knowledgeable but approachable, explains complex topics simply\n"
288
- f"3. Use natural conversational English with appropriate reactions ('Wow', 'That's interesting', 'Really?')\n"
289
- f"4. Include concrete examples and relatable analogies\n"
290
- f"5. Each response should be substantial (minimum 3-4 sentences)\n"
291
- f"6. Create at least 10 back-and-forth exchanges\n"
292
- f"7. Address common questions and misconceptions\n"
293
- f"8. Maintain an informative yet entertaining tone\n"
294
- f"9. End with key takeaways and practical advice\n\n"
295
- f"Return ONLY the JSON in this format:\n{template}"
296
  )
 
 
 
 
297
 
298
- def _build_messages_for_local(self, text: str, language: str = "English") -> List[Dict]:
299
- """Build messages for local LLM"""
300
  if language == "Korean":
301
  system_message = (
302
- "๋‹น์‹ ์€ ํ•œ๊ตญ ์ตœ๊ณ ์˜ ๏ฟฝ๏ฟฝ๏ฟฝ์บ์ŠคํŠธ ๋Œ€๋ณธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
303
- "ํ•œ๊ตญ์ธ์˜ ์ •์„œ์™€ ๋ฌธํ™”๋ฅผ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , ์ฒญ์ทจ์ž๋“ค์ด ๋๊นŒ์ง€ ์ง‘์ค‘ํ•  ์ˆ˜ ์žˆ๋Š” "
304
- "๋งค๋ ฅ์ ์ด๊ณ  ์œ ์ตํ•œ ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด๋ƒ…๋‹ˆ๋‹ค. "
305
- "์‹ค์ œ ํ•œ๊ตญ์ธ๋“ค์ด ์ผ์ƒ์—์„œ ์‚ฌ์šฉํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„๊ณผ "
306
- "์ ์ ˆํ•œ ๊ฐ์ • ํ‘œํ˜„์„ ํ†ตํ•ด ์ƒ๋™๊ฐ ์žˆ๋Š” ๋Œ€ํ™”๋ฅผ ๊ตฌ์„ฑํ•ฉ๋‹ˆ๋‹ค."
 
 
 
 
 
307
  )
308
  else:
309
  system_message = (
310
  "You are an expert podcast scriptwriter who creates engaging, "
311
  "natural conversations that keep listeners hooked. "
312
  "You understand how to balance information with entertainment, "
313
- "using real conversational patterns and authentic reactions."
 
314
  )
315
 
316
  return [
317
  {"role": "system", "content": system_message},
318
- {"role": "user", "content": self._build_prompt(text, language)}
319
  ]
320
 
321
  @spaces.GPU(duration=120)
322
  def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
323
- """Extract conversation using new local LLM (primary method)"""
324
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  # ๋จผ์ € ์ƒˆ๋กœ์šด ๋กœ์ปฌ LLM ์‹œ๋„
326
  self.initialize_local_mode()
327
 
@@ -334,9 +439,11 @@ class UnifiedAudioConverter:
334
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
335
  "ํ•œ๊ตญ ์ฒญ์ทจ์ž๋“ค์˜ ๋ฌธํ™”์  ๋งฅ๋ฝ๊ณผ ์–ธ์–ด์  ํŠน์„ฑ์„ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , "
336
  "์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๋Œ€๋ณธ์„ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. "
 
 
337
  "์‹ค์ œ ํ•œ๊ตญ์ธ์ด ๋Œ€ํ™”ํ•˜๋Š” ๊ฒƒ์ฒ˜๋Ÿผ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„, ์ ์ ˆํ•œ ๊ฐํƒ„์‚ฌ, "
338
  "๋ฌธํ™”์ ์œผ๋กœ ์ ํ•ฉํ•œ ์˜ˆ์‹œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ฒญ์ทจ์ž๊ฐ€ ๊ณต๊ฐํ•˜๊ณ  ๋ชฐ์ž…ํ•  ์ˆ˜ ์žˆ๋Š” "
339
- "๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”. JSON ํ˜•์‹์œผ๋กœ๋งŒ ์‘๋‹ตํ•˜์„ธ์š”."
340
  )
341
  else:
342
  system_message = (
@@ -344,6 +451,7 @@ class UnifiedAudioConverter:
344
  "engaging, natural conversations that captivate listeners. "
345
  "You excel at transforming complex information into accessible, "
346
  "entertaining dialogue while maintaining authenticity and educational value. "
 
347
  "Respond only in JSON format."
348
  )
349
 
@@ -364,7 +472,7 @@ class UnifiedAudioConverter:
364
 
365
  messages = BasicChatHistory()
366
 
367
- prompt = self._build_prompt(text, language)
368
  response = agent.get_chat_response(
369
  prompt,
370
  llm_sampling_settings=settings,
@@ -384,10 +492,10 @@ class UnifiedAudioConverter:
384
 
385
  except Exception as e:
386
  print(f"Local LLM failed: {e}, falling back to legacy local method")
387
- return self.extract_conversation_legacy_local(text, language, progress)
388
 
389
  @spaces.GPU(duration=120)
390
- def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None) -> Dict:
391
  """Extract conversation using legacy local model (fallback)"""
392
  try:
393
  self.initialize_legacy_local_mode()
@@ -397,17 +505,20 @@ class UnifiedAudioConverter:
397
  system_message = (
398
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
399
  "30๋Œ€ ํ•œ๊ตญ์ธ ์ฒญ์ทจ์ž๋ฅผ ๋Œ€์ƒ์œผ๋กœ ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ํฅ๋ฏธ๋กœ์šด ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”. "
400
- "์‹ค์ œ ์‚ฌ์šฉํ•˜๋Š” ํ•œ๊ตญ์–ด ํ‘œํ˜„๊ณผ ๋ฌธํ™”์  ๋งฅ๋ฝ์„ ๋ฐ˜์˜ํ•˜์—ฌ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”."
 
 
401
  )
402
  else:
403
  system_message = (
404
  "You are an expert podcast scriptwriter. "
405
- "Create natural, engaging conversations that inform and entertain listeners."
 
406
  )
407
 
408
  chat = [
409
  {"role": "system", "content": system_message},
410
- {"role": "user", "content": self._build_prompt(text, language)}
411
  ]
412
 
413
  terminators = [
@@ -450,14 +561,14 @@ class UnifiedAudioConverter:
450
 
451
  except Exception as e:
452
  print(f"Legacy local model also failed: {e}")
453
- # Return default template with Korean male names
454
  if language == "Korean":
455
  return {
456
  "conversation": [
457
- {"speaker": "์ค€์ˆ˜", "text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์—ฌ๋Ÿฌ๋ถ„! ์˜ค๋Š˜๋„ ์ €ํฌ ํŒŸ์บ์ŠคํŠธ๋ฅผ ์ฐพ์•„์ฃผ์…”์„œ ์ •๋ง ๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค."},
458
- {"speaker": "๋ฏผํ˜ธ", "text": "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜์€ ์ •๋ง ํฅ๋ฏธ๋กœ์šด ์ฃผ์ œ๋ฅผ ์ค€๋น„ํ–ˆ๋Š”๋ฐ์š”, ํ•จ๊ป˜ ์ด์•ผ๊ธฐ ๋‚˜๋ˆ ๋ณด์‹œ์ฃ ."},
459
- {"speaker": "์ค€์ˆ˜", "text": "๋„ค, ์ •๋ง ๊ธฐ๋Œ€๋˜๋Š”๋ฐ์š”. ์ฒญ์ทจ์ž ์—ฌ๋Ÿฌ๋ถ„๋“ค๋„ ๊ถ๊ธˆํ•ดํ•˜์‹ค ๊ฒƒ ๊ฐ™์•„์š”."},
460
- {"speaker": "๋ฏผํ˜ธ", "text": "๋งž์•„์š”. ๊ทธ๋Ÿผ ๋ณธ๊ฒฉ์ ์œผ๋กœ ์‹œ์ž‘ํ•ด๋ณผ๊นŒ์š”?"}
461
  ]
462
  }
463
  else:
@@ -471,30 +582,44 @@ class UnifiedAudioConverter:
471
  }
472
 
473
  def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
474
- """Extract conversation using API (fallback method)"""
475
  if not self.llm_client:
476
  raise RuntimeError("API mode not initialized")
477
 
478
  try:
 
 
 
 
 
 
 
 
 
 
 
 
479
  # ๊ฐ•ํ™”๋œ ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
480
  if language == "Korean":
481
  system_message = (
482
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
483
  "ํ•œ๊ตญ ์ฒญ์ทจ์ž๋“ค์˜ ๋ฌธํ™”์  ๋งฅ๋ฝ๊ณผ ์–ธ์–ด์  ํŠน์„ฑ์„ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , "
484
  "์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๋Œ€๋ณธ์„ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. "
485
- "์ค€์ˆ˜(์ง„ํ–‰์ž)์™€ ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€)๋ผ๋Š” ๋‘ ๋ช…์˜ 30๋Œ€ ๋‚จ์„ฑ์ด ๋Œ€ํ™”ํ•˜๋Š” ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•˜์„ธ์š”."
 
486
  )
487
  else:
488
  system_message = (
489
  "You are an expert podcast scriptwriter who creates engaging, "
490
  "natural conversations between Alex (host) and Jordan (expert). "
491
- "Create informative yet entertaining dialogue that keeps listeners engaged."
 
492
  )
493
 
494
  chat_completion = self.llm_client.chat.completions.create(
495
  messages=[
496
  {"role": "system", "content": system_message},
497
- {"role": "user", "content": self._build_prompt(text, language)}
498
  ],
499
  model=self.config.api_model_name,
500
  )
@@ -905,6 +1030,7 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
905
  - **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
906
  - **Status**: {"โœ… Llama CPP Available" if LLAMA_CPP_AVAILABLE else "โŒ Llama CPP Not Available - Install llama-cpp-python"}
907
  - **Max Tokens**: {converter.config.max_tokens} (Extended for longer conversations)
 
908
  """)
909
 
910
  with gr.Row():
@@ -968,7 +1094,8 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
968
 
969
  **ํ•œ๊ตญ์–ด ์ง€์›:**
970
  - ๐Ÿ‡ฐ๐Ÿ‡ท ํ•œ๊ตญ์–ด ์„ ํƒ ์‹œ Edge-TTS๋งŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค
971
- - ๐Ÿ‘จโ€๐Ÿ‘จ ํ•œ๊ตญ์–ด ๋Œ€ํ™”๋Š” ์ค€์ˆ˜(์ง„ํ–‰์ž)์™€ ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€) ๋‘ ๋‚จ์„ฑ์ด ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค
 
972
  """)
973
 
974
  convert_btn = gr.Button("๐ŸŽฏ Generate Conversation / ๋Œ€ํ™” ์ƒ์„ฑ", variant="primary", size="lg")
@@ -977,10 +1104,10 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
977
  with gr.Column():
978
  conversation_output = gr.Textbox(
979
  label="Generated Conversation (Editable) / ์ƒ์„ฑ๋œ ๋Œ€ํ™” (ํŽธ์ง‘ ๊ฐ€๋Šฅ)",
980
- lines=20, # ๋” ๊ธด ๋Œ€ํ™”๋ฅผ ์œ„ํ•ด ์ฆ๊ฐ€
981
- max_lines=40,
982
  interactive=True,
983
- placeholder="Generated conversation will appear here. You can edit it before generating audio.\n์ƒ์„ฑ๋œ ๋Œ€ํ™”๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค. ์˜ค๋””์˜ค ์ƒ์„ฑ ์ „์— ํŽธ์ง‘ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.",
984
  info="Edit the conversation as needed. Format: 'Speaker Name: Text' / ํ•„์š”์— ๋”ฐ๋ผ ๋Œ€ํ™”๋ฅผ ํŽธ์ง‘ํ•˜์„ธ์š”. ํ˜•์‹: 'ํ™”์ž ์ด๋ฆ„: ํ…์ŠคํŠธ'"
985
  )
986
 
 
14
  import soundfile as sf
15
  import subprocess
16
  import shutil
17
+ import requests
18
+ import logging
19
+ from datetime import datetime, timedelta
20
  from dataclasses import dataclass
21
  from typing import List, Tuple, Dict, Optional
22
  from pathlib import Path
 
75
 
76
  load_dotenv()
77
 
78
+ # Brave Search API ์„ค์ •
79
+ BRAVE_KEY = os.getenv("BSEARCH_API")
80
+ BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
81
 
82
  @dataclass
83
  class ConversationConfig:
84
+ max_words: int = 4000
85
  prefix_url: str = "https://r.jina.ai/"
86
  api_model_name: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
87
  legacy_local_model_name: str = "NousResearch/Hermes-2-Pro-Llama-3-8B"
 
89
  local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
90
  local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
91
  # ํ† ํฐ ์ˆ˜ ์ฆ๊ฐ€
92
+ max_tokens: int = 3000 # 2048์—์„œ 6000์œผ๋กœ ์ฆ๊ฐ€
93
+ max_new_tokens: int = 6000 # 4000์—์„œ 8000์œผ๋กœ ์ฆ๊ฐ€
94
+
95
+
96
def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
    """Query the Brave Web Search API and return simplified result dicts.

    Args:
        query: Search query string.
        count: Maximum number of results to request and return.
        freshness_days: If given, restrict results to roughly the last N days.

    Returns:
        A list of dicts with "title", "url", "snippet" and "host" keys, or an
        empty list when the API key is missing or the request/parse fails.
    """
    # Local import: SOURCE only shows `re` imported inside another function,
    # so relying on a module-level `re` here risks a NameError.
    import re

    if not BRAVE_KEY:
        return []
    params = {"q": query, "count": str(count)}
    if freshness_days:
        # NOTE(review): Brave's `freshness` parameter expects values like
        # "pd"/"pw"/"pm" or a "YYYY-MM-DDtoYYYY-MM-DD" range; a bare start
        # date may be ignored by the API — confirm against the Brave docs.
        dt_from = (datetime.utcnow() - timedelta(days=freshness_days)).strftime("%Y-%m-%d")
        params["freshness"] = dt_from
    try:
        resp = requests.get(
            BRAVE_ENDPOINT,
            headers={"Accept": "application/json", "X-Subscription-Token": BRAVE_KEY},
            params=params,
            timeout=15
        )
        raw = resp.json().get("web", {}).get("results") or []
        # Use a distinct loop variable (`item`) so the HTTP response object is
        # not shadowed inside the comprehension (the original rebound `r`).
        return [{
            "title": item.get("title", ""),
            "url": item.get("url", item.get("link", "")),
            "snippet": item.get("description", item.get("text", "")),
            # Host = URL with scheme and optional "www." stripped, up to the first "/".
            "host": re.sub(r"https?://(www\.)?", "", item.get("url", "")).split("/")[0]
        } for item in raw[:count]]
    except Exception as e:
        # Best-effort helper: log and degrade to "no results" on any failure.
        logging.error(f"Brave search error: {e}")
        return []
121
+
122
def format_search_results(query: str) -> str:
    """Fetch a few fresh search results and render them as a short bullet list.

    Returns an empty string when nothing was found; otherwise newline-joined
    "- title: snippet" lines with a trailing newline, kept deliberately short
    so the prompt stays compact.
    """
    # Ask for 3 results no older than ~3 days; render at most 2 of them.
    hits = brave_search(query, 3, freshness_days=3)
    if not hits:
        return ""

    def shorten(snippet: str) -> str:
        # Cap each snippet at 100 characters, marking the truncation.
        return snippet[:100] + "..." if len(snippet) > 100 else snippet

    bullet_lines = [f"- {hit['title']}: {shorten(hit['snippet'])}" for hit in hits[:2]]
    return "\n".join(bullet_lines) + "\n"
136
 
137
def extract_keywords_for_search(text: str, language: str = "English") -> List[str]:
    """Pick at most one search keyword from the beginning of *text*.

    For Korean input, the longest run of two or more Hangul syllables wins;
    otherwise the longest capitalized word of more than four characters wins.
    Returns a single-element list, or an empty list when nothing qualifies.
    """
    # Only inspect the first 500 characters to avoid processing huge inputs.
    sample = text[:500]

    if language == "Korean":
        import re
        # All Hangul words (2+ syllables), de-duplicated preserving first-seen
        # order, then stably sorted longest-first so ties keep earlier words.
        hangul_words = list(dict.fromkeys(re.findall(r'[가-힣]{2,}', sample)))
        hangul_words.sort(key=len, reverse=True)
        return hangul_words[:1]

    # English-ish path: candidates start with an uppercase letter and are
    # longer than 4 chars (length measured before stripping punctuation,
    # matching the original behavior).
    candidates = [
        token.strip('.,!?;:')
        for token in sample.split()
        if len(token) > 4 and token[0].isupper()
    ]
    return [max(candidates, key=len)] if candidates else []
159
 
160
  class UnifiedAudioConverter:
161
  def __init__(self, config: ConversationConfig):
 
200
  flash_attn=True,
201
  n_gpu_layers=81 if torch.cuda.is_available() else 0,
202
  n_batch=1024,
203
+ n_ctx=16384,
204
  )
205
  self.local_llm_model = self.config.local_model_name
206
  print(f"Local LLM initialized: {model_path_local}")
 
308
  else:
309
  return MessagesFormatterType.LLAMA_3
310
 
311
+
312
+ def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
313
+ """Build prompt for conversation generation with search context"""
314
+ # ํ…์ŠคํŠธ ๊ธธ์ด ์ œํ•œ์„ ๋” ๊ฐ•ํ•˜๊ฒŒ ์ ์šฉ
315
+ max_text_length = 3000 if search_context else 4000
316
+ if len(text) > max_text_length:
317
+ text = text[:max_text_length] + "..."
318
+
319
  if language == "Korean":
 
320
  template = """
321
  {
322
  "conversation": [
 
327
  ]
328
  }
329
  """
330
+
331
+ # ๊ฒ€์ƒ‰ ์ปจํ…์ŠคํŠธ๊ฐ€ ์žˆ์„ ๋•Œ๋งŒ ํฌํ•จ
332
+ context_part = ""
333
+ if search_context:
334
+ context_part = f"# ์ตœ์‹  ๊ด€๋ จ ์ •๋ณด:\n{search_context}\n"
335
+
336
+ base_prompt = (
337
+ f"# ์›๋ณธ ์ฝ˜ํ…์ธ :\n{text}\n\n"
338
+ f"{context_part}"
339
+ f"์œ„ ๋‚ด์šฉ์œผ๋กœ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”.\n\n"
340
+ f"## ํ•ต์‹ฌ ์ง€์นจ:\n"
341
+ f"- ์ค€์ˆ˜(์ง„ํ–‰์ž)์™€ ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€) ๋‘ 30๋Œ€ ๋‚จ์„ฑ์˜ ๋Œ€ํ™”\n"
342
+ f"- ์„œ๋กœ ์กด๋Œ“๋ง ์‚ฌ์šฉ ํ•„์ˆ˜ (๋ฐ˜๋ง ์ ˆ๋Œ€ ๊ธˆ์ง€)\n"
343
+ f"- ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด ํ‘œํ˜„ ์‚ฌ์šฉ\n"
344
+ f"- ๊ฐ ๋Œ€ํ™” 2-3๋ฌธ์žฅ, ์ „์ฒด 8-10ํšŒ ์ฃผ๊ณ ๋ฐ›๊ธฐ\n"
345
+ f"- ์ตœ์‹  ์ •๋ณด๊ฐ€ ์žˆ๋‹ค๋ฉด ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ํฌํ•จ\n\n"
346
+ f"JSON ํ˜•์‹์œผ๋กœ๋งŒ ๋ฐ˜ํ™˜:\n{template}"
347
  )
348
+
349
+ return base_prompt
350
+
351
  else:
352
  template = """
353
  {
 
359
  ]
360
  }
361
  """
362
+
363
+ context_part = ""
364
+ if search_context:
365
+ context_part = f"# Latest Information:\n{search_context}\n"
366
+
367
+ base_prompt = (
368
+ f"# Content:\n{text}\n\n"
369
+ f"{context_part}"
370
+ f"Create a podcast conversation.\n\n"
371
  f"Guidelines:\n"
372
+ f"- Alex (Host) and Jordan (Expert)\n"
373
+ f"- Natural conversational English\n"
374
+ f"- Each response 2-3 sentences\n"
375
+ f"- 8-10 exchanges total\n"
376
+ f"- Include latest info if available\n\n"
377
+ f"Return JSON only:\n{template}"
 
 
 
 
378
  )
379
+
380
+ return base_prompt
381
+
382
+
383
 
384
+ def _build_messages_for_local(self, text: str, language: str = "English", search_context: str = "") -> List[Dict]:
385
+ """Build messages for local LLM with enhanced Korean guidelines"""
386
  if language == "Korean":
387
  system_message = (
388
+ "๋‹น์‹ ์€ ํ•œ๊ตญ ์ตœ๊ณ ์˜ ํŒŸ์บ์ŠคํŠธ ๋Œ€๋ณธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
389
+ "ํ•œ๊ตญ์ธ์˜ ์ •์„œ์™€ ๋ฌธํ™”๋ฅผ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , 30๋Œ€ ํ•œ๊ตญ์ธ ์ฒญ์ทจ์ž๋“ค์ด ๋๊นŒ์ง€ ์ง‘์ค‘ํ•  ์ˆ˜ ์žˆ๋Š” "
390
+ "๋งค๋ ฅ์ ์ด๊ณ  ์œ ์ตํ•œ ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด๋ƒ…๋‹ˆ๋‹ค.\n\n"
391
+ "ํ•ต์‹ฌ ์›์น™:\n"
392
+ "1. ๋‘ ํ™”์ž๋Š” ๋ฐ˜๋“œ์‹œ ์„œ๋กœ์—๊ฒŒ ์กด๋Œ“๋ง์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค (๋ฐ˜๋ง ์ ˆ๋Œ€ ๊ธˆ์ง€)\n"
393
+ "2. ํ•œ๊ตญ ๋ฌธํ™”์˜ ์ •์„œ์  ์ฝ”๋“œ์™€ ๊ฐ€์น˜๊ด€์„ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ๋ฐ˜์˜ํ•ฉ๋‹ˆ๋‹ค\n"
394
+ "3. ์‹ค์ œ ํ•œ๊ตญ์ธ๋“ค์ด ์ผ์ƒ์—์„œ ์‚ฌ์šฉํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„์„ ๊ตฌ์‚ฌํ•ฉ๋‹ˆ๋‹ค\n"
395
+ "4. ์ฒญ์ทจ์ž๊ฐ€ ๊ณต๊ฐํ•˜๊ณ  ์‹ค์šฉ์ ์œผ๋กœ ํ™œ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ๋‚ด์šฉ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค\n"
396
+ "5. ์ตœ์‹  ์ •๋ณด์™€ ํŠธ๋ Œ๋“œ๋ฅผ ์ ์ ˆํžˆ ๋ฐ˜์˜ํ•˜์—ฌ ์‹œ์˜์„ฑ์„ ํ™•๋ณดํ•ฉ๋‹ˆ๋‹ค\n\n"
397
+ "๋‹น์‹ ์˜ ๋Œ€๋ณธ์€ ํ•œ๊ตญ ํŒŸ์บ์ŠคํŠธ ์‹œ์žฅ์—์„œ ์ตœ๊ณ  ์ˆ˜์ค€์˜ ํ’ˆ์งˆ๋กœ ์ธ์ •๋ฐ›๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค."
398
  )
399
  else:
400
  system_message = (
401
  "You are an expert podcast scriptwriter who creates engaging, "
402
  "natural conversations that keep listeners hooked. "
403
  "You understand how to balance information with entertainment, "
404
+ "using real conversational patterns and authentic reactions. "
405
+ "You excel at incorporating current information and trends to make content relevant and timely."
406
  )
407
 
408
  return [
409
  {"role": "system", "content": system_message},
410
+ {"role": "user", "content": self._build_prompt(text, language, search_context)}
411
  ]
412
 
413
  @spaces.GPU(duration=120)
414
  def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
415
+ """Extract conversation using new local LLM with search context"""
416
  try:
417
+ # ๊ฒ€์ƒ‰ ์ปจํ…์ŠคํŠธ ์ƒ์„ฑ
418
+ search_context = ""
419
+ if BRAVE_KEY:
420
+ try:
421
+ keywords = extract_keywords_for_search(text, language)
422
+ if keywords:
423
+ # ์ฒซ ๋ฒˆ์งธ ํ‚ค์›Œ๋“œ๋กœ ๊ฒ€์ƒ‰
424
+ search_query = keywords[0] if language == "Korean" else f"{keywords[0]} latest news"
425
+ search_context = format_search_results(search_query)
426
+ print(f"Search context added for: {search_query}")
427
+ except Exception as e:
428
+ print(f"Search failed, continuing without context: {e}")
429
+
430
  # ๋จผ์ € ์ƒˆ๋กœ์šด ๋กœ์ปฌ LLM ์‹œ๋„
431
  self.initialize_local_mode()
432
 
 
439
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
440
  "ํ•œ๊ตญ ์ฒญ์ทจ์ž๋“ค์˜ ๋ฌธํ™”์  ๋งฅ๋ฝ๊ณผ ์–ธ์–ด์  ํŠน์„ฑ์„ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , "
441
  "์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๋Œ€๋ณธ์„ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. "
442
+ "ํŠนํžˆ ๋‘ ํ™”์ž๊ฐ€ ์„œ๋กœ์—๊ฒŒ ์กด๋Œ“๋ง์„ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ์ด ํ•„์ˆ˜์ด๋ฉฐ, "
443
+ "๋ฐ˜๋ง์€ ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. "
444
  "์‹ค์ œ ํ•œ๊ตญ์ธ์ด ๋Œ€ํ™”ํ•˜๋Š” ๊ฒƒ์ฒ˜๋Ÿผ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„, ์ ์ ˆํ•œ ๊ฐํƒ„์‚ฌ, "
445
  "๋ฌธํ™”์ ์œผ๋กœ ์ ํ•ฉํ•œ ์˜ˆ์‹œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ฒญ์ทจ์ž๊ฐ€ ๊ณต๊ฐํ•˜๊ณ  ๋ชฐ์ž…ํ•  ์ˆ˜ ์žˆ๋Š” "
446
+ "๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”. ์ตœ์‹  ์ •๋ณด๋„ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ๋ฐ˜์˜ํ•˜์„ธ์š”. JSON ํ˜•์‹์œผ๋กœ๋งŒ ์‘๋‹ตํ•˜์„ธ์š”."
447
  )
448
  else:
449
  system_message = (
 
451
  "engaging, natural conversations that captivate listeners. "
452
  "You excel at transforming complex information into accessible, "
453
  "entertaining dialogue while maintaining authenticity and educational value. "
454
+ "Incorporate current trends and latest information naturally. "
455
  "Respond only in JSON format."
456
  )
457
 
 
472
 
473
  messages = BasicChatHistory()
474
 
475
+ prompt = self._build_prompt(text, language, search_context)
476
  response = agent.get_chat_response(
477
  prompt,
478
  llm_sampling_settings=settings,
 
492
 
493
  except Exception as e:
494
  print(f"Local LLM failed: {e}, falling back to legacy local method")
495
+ return self.extract_conversation_legacy_local(text, language, progress, search_context)
496
 
497
  @spaces.GPU(duration=120)
498
+ def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
499
  """Extract conversation using legacy local model (fallback)"""
500
  try:
501
  self.initialize_legacy_local_mode()
 
505
  system_message = (
506
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
507
  "30๋Œ€ ํ•œ๊ตญ์ธ ์ฒญ์ทจ์ž๋ฅผ ๋Œ€์ƒ์œผ๋กœ ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ํฅ๋ฏธ๋กœ์šด ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”. "
508
+ "๋‘ ํ™”์ž๋Š” ๋ฐ˜๋“œ์‹œ ์„œ๋กœ์—๊ฒŒ ์กด๋Œ“๋ง์„ ์‚ฌ์šฉํ•˜๋ฉฐ, ๋ฐ˜๋ง์€ ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. "
509
+ "์‹ค์ œ ์‚ฌ์šฉํ•˜๋Š” ํ•œ๊ตญ์–ด ํ‘œํ˜„๊ณผ ๋ฌธํ™”์  ๋งฅ๋ฝ์„ ๋ฐ˜์˜ํ•˜์—ฌ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”. "
510
+ "์ตœ์‹  ์ •๋ณด๋„ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ํฌํ•จ์‹œ์ผœ์ฃผ์„ธ์š”."
511
  )
512
  else:
513
  system_message = (
514
  "You are an expert podcast scriptwriter. "
515
+ "Create natural, engaging conversations that inform and entertain listeners. "
516
+ "Incorporate current information and trends naturally."
517
  )
518
 
519
  chat = [
520
  {"role": "system", "content": system_message},
521
+ {"role": "user", "content": self._build_prompt(text, language, search_context)}
522
  ]
523
 
524
  terminators = [
 
561
 
562
  except Exception as e:
563
  print(f"Legacy local model also failed: {e}")
564
+ # Return default template with Korean male names using formal speech
565
  if language == "Korean":
566
  return {
567
  "conversation": [
568
+ {"speaker": "์ค€์ˆ˜", "text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์—ฌ๋Ÿฌ๋ถ„! ์˜ค๋Š˜๋„ ์ €ํฌ ํŒŸ์บ์ŠคํŠธ๋ฅผ ์ฐพ์•„์ฃผ์…”์„œ ์ •๋ง ๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค. ๋ฏผํ˜ธ์”จ, ์˜ค๋Š˜ ์ •๋ง ํฅ๋ฏธ๋กœ์šด ์ฃผ์ œ๋ฅผ ์ค€๋น„ํ•ด์ฃผ์…จ๋‹ค๊ณ  ๋“ค์—ˆ์–ด์š”."},
569
+ {"speaker": "๋ฏผํ˜ธ", "text": "๋„ค, ์•ˆ๋…•ํ•˜์„ธ์š”! ์ค€์ˆ˜์”จ ๋ง์”€์ฒ˜๋Ÿผ ์˜ค๋Š˜์€ ์ •๋ง ํฅ๋ฏธ๋กœ์šด ์ด์•ผ๊ธฐ๋ฅผ ์ค€๋น„ํ–ˆ์Šต๋‹ˆ๋‹ค. ์ฒญ์ทจ์ž ์—ฌ๋Ÿฌ๋ถ„๋“ค๊ป˜์„œ๋„ ๋งŽ์€ ๊ด€์‹ฌ์„ ๊ฐ€์ง€๊ณ  ๊ณ„์‹ค ์ฃผ์ œ์ธ ๊ฒƒ ๊ฐ™์•„์š”."},
570
+ {"speaker": "์ค€์ˆ˜", "text": "์ •๋ง ๊ธฐ๋Œ€๋˜๋Š”๋ฐ์š”. ๊ทธ๋Ÿฐ๋ฐ ๋ฏผํ˜ธ์”จ, ์ด ์ฃผ์ œ๊ฐ€ ์š”์ฆ˜ ์™œ ์ด๋ ‡๊ฒŒ ํ™”์ œ๊ฐ€ ๋˜๊ณ  ์žˆ๋Š” ๊ฑด๊ฐ€์š”? ์ฒญ์ทจ์ž ์—ฌ๋Ÿฌ๋ถ„๋“ค๋„ ๊ถ๊ธˆํ•ดํ•˜์‹ค ๊ฒƒ ๊ฐ™์•„์š”."},
571
+ {"speaker": "๋ฏผํ˜ธ", "text": "์ข‹์€ ์งˆ๋ฌธ์ด์„ธ์š”, ์ค€์ˆ˜์”จ. ์‚ฌ์‹ค ์ตœ๊ทผ์— ์ด ๋ถ„์•ผ์— ๋งŽ์€ ๋ณ€ํ™”๊ฐ€ ์žˆ์—ˆ๊ฑฐ๋“ ์š”. ๊ทธ๋Ÿผ ๋ณธ๊ฒฉ์ ์œผ๋กœ ํ•˜๋‚˜์”ฉ ์ฐจ๊ทผ์ฐจ๊ทผ ์„ค๋ช…ํ•ด๋“œ๋ฆด๊ฒŒ์š”."}
572
  ]
573
  }
574
  else:
 
582
  }
583
 
584
  def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
585
+ """Extract conversation using API with search context"""
586
  if not self.llm_client:
587
  raise RuntimeError("API mode not initialized")
588
 
589
  try:
590
+ # ๊ฒ€์ƒ‰ ์ปจํ…์ŠคํŠธ ์ƒ์„ฑ
591
+ search_context = ""
592
+ if BRAVE_KEY:
593
+ try:
594
+ keywords = extract_keywords_for_search(text, language)
595
+ if keywords:
596
+ search_query = keywords[0] if language == "Korean" else f"{keywords[0]} latest news"
597
+ search_context = format_search_results(search_query)
598
+ print(f"Search context added for: {search_query}")
599
+ except Exception as e:
600
+ print(f"Search failed, continuing without context: {e}")
601
+
602
  # ๊ฐ•ํ™”๋œ ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
603
  if language == "Korean":
604
  system_message = (
605
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
606
  "ํ•œ๊ตญ ์ฒญ์ทจ์ž๋“ค์˜ ๋ฌธํ™”์  ๋งฅ๋ฝ๊ณผ ์–ธ์–ด์  ํŠน์„ฑ์„ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , "
607
  "์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๋Œ€๋ณธ์„ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. "
608
+ "์ค€์ˆ˜(์ง„ํ–‰์ž)์™€ ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€)๋ผ๋Š” ๋‘ ๋ช…์˜ 30๋Œ€ ๋‚จ์„ฑ์ด ์„œ๋กœ์—๊ฒŒ ์กด๋Œ“๋ง์„ ์‚ฌ์šฉํ•˜์—ฌ ๋Œ€ํ™”ํ•˜๋Š” ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•˜์„ธ์š”. "
609
+ "๋ฐ˜๋ง์€ ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ์•Š์œผ๋ฉฐ, ์ตœ์‹  ์ •๋ณด๋„ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ๋ฐ˜์˜ํ•˜์„ธ์š”."
610
  )
611
  else:
612
  system_message = (
613
  "You are an expert podcast scriptwriter who creates engaging, "
614
  "natural conversations between Alex (host) and Jordan (expert). "
615
+ "Create informative yet entertaining dialogue that keeps listeners engaged. "
616
+ "Incorporate current trends and latest information naturally."
617
  )
618
 
619
  chat_completion = self.llm_client.chat.completions.create(
620
  messages=[
621
  {"role": "system", "content": system_message},
622
+ {"role": "user", "content": self._build_prompt(text, language, search_context)}
623
  ],
624
  model=self.config.api_model_name,
625
  )
 
1030
  - **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
1031
  - **Status**: {"โœ… Llama CPP Available" if LLAMA_CPP_AVAILABLE else "โŒ Llama CPP Not Available - Install llama-cpp-python"}
1032
  - **Max Tokens**: {converter.config.max_tokens} (Extended for longer conversations)
1033
+ - **Search**: {"โœ… Brave Search Enabled" if BRAVE_KEY else "โŒ Brave Search Not Available - Set BSEARCH_API"}
1034
  """)
1035
 
1036
  with gr.Row():
 
1094
 
1095
  **ํ•œ๊ตญ์–ด ์ง€์›:**
1096
  - ๐Ÿ‡ฐ๐Ÿ‡ท ํ•œ๊ตญ์–ด ์„ ํƒ ์‹œ Edge-TTS๋งŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค
1097
+ - ๐Ÿ‘จโ€๐Ÿ‘จ ํ•œ๊ตญ์–ด ๋Œ€ํ™”๋Š” ์ค€์ˆ˜(์ง„ํ–‰์ž)์™€ ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€) ๋‘ ๋‚จ์„ฑ์ด ์กด๋Œ“๋ง๋กœ ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค
1098
+ - ๐Ÿ” **์ตœ์‹  ์ •๋ณด ๋ฐ˜์˜**: Brave Search๋ฅผ ํ†ตํ•ด ์ตœ์‹  ์‹œ์‚ฌ ๋‚ด์šฉ์„ ์ž๋™์œผ๋กœ ๊ฒ€์ƒ‰ํ•˜์—ฌ ๋Œ€๋ณธ์— ๋ฐ˜์˜ํ•ฉ๋‹ˆ๋‹ค
1099
  """)
1100
 
1101
  convert_btn = gr.Button("๐ŸŽฏ Generate Conversation / ๋Œ€ํ™” ์ƒ์„ฑ", variant="primary", size="lg")
 
1104
  with gr.Column():
1105
  conversation_output = gr.Textbox(
1106
  label="Generated Conversation (Editable) / ์ƒ์„ฑ๋œ ๋Œ€ํ™” (ํŽธ์ง‘ ๊ฐ€๋Šฅ)",
1107
+ lines=25, # ๋” ๊ธด ๋Œ€ํ™”๋ฅผ ์œ„ํ•ด ์ฆ๊ฐ€
1108
+ max_lines=50,
1109
  interactive=True,
1110
+ placeholder="Generated conversation will appear here. You can edit it before generating audio.\n์ƒ์„ฑ๋œ ๋Œ€ํ™”๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค. ์˜ค๋””์˜ค ์ƒ์„ฑ ์ „์— ํŽธ์ง‘ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.\n\nํ•œ๊ตญ์–ด ๋Œ€ํ™”๋Š” ์กด๋Œ“๋ง๋กœ ์ง„ํ–‰๋˜๋ฉฐ ์ตœ์‹  ์‹œ์‚ฌ ๋‚ด์šฉ์ด ๋ฐ˜์˜๋ฉ๋‹ˆ๋‹ค.",
1111
  info="Edit the conversation as needed. Format: 'Speaker Name: Text' / ํ•„์š”์— ๋”ฐ๋ผ ๋Œ€ํ™”๋ฅผ ํŽธ์ง‘ํ•˜์„ธ์š”. ํ˜•์‹: 'ํ™”์ž ์ด๋ฆ„: ํ…์ŠคํŠธ'"
1112
  )
1113