openfree committed on
Commit
db70ad9
·
verified ·
1 Parent(s): 99fb44e

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +24 -135
app-backup.py CHANGED
@@ -249,69 +249,6 @@ class UnifiedAudioConverter:
249
  {"role": "user", "content": self._build_prompt(text, language)}
250
  ]
251
 
252
- @spaces.GPU(duration=120)
253
- def translate_to_korean(self, conversation_json: Dict) -> Dict:
254
- """Translate English conversation to Korean using local LLM"""
255
- try:
256
- self.initialize_local_mode()
257
-
258
- chat_template = self._get_messages_formatter_type(self.config.local_model_name)
259
- provider = LlamaCppPythonProvider(self.local_llm)
260
-
261
- system_message = """๋‹น์‹ ์€ ์ „๋ฌธ ๋ฒˆ์—ญ๊ฐ€์ž…๋‹ˆ๋‹ค. ์˜์–ด ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด ๊ตฌ์–ด์ฒด๋กœ ๋ฒˆ์—ญํ•ด์ฃผ์„ธ์š”.
262
- ํ•œ๊ตญ์  ํ‘œํ˜„์„ ์‚ฌ์šฉํ•˜๊ณ , ํŒŸ์บ์ŠคํŠธ์— ์ ํ•ฉํ•œ ์นœ๊ทผํ•˜๊ณ  ์ž์—ฐ์Šค๋Ÿฌ์šด ๋งํˆฌ๋กœ ๋ฒˆ์—ญํ•˜์„ธ์š”.
263
- ์ „๋ฌธ ์šฉ์–ด๋Š” ํ•œ๊ตญ์–ด๋กœ ์ ์ ˆํžˆ ์˜์—ญํ•˜๊ฑฐ๋‚˜ ์„ค๋ช…์„ ์ถ”๊ฐ€ํ•˜์„ธ์š”. JSON ํ˜•์‹์„ ์œ ์ง€ํ•˜์„ธ์š”."""
264
-
265
- agent = LlamaCppAgent(
266
- provider,
267
- system_prompt=system_message,
268
- predefined_messages_formatter_type=chat_template,
269
- debug_output=False
270
- )
271
-
272
- settings = provider.get_provider_default_settings()
273
- settings.temperature = 0.7
274
- settings.top_k = 40
275
- settings.top_p = 0.95
276
- settings.max_tokens = 2048
277
- settings.repeat_penalty = 1.1
278
- settings.stream = False
279
-
280
- messages = BasicChatHistory()
281
-
282
- # ๋ฒˆ์—ญํ•  ๋Œ€ํ™”๋ฅผ JSON ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜
283
- conversation_str = json.dumps(conversation_json, ensure_ascii=False, indent=2)
284
-
285
- prompt = f"""๋‹ค์Œ ์˜์–ด ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด ๊ตฌ์–ด์ฒด๋กœ ๋ฒˆ์—ญํ•ด์ฃผ์„ธ์š”.
286
- ํ•œ๊ตญ์  ํ‘œํ˜„๊ณผ ์นœ๊ทผํ•œ ๋งํˆฌ๋ฅผ ์‚ฌ์šฉํ•˜๊ณ , JSON ํ˜•์‹์„ ๊ทธ๋Œ€๋กœ ์œ ์ง€ํ•˜์„ธ์š”:
287
-
288
- {conversation_str}
289
-
290
- ๋ฒˆ์—ญ๋œ ๊ฒฐ๊ณผ๋ฅผ JSON ํ˜•์‹์œผ๋กœ๋งŒ ๋ฐ˜ํ™˜ํ•ด์ฃผ์„ธ์š”."""
291
-
292
- response = agent.get_chat_response(
293
- prompt,
294
- llm_sampling_settings=settings,
295
- chat_history=messages,
296
- returns_streaming_generator=False,
297
- print_output=False
298
- )
299
-
300
- # JSON ํŒŒ์‹ฑ
301
- pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"
302
- json_match = re.search(pattern, response)
303
-
304
- if json_match:
305
- translated_json = json.loads(json_match.group())
306
- return translated_json
307
- else:
308
- print("๋ฒˆ์—ญ ์‹คํŒจ, ์›๋ณธ ๋ฐ˜ํ™˜")
309
- return conversation_json
310
-
311
- except Exception as e:
312
- print(f"๋ฒˆ์—ญ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}, ์›๋ณธ ๋ฐ˜ํ™˜")
313
- return conversation_json
314
-
315
  @spaces.GPU(duration=120)
316
  def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
317
  """Extract conversation using new local LLM (primary method)"""
@@ -322,8 +259,11 @@ class UnifiedAudioConverter:
322
  chat_template = self._get_messages_formatter_type(self.config.local_model_name)
323
  provider = LlamaCppPythonProvider(self.local_llm)
324
 
325
- # ์˜์–ด๋กœ ๋Œ€ํ™” ์ƒ์„ฑ (์ผ๋‹จ ์˜์–ด๋กœ ์ƒ์„ฑํ•˜๊ณ  ํ•œ๊ตญ์–ด ์„ ํƒ์‹œ ๋ฒˆ์—ญ)
326
- system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations. Respond only in JSON format."
 
 
 
327
 
328
  agent = LlamaCppAgent(
329
  provider,
@@ -342,8 +282,7 @@ class UnifiedAudioConverter:
342
 
343
  messages = BasicChatHistory()
344
 
345
- # ์˜์–ด ํ”„๋กฌํ”„ํŠธ๋กœ ์ƒ์„ฑ
346
- prompt = self._build_prompt(text, "English")
347
  response = agent.get_chat_response(
348
  prompt,
349
  llm_sampling_settings=settings,
@@ -357,14 +296,7 @@ class UnifiedAudioConverter:
357
  json_match = re.search(pattern, response)
358
 
359
  if json_match:
360
- conversation_json = json.loads(json_match.group())
361
-
362
- # ํ•œ๊ตญ์–ด๊ฐ€ ์„ ํƒ๋œ ๊ฒฝ์šฐ ๋ฒˆ์—ญ ์ˆ˜ํ–‰
363
- if language == "Korean":
364
- print("ํ•œ๊ตญ์–ด ๋ฒˆ์—ญ ์ค‘...")
365
- conversation_json = self.translate_to_korean(conversation_json)
366
-
367
- return conversation_json
368
  else:
369
  raise ValueError("No valid JSON found in local LLM response")
370
 
@@ -378,12 +310,15 @@ class UnifiedAudioConverter:
378
  try:
379
  self.initialize_legacy_local_mode()
380
 
381
- # ์˜์–ด๋กœ ๋Œ€ํ™” ์ƒ์„ฑ
382
- system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
 
 
 
383
 
384
  chat = [
385
  {"role": "system", "content": system_message},
386
- {"role": "user", "content": self._build_prompt(text, "English")}
387
  ]
388
 
389
  terminators = [
@@ -420,14 +355,7 @@ class UnifiedAudioConverter:
420
  json_match = re.search(pattern, partial_text)
421
 
422
  if json_match:
423
- conversation_json = json.loads(json_match.group())
424
-
425
- # ํ•œ๊ตญ์–ด๊ฐ€ ์„ ํƒ๋œ ๊ฒฝ์šฐ ๋ฒˆ์—ญ ์ˆ˜ํ–‰
426
- if language == "Korean":
427
- print("ํ•œ๊ตญ์–ด ๋ฒˆ์—ญ ์ค‘...")
428
- conversation_json = self.translate_to_korean(conversation_json)
429
-
430
- return conversation_json
431
  else:
432
  raise ValueError("No valid JSON found in legacy local response")
433
 
@@ -455,13 +383,16 @@ class UnifiedAudioConverter:
455
  raise RuntimeError("API mode not initialized")
456
 
457
  try:
458
- # ์˜์–ด๋กœ ๋Œ€ํ™” ์ƒ์„ฑ
459
- system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
 
 
 
460
 
461
  chat_completion = self.llm_client.chat.completions.create(
462
  messages=[
463
  {"role": "system", "content": system_message},
464
- {"role": "user", "content": self._build_prompt(text, "English")}
465
  ],
466
  model=self.config.api_model_name,
467
  )
@@ -472,14 +403,7 @@ class UnifiedAudioConverter:
472
  if not json_match:
473
  raise ValueError("No valid JSON found in response")
474
 
475
- conversation_json = json.loads(json_match.group())
476
-
477
- # ํ•œ๊ตญ์–ด๊ฐ€ ์„ ํƒ๋œ ๊ฒฝ์šฐ ๋ฒˆ์—ญ ์ˆ˜ํ–‰
478
- if language == "Korean":
479
- print("ํ•œ๊ตญ์–ด ๋ฒˆ์—ญ ์ค‘...")
480
- conversation_json = self.translate_to_korean(conversation_json)
481
-
482
- return conversation_json
483
  except Exception as e:
484
  raise RuntimeError(f"Failed to extract conversation: {e}")
485
 
@@ -865,7 +789,6 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
865
  - **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
866
  - **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
867
  - **Status**: {"โœ… Llama CPP Available" if LLAMA_CPP_AVAILABLE else "โŒ Llama CPP Not Available - Install llama-cpp-python"}
868
- - **ํ•œ๊ตญ์–ด ์ง€์›**: ์˜์–ด ๋Œ€ํ™” ์ƒ์„ฑ ํ›„ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด ๊ตฌ์–ด์ฒด๋กœ ์ž๋™ ๋ฒˆ์—ญ
869
  """)
870
 
871
  with gr.Row():
@@ -881,7 +804,7 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
881
  choices=["English", "Korean"],
882
  value="English",
883
  label="Language / ์–ธ์–ด",
884
- info="Select output language / ์ถœ๋ ฅ ์–ธ์–ด๋ฅผ ์„ ํƒํ•˜์„ธ์š” (ํ•œ๊ตญ์–ด ์„ ํƒ ์‹œ ์ž๋™ ๋ฒˆ์—ญ)"
885
  )
886
 
887
  mode_selector = gr.Radio(
@@ -911,7 +834,6 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
911
 
912
  **ํ•œ๊ตญ์–ด ์ง€์›:**
913
  - ๐Ÿ‡ฐ๐Ÿ‡ท ํ•œ๊ตญ์–ด ์„ ํƒ ์‹œ Edge-TTS๋งŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค
914
- - ๐Ÿ“ ์˜์–ด๋กœ ๋Œ€ํ™” ์ƒ์„ฑ ํ›„ ์ž์—ฐ์Šค๋Ÿฌ์šด ๏ฟฝ๏ฟฝ๏ฟฝ๊ตญ์–ด ๊ตฌ์–ด์ฒด๋กœ ์ž๋™ ๋ฒˆ์—ญ๋ฉ๋‹ˆ๋‹ค
915
  """)
916
 
917
  convert_btn = gr.Button("๐ŸŽฏ Generate Conversation / ๋Œ€ํ™” ์ƒ์„ฑ", variant="primary", size="lg")
@@ -946,45 +868,12 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
946
  visible=True
947
  )
948
 
949
- # TTS ์—”์ง„๋ณ„ ์„ค๋ช… ๋ฐ ์„ค์น˜ ์•ˆ๋‚ด ์ถ”๊ฐ€
950
- with gr.Row():
951
- gr.Markdown("""
952
- ### TTS Engine Details / TTS ์—”์ง„ ์ƒ์„ธ์ •๋ณด:
953
-
954
- - **Edge-TTS**: Microsoft's cloud TTS service with high-quality natural voices. Requires internet connection.
955
- - ๐Ÿ‡ฐ๐Ÿ‡ท **ํ•œ๊ตญ์–ด ์ง€์›**: ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด ์Œ์„ฑ (์—ฌ์„ฑ: SunHi, ๋‚จ์„ฑ: Hyunsu)
956
- - **Spark-TTS**: SparkAudio's local AI model (0.5B parameters) with zero-shot voice cloning capability.
957
- - **Setup required**: Clone [Spark-TTS repository](https://github.com/SparkAudio/Spark-TTS) in current directory
958
- - Features: Bilingual support (Chinese/English), controllable speech generation
959
- - License: CC BY-NC-SA (Non-commercial use only)
960
- - โš ๏ธ **ํ•œ๊ตญ์–ด ๋ฏธ์ง€์›**
961
- - **MeloTTS**: Local TTS with multiple voice options. GPU recommended for better performance.
962
- - โš ๏ธ **ํ•œ๊ตญ์–ด ๋ฏธ์ง€์›**
963
-
964
- ### Local LLM Setup / ๋กœ์ปฌ LLM ์„ค์ •:
965
- The system now uses **Private-BitSix-Mistral-Small-3.1-24B-Instruct** as the primary LLM, which runs locally on your device for privacy and independence. API fallback is available when needed.
966
-
967
- ๋กœ์ปฌ ๋””๋ฐ”์ด์Šค์—์„œ ๊ฐœ์ธ์ •๋ณด ๋ณดํ˜ธ์™€ ๋…๋ฆฝ์„ฑ์„ ์œ„ํ•ด **Private-BitSix-Mistral-Small-3.1-24B-Instruct**๋ฅผ ๊ธฐ๋ณธ LLM์œผ๋กœ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. ํ•„์š”์‹œ API ํด๋ฐฑ์ด ์ œ๊ณต๋ฉ๋‹ˆ๋‹ค.
968
-
969
- ### ํ•œ๊ตญ์–ด ๋ฒˆ์—ญ ๊ธฐ๋Šฅ / Korean Translation Feature:
970
- - ํ•œ๊ตญ์–ด๋ฅผ ์„ ํƒํ•˜๋ฉด ์˜์–ด๋กœ ๋Œ€ํ™”๋ฅผ ์ƒ์„ฑํ•œ ํ›„ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด ๊ตฌ์–ด์ฒด๋กœ ์ž๋™ ๋ฒˆ์—ญ๋ฉ๋‹ˆ๋‹ค
971
- - ํ•œ๊ตญ์  ํ‘œํ˜„๊ณผ ์นœ๊ทผํ•œ ๋งํˆฌ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ž์—ฐ์Šค๋Ÿฌ์šด ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ญ๋‹ˆ๋‹ค
972
- - When Korean is selected, conversations are generated in English first, then automatically translated to natural Korean colloquial style
973
-
974
- ### Spark-TTS Setup Instructions:
975
- ```bash
976
- git clone https://github.com/SparkAudio/Spark-TTS.git
977
- cd Spark-TTS
978
- pip install -r requirements.txt
979
- ```
980
- """)
981
 
982
  gr.Examples(
983
  examples=[
984
  ["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
985
- ["https://arxiv.org/html/2505.16938v1", "Local", "Edge-TTS", "English"],
986
- ["https://domeggook.com/55204552?from=popular100", "Local", "Edge-TTS", "Korean"],
987
- ["https://www.wsj.com/articles/nvidia-pushes-further-into-cloud-with-gpu-marketplace-4fba6bdd","Local", "Edge-TTS", "English"],
988
  ],
989
  inputs=[url_input, mode_selector, tts_selector, language_selector],
990
  outputs=[conversation_output, status_output],
 
249
  {"role": "user", "content": self._build_prompt(text, language)}
250
  ]
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  @spaces.GPU(duration=120)
253
  def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
254
  """Extract conversation using new local LLM (primary method)"""
 
259
  chat_template = self._get_messages_formatter_type(self.config.local_model_name)
260
  provider = LlamaCppPythonProvider(self.local_llm)
261
 
262
+ # ์–ธ์–ด๋ณ„ ์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€
263
+ if language == "Korean":
264
+ system_message = "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ์ƒ์„ฑํ•˜๋Š” ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ์œ ์ตํ•œ ํ•œ๊ตญ์–ด ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”. JSON ํ˜•์‹์œผ๋กœ๋งŒ ์‘๋‹ตํ•˜์„ธ์š”."
265
+ else:
266
+ system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations. Respond only in JSON format."
267
 
268
  agent = LlamaCppAgent(
269
  provider,
 
282
 
283
  messages = BasicChatHistory()
284
 
285
+ prompt = self._build_prompt(text, language)
 
286
  response = agent.get_chat_response(
287
  prompt,
288
  llm_sampling_settings=settings,
 
296
  json_match = re.search(pattern, response)
297
 
298
  if json_match:
299
+ return json.loads(json_match.group())
 
 
 
 
 
 
 
300
  else:
301
  raise ValueError("No valid JSON found in local LLM response")
302
 
 
310
  try:
311
  self.initialize_legacy_local_mode()
312
 
313
+ # ์–ธ์–ด๋ณ„ ์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€
314
+ if language == "Korean":
315
+ system_message = "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ์ƒ์„ฑํ•˜๋Š” ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ์œ ์ตํ•œ ํ•œ๊ตญ์–ด ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”."
316
+ else:
317
+ system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
318
 
319
  chat = [
320
  {"role": "system", "content": system_message},
321
+ {"role": "user", "content": self._build_prompt(text, language)}
322
  ]
323
 
324
  terminators = [
 
355
  json_match = re.search(pattern, partial_text)
356
 
357
  if json_match:
358
+ return json.loads(json_match.group())
 
 
 
 
 
 
 
359
  else:
360
  raise ValueError("No valid JSON found in legacy local response")
361
 
 
383
  raise RuntimeError("API mode not initialized")
384
 
385
  try:
386
+ # ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
387
+ if language == "Korean":
388
+ system_message = "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ์ƒ์„ฑํ•˜๋Š” ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ์œ ์ตํ•œ ํ•œ๊ตญ์–ด ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”."
389
+ else:
390
+ system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
391
 
392
  chat_completion = self.llm_client.chat.completions.create(
393
  messages=[
394
  {"role": "system", "content": system_message},
395
+ {"role": "user", "content": self._build_prompt(text, language)}
396
  ],
397
  model=self.config.api_model_name,
398
  )
 
403
  if not json_match:
404
  raise ValueError("No valid JSON found in response")
405
 
406
+ return json.loads(json_match.group())
 
 
 
 
 
 
 
407
  except Exception as e:
408
  raise RuntimeError(f"Failed to extract conversation: {e}")
409
 
 
789
  - **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
790
  - **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
791
  - **Status**: {"โœ… Llama CPP Available" if LLAMA_CPP_AVAILABLE else "โŒ Llama CPP Not Available - Install llama-cpp-python"}
 
792
  """)
793
 
794
  with gr.Row():
 
804
  choices=["English", "Korean"],
805
  value="English",
806
  label="Language / ์–ธ์–ด",
807
+ info="Select output language / ์ถœ๋ ฅ ์–ธ์–ด๋ฅผ ์„ ํƒํ•˜์„ธ์š”"
808
  )
809
 
810
  mode_selector = gr.Radio(
 
834
 
835
  **ํ•œ๊ตญ์–ด ์ง€์›:**
836
  - ๐Ÿ‡ฐ๐Ÿ‡ท ํ•œ๊ตญ์–ด ์„ ํƒ ์‹œ Edge-TTS๋งŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค
 
837
  """)
838
 
839
  convert_btn = gr.Button("๐ŸŽฏ Generate Conversation / ๋Œ€ํ™” ์ƒ์„ฑ", variant="primary", size="lg")
 
868
  visible=True
869
  )
870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
871
 
872
  gr.Examples(
873
  examples=[
874
  ["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
875
+ ["https://www.bbc.com/news/technology-67988517", "Local", "Spark-TTS", "English"],
876
+ ["https://huggingface.co/papers/2505.14810", "Local", "Edge-TTS", "Korean"],
 
877
  ],
878
  inputs=[url_input, mode_selector, tts_selector, language_selector],
879
  outputs=[conversation_output, status_output],