Spaces:
Running
on
Zero
Running
on
Zero
Update app-backup.py
Browse files- app-backup.py +24 -135
app-backup.py
CHANGED
@@ -249,69 +249,6 @@ class UnifiedAudioConverter:
|
|
249 |
{"role": "user", "content": self._build_prompt(text, language)}
|
250 |
]
|
251 |
|
252 |
-
@spaces.GPU(duration=120)
|
253 |
-
def translate_to_korean(self, conversation_json: Dict) -> Dict:
|
254 |
-
"""Translate English conversation to Korean using local LLM"""
|
255 |
-
try:
|
256 |
-
self.initialize_local_mode()
|
257 |
-
|
258 |
-
chat_template = self._get_messages_formatter_type(self.config.local_model_name)
|
259 |
-
provider = LlamaCppPythonProvider(self.local_llm)
|
260 |
-
|
261 |
-
system_message = """๋น์ ์ ์ ๋ฌธ ๋ฒ์ญ๊ฐ์
๋๋ค. ์์ด ํ์บ์คํธ ๋ํ๋ฅผ ์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด ๊ตฌ์ด์ฒด๋ก ๋ฒ์ญํด์ฃผ์ธ์.
|
262 |
-
ํ๊ตญ์ ํํ์ ์ฌ์ฉํ๊ณ , ํ์บ์คํธ์ ์ ํฉํ ์น๊ทผํ๊ณ ์์ฐ์ค๋ฌ์ด ๋งํฌ๋ก ๋ฒ์ญํ์ธ์.
|
263 |
-
์ ๋ฌธ ์ฉ์ด๋ ํ๊ตญ์ด๋ก ์ ์ ํ ์์ญํ๊ฑฐ๋ ์ค๋ช
์ ์ถ๊ฐํ์ธ์. JSON ํ์์ ์ ์งํ์ธ์."""
|
264 |
-
|
265 |
-
agent = LlamaCppAgent(
|
266 |
-
provider,
|
267 |
-
system_prompt=system_message,
|
268 |
-
predefined_messages_formatter_type=chat_template,
|
269 |
-
debug_output=False
|
270 |
-
)
|
271 |
-
|
272 |
-
settings = provider.get_provider_default_settings()
|
273 |
-
settings.temperature = 0.7
|
274 |
-
settings.top_k = 40
|
275 |
-
settings.top_p = 0.95
|
276 |
-
settings.max_tokens = 2048
|
277 |
-
settings.repeat_penalty = 1.1
|
278 |
-
settings.stream = False
|
279 |
-
|
280 |
-
messages = BasicChatHistory()
|
281 |
-
|
282 |
-
# ๋ฒ์ญํ ๋ํ๋ฅผ JSON ๋ฌธ์์ด๋ก ๋ณํ
|
283 |
-
conversation_str = json.dumps(conversation_json, ensure_ascii=False, indent=2)
|
284 |
-
|
285 |
-
prompt = f"""๋ค์ ์์ด ํ์บ์คํธ ๋ํ๋ฅผ ์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด ๊ตฌ์ด์ฒด๋ก ๋ฒ์ญํด์ฃผ์ธ์.
|
286 |
-
ํ๊ตญ์ ํํ๊ณผ ์น๊ทผํ ๋งํฌ๋ฅผ ์ฌ์ฉํ๊ณ , JSON ํ์์ ๊ทธ๋๋ก ์ ์งํ์ธ์:
|
287 |
-
|
288 |
-
{conversation_str}
|
289 |
-
|
290 |
-
๋ฒ์ญ๋ ๊ฒฐ๊ณผ๋ฅผ JSON ํ์์ผ๋ก๋ง ๋ฐํํด์ฃผ์ธ์."""
|
291 |
-
|
292 |
-
response = agent.get_chat_response(
|
293 |
-
prompt,
|
294 |
-
llm_sampling_settings=settings,
|
295 |
-
chat_history=messages,
|
296 |
-
returns_streaming_generator=False,
|
297 |
-
print_output=False
|
298 |
-
)
|
299 |
-
|
300 |
-
# JSON ํ์ฑ
|
301 |
-
pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"
|
302 |
-
json_match = re.search(pattern, response)
|
303 |
-
|
304 |
-
if json_match:
|
305 |
-
translated_json = json.loads(json_match.group())
|
306 |
-
return translated_json
|
307 |
-
else:
|
308 |
-
print("๋ฒ์ญ ์คํจ, ์๋ณธ ๋ฐํ")
|
309 |
-
return conversation_json
|
310 |
-
|
311 |
-
except Exception as e:
|
312 |
-
print(f"๋ฒ์ญ ์ค ์ค๋ฅ ๋ฐ์: {e}, ์๋ณธ ๋ฐํ")
|
313 |
-
return conversation_json
|
314 |
-
|
315 |
@spaces.GPU(duration=120)
|
316 |
def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
|
317 |
"""Extract conversation using new local LLM (primary method)"""
|
@@ -322,8 +259,11 @@ class UnifiedAudioConverter:
|
|
322 |
chat_template = self._get_messages_formatter_type(self.config.local_model_name)
|
323 |
provider = LlamaCppPythonProvider(self.local_llm)
|
324 |
|
325 |
-
#
|
326 |
-
|
|
|
|
|
|
|
327 |
|
328 |
agent = LlamaCppAgent(
|
329 |
provider,
|
@@ -342,8 +282,7 @@ class UnifiedAudioConverter:
|
|
342 |
|
343 |
messages = BasicChatHistory()
|
344 |
|
345 |
-
|
346 |
-
prompt = self._build_prompt(text, "English")
|
347 |
response = agent.get_chat_response(
|
348 |
prompt,
|
349 |
llm_sampling_settings=settings,
|
@@ -357,14 +296,7 @@ class UnifiedAudioConverter:
|
|
357 |
json_match = re.search(pattern, response)
|
358 |
|
359 |
if json_match:
|
360 |
-
|
361 |
-
|
362 |
-
# ํ๊ตญ์ด๊ฐ ์ ํ๋ ๊ฒฝ์ฐ ๋ฒ์ญ ์ํ
|
363 |
-
if language == "Korean":
|
364 |
-
print("ํ๊ตญ์ด ๋ฒ์ญ ์ค...")
|
365 |
-
conversation_json = self.translate_to_korean(conversation_json)
|
366 |
-
|
367 |
-
return conversation_json
|
368 |
else:
|
369 |
raise ValueError("No valid JSON found in local LLM response")
|
370 |
|
@@ -378,12 +310,15 @@ class UnifiedAudioConverter:
|
|
378 |
try:
|
379 |
self.initialize_legacy_local_mode()
|
380 |
|
381 |
-
#
|
382 |
-
|
|
|
|
|
|
|
383 |
|
384 |
chat = [
|
385 |
{"role": "system", "content": system_message},
|
386 |
-
{"role": "user", "content": self._build_prompt(text,
|
387 |
]
|
388 |
|
389 |
terminators = [
|
@@ -420,14 +355,7 @@ class UnifiedAudioConverter:
|
|
420 |
json_match = re.search(pattern, partial_text)
|
421 |
|
422 |
if json_match:
|
423 |
-
|
424 |
-
|
425 |
-
# ํ๊ตญ์ด๊ฐ ์ ํ๋ ๊ฒฝ์ฐ ๋ฒ์ญ ์ํ
|
426 |
-
if language == "Korean":
|
427 |
-
print("ํ๊ตญ์ด ๋ฒ์ญ ์ค...")
|
428 |
-
conversation_json = self.translate_to_korean(conversation_json)
|
429 |
-
|
430 |
-
return conversation_json
|
431 |
else:
|
432 |
raise ValueError("No valid JSON found in legacy local response")
|
433 |
|
@@ -455,13 +383,16 @@ class UnifiedAudioConverter:
|
|
455 |
raise RuntimeError("API mode not initialized")
|
456 |
|
457 |
try:
|
458 |
-
#
|
459 |
-
|
|
|
|
|
|
|
460 |
|
461 |
chat_completion = self.llm_client.chat.completions.create(
|
462 |
messages=[
|
463 |
{"role": "system", "content": system_message},
|
464 |
-
{"role": "user", "content": self._build_prompt(text,
|
465 |
],
|
466 |
model=self.config.api_model_name,
|
467 |
)
|
@@ -472,14 +403,7 @@ class UnifiedAudioConverter:
|
|
472 |
if not json_match:
|
473 |
raise ValueError("No valid JSON found in response")
|
474 |
|
475 |
-
|
476 |
-
|
477 |
-
# ํ๊ตญ์ด๊ฐ ์ ํ๋ ๊ฒฝ์ฐ ๋ฒ์ญ ์ํ
|
478 |
-
if language == "Korean":
|
479 |
-
print("ํ๊ตญ์ด ๋ฒ์ญ ์ค...")
|
480 |
-
conversation_json = self.translate_to_korean(conversation_json)
|
481 |
-
|
482 |
-
return conversation_json
|
483 |
except Exception as e:
|
484 |
raise RuntimeError(f"Failed to extract conversation: {e}")
|
485 |
|
@@ -865,7 +789,6 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
865 |
- **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
|
866 |
- **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
|
867 |
- **Status**: {"✅ Llama CPP Available" if LLAMA_CPP_AVAILABLE else "❌ Llama CPP Not Available - Install llama-cpp-python"}
|
868 |
-
- **ํ๊ตญ์ด ์ง์**: ์์ด ๋ํ ์์ฑ ํ ์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด ๊ตฌ์ด์ฒด๋ก ์๋ ๋ฒ์ญ
|
869 |
""")
|
870 |
|
871 |
with gr.Row():
|
@@ -881,7 +804,7 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
881 |
choices=["English", "Korean"],
|
882 |
value="English",
|
883 |
label="Language / 언어",
|
884 |
-
info="Select output language / ์ถ๋ ฅ ์ธ์ด๋ฅผ ์ ํํ์ธ์
|
885 |
)
|
886 |
|
887 |
mode_selector = gr.Radio(
|
@@ -911,7 +834,6 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
911 |
|
912 |
**한국어 지원:**
|
913 |
- ๐ฐ๐ท ํ๊ตญ์ด ์ ํ ์ Edge-TTS๋ง ์ฌ์ฉ ๊ฐ๋ฅํฉ๋๋ค
|
914 |
-
- ๐ ์์ด๋ก ๋ํ ์์ฑ ํ ์์ฐ์ค๋ฌ์ด ๏ฟฝ๏ฟฝ๏ฟฝ๊ตญ์ด ๊ตฌ์ด์ฒด๋ก ์๋ ๋ฒ์ญ๋ฉ๋๋ค
|
915 |
""")
|
916 |
|
917 |
convert_btn = gr.Button("🎯 Generate Conversation / 대화 생성", variant="primary", size="lg")
|
@@ -946,45 +868,12 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
946 |
visible=True
|
947 |
)
|
948 |
|
949 |
-
# TTS ์์ง๋ณ ์ค๋ช
๋ฐ ์ค์น ์๋ด ์ถ๊ฐ
|
950 |
-
with gr.Row():
|
951 |
-
gr.Markdown("""
|
952 |
-
### TTS Engine Details / TTS ์์ง ์์ธ์ ๋ณด:
|
953 |
-
|
954 |
-
- **Edge-TTS**: Microsoft's cloud TTS service with high-quality natural voices. Requires internet connection.
|
955 |
-
- ๐ฐ๐ท **ํ๊ตญ์ด ์ง์**: ์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด ์์ฑ (์ฌ์ฑ: SunHi, ๋จ์ฑ: Hyunsu)
|
956 |
-
- **Spark-TTS**: SparkAudio's local AI model (0.5B parameters) with zero-shot voice cloning capability.
|
957 |
-
- **Setup required**: Clone [Spark-TTS repository](https://github.com/SparkAudio/Spark-TTS) in current directory
|
958 |
-
- Features: Bilingual support (Chinese/English), controllable speech generation
|
959 |
-
- License: CC BY-NC-SA (Non-commercial use only)
|
960 |
-
- โ ๏ธ **ํ๊ตญ์ด ๋ฏธ์ง์**
|
961 |
-
- **MeloTTS**: Local TTS with multiple voice options. GPU recommended for better performance.
|
962 |
-
- โ ๏ธ **ํ๊ตญ์ด ๋ฏธ์ง์**
|
963 |
-
|
964 |
-
### Local LLM Setup / ๋ก์ปฌ LLM ์ค์ :
|
965 |
-
The system now uses **Private-BitSix-Mistral-Small-3.1-24B-Instruct** as the primary LLM, which runs locally on your device for privacy and independence. API fallback is available when needed.
|
966 |
-
|
967 |
-
๋ก์ปฌ ๋๋ฐ์ด์ค์์ ๊ฐ์ธ์ ๋ณด ๋ณดํธ์ ๋
๋ฆฝ์ฑ์ ์ํด **Private-BitSix-Mistral-Small-3.1-24B-Instruct**๋ฅผ ๊ธฐ๋ณธ LLM์ผ๋ก ์ฌ์ฉํฉ๋๋ค. ํ์์ API ํด๋ฐฑ์ด ์ ๊ณต๋ฉ๋๋ค.
|
968 |
-
|
969 |
-
### ํ๊ตญ์ด ๋ฒ์ญ ๊ธฐ๋ฅ / Korean Translation Feature:
|
970 |
-
- ํ๊ตญ์ด๋ฅผ ์ ํํ๋ฉด ์์ด๋ก ๋ํ๋ฅผ ์์ฑํ ํ ์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด ๊ตฌ์ด์ฒด๋ก ์๋ ๋ฒ์ญ๋ฉ๋๋ค
|
971 |
-
- ํ๊ตญ์ ํํ๊ณผ ์น๊ทผํ ๋งํฌ๋ฅผ ์ฌ์ฉํ์ฌ ์์ฐ์ค๋ฌ์ด ํ์บ์คํธ ๋ํ๋ฅผ ๋ง๋ญ๋๋ค
|
972 |
-
- When Korean is selected, conversations are generated in English first, then automatically translated to natural Korean colloquial style
|
973 |
-
|
974 |
-
### Spark-TTS Setup Instructions:
|
975 |
-
```bash
|
976 |
-
git clone https://github.com/SparkAudio/Spark-TTS.git
|
977 |
-
cd Spark-TTS
|
978 |
-
pip install -r requirements.txt
|
979 |
-
```
|
980 |
-
""")
|
981 |
|
982 |
gr.Examples(
|
983 |
examples=[
|
984 |
["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
|
985 |
-
["https://
|
986 |
-
["https://
|
987 |
-
["https://www.wsj.com/articles/nvidia-pushes-further-into-cloud-with-gpu-marketplace-4fba6bdd","Local", "Edge-TTS", "English"],
|
988 |
],
|
989 |
inputs=[url_input, mode_selector, tts_selector, language_selector],
|
990 |
outputs=[conversation_output, status_output],
|
|
|
249 |
{"role": "user", "content": self._build_prompt(text, language)}
|
250 |
]
|
251 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
@spaces.GPU(duration=120)
|
253 |
def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
|
254 |
"""Extract conversation using new local LLM (primary method)"""
|
|
|
259 |
chat_template = self._get_messages_formatter_type(self.config.local_model_name)
|
260 |
provider = LlamaCppPythonProvider(self.local_llm)
|
261 |
|
262 |
+
# 언어별 시스템 메시지
|
263 |
+
if language == "Korean":
|
264 |
+
system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요. JSON 형식으로만 응답하세요."
|
265 |
+
else:
|
266 |
+
system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations. Respond only in JSON format."
|
267 |
|
268 |
agent = LlamaCppAgent(
|
269 |
provider,
|
|
|
282 |
|
283 |
messages = BasicChatHistory()
|
284 |
|
285 |
+
prompt = self._build_prompt(text, language)
|
|
|
286 |
response = agent.get_chat_response(
|
287 |
prompt,
|
288 |
llm_sampling_settings=settings,
|
|
|
296 |
json_match = re.search(pattern, response)
|
297 |
|
298 |
if json_match:
|
299 |
+
return json.loads(json_match.group())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
else:
|
301 |
raise ValueError("No valid JSON found in local LLM response")
|
302 |
|
|
|
310 |
try:
|
311 |
self.initialize_legacy_local_mode()
|
312 |
|
313 |
+
# 언어별 시스템 메시지
|
314 |
+
if language == "Korean":
|
315 |
+
system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요."
|
316 |
+
else:
|
317 |
+
system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
|
318 |
|
319 |
chat = [
|
320 |
{"role": "system", "content": system_message},
|
321 |
+
{"role": "user", "content": self._build_prompt(text, language)}
|
322 |
]
|
323 |
|
324 |
terminators = [
|
|
|
355 |
json_match = re.search(pattern, partial_text)
|
356 |
|
357 |
if json_match:
|
358 |
+
return json.loads(json_match.group())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
359 |
else:
|
360 |
raise ValueError("No valid JSON found in legacy local response")
|
361 |
|
|
|
383 |
raise RuntimeError("API mode not initialized")
|
384 |
|
385 |
try:
|
386 |
+
# 언어별 프롬프트 구성
|
387 |
+
if language == "Korean":
|
388 |
+
system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요."
|
389 |
+
else:
|
390 |
+
system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
|
391 |
|
392 |
chat_completion = self.llm_client.chat.completions.create(
|
393 |
messages=[
|
394 |
{"role": "system", "content": system_message},
|
395 |
+
{"role": "user", "content": self._build_prompt(text, language)}
|
396 |
],
|
397 |
model=self.config.api_model_name,
|
398 |
)
|
|
|
403 |
if not json_match:
|
404 |
raise ValueError("No valid JSON found in response")
|
405 |
|
406 |
+
return json.loads(json_match.group())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
407 |
except Exception as e:
|
408 |
raise RuntimeError(f"Failed to extract conversation: {e}")
|
409 |
|
|
|
789 |
- **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
|
790 |
- **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
|
791 |
- **Status**: {"✅ Llama CPP Available" if LLAMA_CPP_AVAILABLE else "❌ Llama CPP Not Available - Install llama-cpp-python"}
|
|
|
792 |
""")
|
793 |
|
794 |
with gr.Row():
|
|
|
804 |
choices=["English", "Korean"],
|
805 |
value="English",
|
806 |
label="Language / 언어",
|
807 |
+
info="Select output language / 출력 언어를 선택하세요"
|
808 |
)
|
809 |
|
810 |
mode_selector = gr.Radio(
|
|
|
834 |
|
835 |
**한국어 지원:**
|
836 |
- ๐ฐ๐ท ํ๊ตญ์ด ์ ํ ์ Edge-TTS๋ง ์ฌ์ฉ ๊ฐ๋ฅํฉ๋๋ค
|
|
|
837 |
""")
|
838 |
|
839 |
convert_btn = gr.Button("🎯 Generate Conversation / 대화 생성", variant="primary", size="lg")
|
|
|
868 |
visible=True
|
869 |
)
|
870 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
871 |
|
872 |
gr.Examples(
|
873 |
examples=[
|
874 |
["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
|
875 |
+
["https://www.bbc.com/news/technology-67988517", "Local", "Spark-TTS", "English"],
|
876 |
+
["https://huggingface.co/papers/2505.14810", "Local", "Edge-TTS", "Korean"],
|
|
|
877 |
],
|
878 |
inputs=[url_input, mode_selector, tts_selector, language_selector],
|
879 |
outputs=[conversation_output, status_output],
|