Update app.py
app.py
CHANGED
@@ -17,9 +17,19 @@ from bs4 import BeautifulSoup
 from urllib.parse import urlparse
 import urllib.request

 # Get tokens from environment variables
 FRIENDLI_TOKEN = os.getenv("FRIENDLI_TOKEN", "YOUR_FRIENDLI_TOKEN")
 BAPI_TOKEN = os.getenv("BAPI_TOKEN", "YOUR_BRAVE_API_TOKEN")
 API_URL = "https://api.friendli.ai/dedicated/v1/chat/completions"
 BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
 MODEL_ID = "dep89a2fld32mcm"
@@ -32,15 +42,31 @@ class LLMCollaborativeSystem:
     def __init__(self):
         self.token = FRIENDLI_TOKEN
         self.bapi_token = BAPI_TOKEN
         self.api_url = API_URL
         self.brave_url = BRAVE_SEARCH_URL
         self.model_id = MODEL_ID
         self.test_mode = TEST_MODE or (self.token == "YOUR_FRIENDLI_TOKEN")

         if self.test_mode:
             logger.warning("Running in test mode.")
         if self.bapi_token == "YOUR_BRAVE_API_TOKEN":
             logger.warning("Brave API token is not set.")

     def create_headers(self):
         """Create API headers"""
@@ -169,6 +195,33 @@ class LLMCollaborativeSystem:
 4. Present clear conclusions and next steps
 5. Write it as a professional, polished final report"""

     def extract_keywords(self, supervisor_response: str) -> List[str]:
         """Extract keywords from the supervisor's response"""
         keywords = []
@@ -392,9 +445,71 @@ class LLMCollaborativeSystem:
             yield chunk + " "
             time.sleep(0.05)

     def call_llm_streaming(self, messages: List[Dict[str, str]], role: str) -> Generator[str, None, None]:
         """Streaming LLM API call"""

         # Test mode
         if self.test_mode:
             logger.info(f"Test-mode streaming - Role: {role}")
@@ -424,30 +539,21 @@ class LLMCollaborativeSystem:
 - According to recent research, the key to model optimization is balancing architecture design and training strategy (confidence: 0.85)
 - AutoML tools automate hyperparameter tuning and greatly improve efficiency (confidence: 0.82)
 - Sources: ML Conference 2024 (https://mlconf2024.org), Google Research (https://research.google)
-- Synonym-search results: additional information found for "ML improvement", "AI optimization"

 **2. Performance Improvement Strategies (high confidence)**
 - Research finding that improving data quality accounts for 80% of model-performance gains (confidence: 0.90)
 - Ensemble techniques and transfer learning proven as the main performance-improvement methods (confidence: 0.78)
 - Sources: Stanford AI Lab (https://ai.stanford.edu), MIT CSAIL (https://csail.mit.edu)
-- Benchmark: cases achieving over 95% accuracy on ImageNet

 **3. Model Efficiency Techniques (medium confidence)**
 - Model compression (pruning, quantization) can speed up inference by 10x (confidence: 0.75)
 - Knowledge Distillation reduces model size by 90% while maintaining performance (confidence: 0.72)
 - Source: ArXiv paper (https://arxiv.org/abs/2023.xxxxx)
-- ⚠️ Note: some sources report only a 5-7x speedup, so the information conflicts

 **4. Real-world application cases (high confidence)**
 - Netflix: 35% higher user satisfaction from recommender-system improvements (confidence: 0.88)
 - Tesla: 50% faster real-time object recognition (confidence: 0.80)
 - OpenAI: 70% cost reduction from GPT model-efficiency improvements (confidence: 0.85)
-- Sources: each company's official blog and technical presentations
-
-**Contradictions found in the information:**
-1. Model compression ratio: some claim 90% compression is possible, while other sources state 70% is the limit
-2. Size of the performance gain: opinions differ between 10x and 5-7x
-3. Recommendation: plan around the conservative figure (5-7x) for real-world deployment

 **Key insights:**
 - The latest trend is to balance efficiency and performance
@@ -497,27 +603,7 @@ class LLMCollaborativeSystem:
 - Implement Knowledge Distillation
   * Teacher model: the current large model
   * Student model: target 90% smaller size (based on the research findings)
-  * Implementation framework: PyTorch/TensorFlow
-- Apply pruning and quantization
-  * Remove 50% of parameters with structured pruning
-  * Additional 4x speedup with INT8 quantization
-  * Following the Tesla case: apply TensorRT optimization
-
-**Stage 4: validation of results and deployment (weeks 7-8)**
-- Measure the performance metrics
-  * Inference speed: target 10x improvement (based on the research findings)
-  * Accuracy loss: keep within 2%
-  * Cost reduction: 70% target (based on the OpenAI case)
-- Deployment strategy
-  * A/B test: start with 10% of traffic
-  * Monitoring: Prometheus + Grafana dashboards
-  * Rollback plan: immediate rollback on performance degradation
-
-**Expected deliverables**
-- Optimized model (90% smaller, 10x faster)
-- Detailed performance benchmark report
-- Production deployment guide and monitoring dashboards
-- Reproducible optimization-pipeline code""",

 "supervisor_review": """Reviewing the executor AI's plan, the research findings have been reflected well. I suggest the following improvements.
@@ -534,16 +620,10 @@ class LLMCollaborativeSystem:
 2. **Make the cost analysis concrete**
 - Concrete cost calculation behind the 70% savings in the OpenAI case
 - ROI analysis and how to measure return on investment
-
-3. **Team collaboration framework**
-  - Clear division of roles among data scientists, ML engineers, and DevOps
-  - Weekly progress sharing and an issue-tracking process

 **Additional recommendations**
 - Build a system for monitoring the latest research trends
-- A regular survey process for competitor benchmarking
-- Documentation and seminars for internal knowledge sharing
-- Build a system to track lessons learned from failure cases""",

 "executor_final": """Fully incorporating the supervisor AI's feedback, here is the final execution report.
@@ -558,158 +638,98 @@ class LLMCollaborativeSystem:
 **Mon-Tue: collect performance metrics**
 - Full analysis of the current model via MLflow
 - Key metrics based on the Netflix case: accuracy (92%), latency (45 ms), throughput (1,000 req/s)
-- Resource usage: 8 GB GPU memory, 85% CPU utilization during inference

 **Wed-Thu: initial AutoML exploration**
 - Hyperparameter optimization with Optuna (200 trials)
 - Distributed training environment with Ray Tune
-- Initial improvement potential: 15-20% performance gain expected
-
-**Friday: diagnostic report and risk analysis**
-- Main bottlenecks: model size (2.5 GB), inefficient batch processing
-- Risks: data drift, hardware constraints
-- Backup plan: secure cloud GPU instances

 ### Expected deliverables
 - Detailed performance baseline document
 - Improvement-opportunity priority matrix
-- Risk register and mitigation plan

 ## 📊 Stage 2: data quality improvement (weeks 2-3)

 ### Execution plan
-class DataQualityPipeline:
-    def __init__(self):
-        self.validators = [
-            MissingValueHandler(threshold=0.05),
-            OutlierDetector(method='isolation_forest'),
-            LabelConsistencyChecker(),
-            DataDriftMonitor()
-        ]
-
-    def process(self, data):
-        # Apply the 80% rule: data quality determines 80% of performance
-        for validator in self.validators:
-            data = validator.transform(data)
-            self.log_metrics(validator.get_stats())
-        return data
-```
-
-**Week 3: advanced data augmentation**
-- MixUp: 15% accuracy improvement expected
-- CutMix: 20% improvement in boundary detection
-- AutoAugment: automatic search for the optimal augmentation policy
-- A/B tests: measure the effect of each technique
-
-### Risk mitigation
-- If data quality degrades: implement a rollback mechanism
-- Prevent augmentation overfitting: separate validation set and cross-validation
-
-### Expected deliverables
-- Automated data-quality pipeline
-- Data-quality dashboard (Grafana)
-- Report verifying at least 15% performance improvement

 ## 📊 Stage 3: model optimization implementation (weeks 4-6)

 ### Execution plan
-    * Parameter count: 250M → 25M (90% reduction)
-    * Layers: 24 → 6
-    * Hidden dimension: 1024 → 256
-  - Training strategy:
-    * Temperature: 5.0
-    * Alpha (KD loss weight): 0.7
-    * Training epochs: 50
-
-**Week 6: Pruning & Quantization**
-  - Structured pruning:
-    * Remove 50% of channels by magnitude
-    * Fine-tuning: 10 epochs
-  - INT8 quantization:
-    * Post-training quantization
-    * Calibration dataset: 1,000 samples
-  - TensorRT optimization (applying the Tesla case):
-    * Enable FP16 inference
-    * Dynamic batch optimization
-
-### Team collaboration
-  - ML engineer: model architecture and training
-  - DevOps: infrastructure and deployment pipeline
-  - Data scientist: performance analysis and validation
-  - Weekly stand-up meetings and Jira issue tracking
-
-### Expected deliverables
-  - Optimized model checkpoints
-  - Detailed performance benchmark report
-  - Model-conversion automation scripts

 ## 📊 Stage 4: validation and production deployment (weeks 7-8)

 ### Execution plan
-    * Model size: 2.5 GB → 250 MB (90% reduction) ✅
-    * Accuracy loss: 92% → 90.5% (1.5% loss) ✅
-  - Cost analysis:
-    * GPU instances: $2,000/month → $600/month
-    * Fewer servers thanks to higher throughput: 10 → 3
-    * Total cost reduction: 70% achieved ✅
-
-**Week 8: staged rollout**
-  - Canary deployment:
-    * Day 1: 1% of traffic
-    * Day 3: 10% of traffic
-    * Day 7: 50% of traffic
-    * Day 14: 100% switchover
-  - Monitoring setup:
-    * Prometheus + Grafana dashboards
-    * Alert thresholds: latency > 10 ms, error rate > 0.1%
-  - Rollback plan:
-    * Configure automatic rollback triggers
-    * Blue-green deployment allows instant switchover
-
-### ROI analysis
-  - Initial investment: $50,000 (labor + infrastructure)
-  - Monthly savings: $14,000
-  - Payback period: 3.6 months
-  - First-year net benefit: $118,000

-  - Operations guide document

-This project is expected to apply the latest research results and industry best practices to dramatically improve model performance within eight weeks while cutting operating costs by 70%. A systematic approach, careful risk management, and a continuous-improvement plan will secure long-term competitiveness.

 }

 # Choose the appropriate response based on the prompt content
@@ -723,6 +743,8 @@ class DataQualityPipeline:
             response = test_responses["researcher"]
         elif role == "executor" and "final report" in messages[0]["content"]:
             response = test_responses["executor_final"]
         else:
             response = test_responses["executor"]

@@ -734,7 +756,8 @@ class DataQualityPipeline:
         system_prompts = {
             "supervisor": "You are a supervisor AI that analyzes from a macro perspective and provides guidance.",
             "researcher": "You are a researcher AI that investigates information and organizes it systematically.",
-            "executor": "
         }

         full_messages = [
@@ -797,15 +820,18 @@ llm_system = LLMCollaborativeSystem()
 # Internal history management (not shown in the UI)
 internal_history = []

-def process_query_streaming(user_query: str):
     """Query handling with streaming support"""
     global internal_history

     if not user_query:
         return "", "", "", "", "❌ Please enter a question."

     conversation_log = []
-    all_responses = {"supervisor": [], "researcher": [], "executor": []}

     try:
         # Stage 1: supervisor AI's initial analysis and keyword extraction
@@ -965,6 +991,26 @@ def process_query_streaming(user_query: str):

         all_responses["executor"].append(final_executor_response)

         # Build the final result (with the final report as the main content)
         final_summary = f"""## 🎯 Final Comprehensive Report

@@ -976,6 +1022,11 @@ def process_query_streaming(user_query: str):

 ---

 <details>
 <summary>📋 View the full collaboration process</summary>

@@ -997,16 +1048,28 @@ def process_query_streaming(user_query: str):
 </details>

 ---
-*This report was 

         # Update the internal history (not shown in the UI)
         internal_history.append((user_query, final_summary))

     except Exception as e:
         error_msg = f"❌ Error during processing: {str(e)}"
-        yield "", "", "",

 def clear_all():
     """Reset all content"""
@@ -1022,21 +1085,29 @@ css = """
 .supervisor-box textarea {
     border-left: 4px solid #667eea !important;
     padding-left: 10px !important;
 }
 .researcher-box textarea {
     border-left: 4px solid #10b981 !important;
     padding-left: 10px !important;
 }
 .executor-box textarea {
     border-left: 4px solid #764ba2 !important;
     padding-left: 10px !important;
 }
 """

 with gr.Blocks(title="Collaborative LLM System", theme=gr.themes.Soft(), css=css) as app:
     gr.Markdown(
         f"""
-        # 🤖 Collaborative LLM System (
         """
     )

@@ -1044,14 +1115,27 @@ with gr.Blocks(title="Collaborative LLM System"
     with gr.Row():
         with gr.Column():
             gr.Markdown("""
             """)

             user_input = gr.Textbox(
                 label="Enter your question",
                 placeholder="e.g., How can I improve the performance of a machine-learning model?",
@@ -1069,48 +1153,50 @@ with gr.Blocks(title="Collaborative LLM System"
                 max_lines=1
             )

-    #
     with gr.Row():
-        with gr.Accordion("📌 Final comprehensive result", open=True):
-            final_output = gr.Markdown(
-                value="*Enter a question and the result will appear here.*"
-            )
-
-    # AI outputs - placed side by side in one row
-    with gr.Row():
-        # Supervisor AI output
         with gr.Column():
             gr.Markdown("### 🧠 Supervisor AI (macro analysis)")
             supervisor_output = gr.Textbox(
                 label="",
-                lines=
-                max_lines=
                 interactive=False,
                 elem_classes=["supervisor-box"]
             )

-        # Researcher AI output
         with gr.Column():
             gr.Markdown("### 🔍 Researcher AI (web search & organization)")
             researcher_output = gr.Textbox(
                 label="",
-                lines=
-                max_lines=
                 interactive=False,
                 elem_classes=["researcher-box"]
             )

         with gr.Column():
             gr.Markdown("### 🛠️ Executor AI (micro-level implementation)")
             executor_output = gr.Textbox(
                 label="",
-                lines=
-                max_lines=
                 interactive=False,
                 elem_classes=["executor-box"]
             )

     # Examples
     gr.Examples(
@@ -1128,8 +1214,8 @@ with gr.Blocks(title="Collaborative LLM System"
     # Event handlers
     submit_btn.click(
         fn=process_query_streaming,
-        inputs=[user_input],
-        outputs=[supervisor_output, researcher_output, executor_output,
     ).then(
         fn=lambda: "",
         outputs=[user_input]
@@ -1137,8 +1223,8 @@ with gr.Blocks(title="Collaborative LLM System"

     user_input.submit(
         fn=process_query_streaming,
-        inputs=[user_input],
-        outputs=[supervisor_output, researcher_output, executor_output,
     ).then(
         fn=lambda: "",
         outputs=[user_input]
@@ -1146,7 +1232,7 @@ with gr.Blocks(title="Collaborative LLM System"

     clear_btn.click(
         fn=clear_all,
-        outputs=[supervisor_output, researcher_output, executor_output,
     )

@@ -17,9 +17,19 @@ from bs4 import BeautifulSoup
 from urllib.parse import urlparse
 import urllib.request

+# Gemini API import
+try:
+    from google import genai
+    from google.genai import types
+    GEMINI_AVAILABLE = True
+except ImportError:
+    GEMINI_AVAILABLE = False
+    logger.warning("Google Gemini API is not installed. Install it with: pip install google-genai")
+
 # Get tokens from environment variables
 FRIENDLI_TOKEN = os.getenv("FRIENDLI_TOKEN", "YOUR_FRIENDLI_TOKEN")
 BAPI_TOKEN = os.getenv("BAPI_TOKEN", "YOUR_BRAVE_API_TOKEN")
+GAPI_TOKEN = os.getenv("GAPI_TOKEN", "YOUR_GEMINI_API_TOKEN")
 API_URL = "https://api.friendli.ai/dedicated/v1/chat/completions"
 BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
 MODEL_ID = "dep89a2fld32mcm"
@@ -32,15 +42,31 @@ class LLMCollaborativeSystem:
     def __init__(self):
         self.token = FRIENDLI_TOKEN
         self.bapi_token = BAPI_TOKEN
+        self.gapi_token = GAPI_TOKEN
         self.api_url = API_URL
         self.brave_url = BRAVE_SEARCH_URL
         self.model_id = MODEL_ID
         self.test_mode = TEST_MODE or (self.token == "YOUR_FRIENDLI_TOKEN")
+        self.use_gemini = False
+        self.gemini_client = None

         if self.test_mode:
             logger.warning("Running in test mode.")
         if self.bapi_token == "YOUR_BRAVE_API_TOKEN":
             logger.warning("Brave API token is not set.")
+        if self.gapi_token == "YOUR_GEMINI_API_TOKEN":
+            logger.warning("Gemini API token is not set.")
+
+    def set_llm_mode(self, mode: str):
+        """Set the LLM mode (default or commercial)"""
+        if mode == "commercial" and GEMINI_AVAILABLE and self.gapi_token != "YOUR_GEMINI_API_TOKEN":
+            self.use_gemini = True
+            if not self.gemini_client:
+                self.gemini_client = genai.Client(api_key=self.gapi_token)
+            logger.info("Switched to Gemini 2.5 Pro mode.")
+        else:
+            self.use_gemini = False
+            logger.info("Switched to the default LLM mode.")

     def create_headers(self):
         """Create API headers"""
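For reference, a minimal usage sketch of the new mode switch, assuming the environment variables above are set; the class and token names come straight from the diff, the printed values are just illustrative:

```python
# Minimal sketch: exercising set_llm_mode() on the system defined in this diff.
system = LLMCollaborativeSystem()

system.set_llm_mode("commercial")
# use_gemini becomes True only when google-genai imported successfully
# and GAPI_TOKEN holds a real key; otherwise the default mode is kept.
print(system.use_gemini)

system.set_llm_mode("default")
print(system.use_gemini)  # False: calls go back through the Friendli endpoint
```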
@@ -169,6 +195,33 @@ class LLMCollaborativeSystem:
 4. Present clear conclusions and next steps
 5. Write it as a professional, polished final report"""

+    def create_evaluator_prompt(self, user_query: str, supervisor_responses: List[str], researcher_response: str, executor_responses: List[str]) -> str:
+        """Create the evaluator AI prompt"""
+        return f"""You are an evaluator AI that assesses the entire collaboration process and its results.
+
+User question: {user_query}
+
+Supervisor AI's analysis and guidance:
+- Initial analysis: {supervisor_responses[0]}
+- Execution guidance: {supervisor_responses[1]}
+- Review feedback: {supervisor_responses[2]}
+
+Researcher AI's findings:
+{researcher_response}
+
+Executor AI's implementation:
+- Initial implementation: {executor_responses[0]}
+- Final report: {executor_responses[1]}
+
+Evaluate the process above:
+1. **Quality assessment**: rate each AI's answer quality and how well it fulfilled its role (out of 10)
+2. **Collaboration effectiveness**: assess how effective the collaboration between the AIs was
+3. **Use of information**: assess how well the web-search information was used
+4. **Improvements**: point out concretely what should be improved next time
+5. **Final score**: give an overall rating of the whole process
+
+Write the evaluation concretely and constructively."""
+
     def extract_keywords(self, supervisor_response: str) -> List[str]:
         """Extract keywords from the supervisor's response"""
         keywords = []
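The prompt builder assumes exactly three supervisor turns and two executor turns; a small sketch of the call it expects, with hypothetical sample strings standing in for the real responses:

```python
# Hypothetical sample arguments, just to show the expected shapes.
prompt = llm_system.create_evaluator_prompt(
    user_query="How can I improve the performance of a machine-learning model?",
    supervisor_responses=["initial analysis", "execution guidance", "review feedback"],  # three entries
    researcher_response="summarized web-search findings",
    executor_responses=["initial implementation", "final report"],                       # two entries
)
# The prompt is then streamed through call_llm_streaming([...], role="evaluator").
```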
@@ -392,9 +445,71 @@ class LLMCollaborativeSystem:
             yield chunk + " "
             time.sleep(0.05)

+    def call_gemini_streaming(self, messages: List[Dict[str, str]], role: str) -> Generator[str, None, None]:
+        """Streaming call to the Gemini API"""
+        if not self.gemini_client:
+            yield "❌ The Gemini API client has not been initialized."
+            return
+
+        try:
+            # Set up the system prompts
+            system_prompts = {
+                "supervisor": "You are a supervisor AI that analyzes from a macro perspective and provides guidance.",
+                "researcher": "You are a researcher AI that investigates information and organizes it systematically.",
+                "executor": "You are an executor AI that implements the details.",
+                "evaluator": "You are an evaluator AI that assesses the entire collaboration process and its results."
+            }
+
+            # Convert to the Gemini message format
+            contents = []
+
+            # Add the system message
+            contents.append(types.Content(
+                role="user",
+                parts=[types.Part.from_text(text=system_prompts.get(role, ""))]
+            ))
+            contents.append(types.Content(
+                role="model",
+                parts=[types.Part.from_text(text="Understood. I will carry out the assigned role.")]
+            ))
+
+            # Add the user messages
+            for msg in messages:
+                if msg["role"] == "user":
+                    contents.append(types.Content(
+                        role="user",
+                        parts=[types.Part.from_text(text=msg["content"])]
+                    ))
+
+            # Gemini settings
+            generate_content_config = types.GenerateContentConfig(
+                response_mime_type="text/plain",
+                temperature=0.7,
+                top_p=0.8,
+                max_output_tokens=2048
+            )
+
+            # Streaming generation
+            for chunk in self.gemini_client.models.generate_content_stream(
+                model="gemini-2.5-pro",
+                contents=contents,
+                config=generate_content_config,
+            ):
+                if chunk.text:
+                    yield chunk.text
+
+        except Exception as e:
+            logger.error(f"Gemini API error: {str(e)}")
+            yield f"❌ Gemini API error: {str(e)}"
+
     def call_llm_streaming(self, messages: List[Dict[str, str]], role: str) -> Generator[str, None, None]:
         """Streaming LLM API call"""

+        # If Gemini mode is enabled
+        if self.use_gemini:
+            yield from self.call_gemini_streaming(messages, role)
+            return
+
         # Test mode
         if self.test_mode:
             logger.info(f"Test-mode streaming - Role: {role}")
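call_llm_streaming stays the single entry point and silently dispatches to Gemini when use_gemini is set; a short consumption sketch of that generator (the query text is hypothetical, and real tokens are required unless test mode is active):

```python
# Consume the unified streaming entry point; it dispatches to Gemini when
# use_gemini is True, otherwise to the Friendli endpoint (or test mode).
llm_system.set_llm_mode("commercial")
response = ""
for chunk in llm_system.call_llm_streaming(
    [{"role": "user", "content": "Summarize strategies for improving model performance."}],
    "supervisor",
):
    response += chunk  # each chunk is a text fragment, not a full message
print(response)
```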
@@ -424,30 +539,21 @@ class LLMCollaborativeSystem:
 - According to recent research, the key to model optimization is balancing architecture design and training strategy (confidence: 0.85)
 - AutoML tools automate hyperparameter tuning and greatly improve efficiency (confidence: 0.82)
 - Sources: ML Conference 2024 (https://mlconf2024.org), Google Research (https://research.google)

 **2. Performance Improvement Strategies (high confidence)**
 - Research finding that improving data quality accounts for 80% of model-performance gains (confidence: 0.90)
 - Ensemble techniques and transfer learning proven as the main performance-improvement methods (confidence: 0.78)
 - Sources: Stanford AI Lab (https://ai.stanford.edu), MIT CSAIL (https://csail.mit.edu)

 **3. Model Efficiency Techniques (medium confidence)**
 - Model compression (pruning, quantization) can speed up inference by 10x (confidence: 0.75)
 - Knowledge Distillation reduces model size by 90% while maintaining performance (confidence: 0.72)
 - Source: ArXiv paper (https://arxiv.org/abs/2023.xxxxx)

 **4. Real-world application cases (high confidence)**
 - Netflix: 35% higher user satisfaction from recommender-system improvements (confidence: 0.88)
 - Tesla: 50% faster real-time object recognition (confidence: 0.80)
 - OpenAI: 70% cost reduction from GPT model-efficiency improvements (confidence: 0.85)

 **Key insights:**
 - The latest trend is to balance efficiency and performance
@@ -497,27 +603,7 @@ class LLMCollaborativeSystem:
 - Implement Knowledge Distillation
   * Teacher model: the current large model
   * Student model: target 90% smaller size (based on the research findings)
+  * Implementation framework: PyTorch/TensorFlow""",

 "supervisor_review": """Reviewing the executor AI's plan, the research findings have been reflected well. I suggest the following improvements.
@@ -534,16 +620,10 @@ class LLMCollaborativeSystem:
 2. **Make the cost analysis concrete**
 - Concrete cost calculation behind the 70% savings in the OpenAI case
 - ROI analysis and how to measure return on investment

 **Additional recommendations**
 - Build a system for monitoring the latest research trends
+- A regular survey process for competitor benchmarking""",

 "executor_final": """Fully incorporating the supervisor AI's feedback, here is the final execution report.
@@ -558,158 +638,98 @@ class LLMCollaborativeSystem:
 **Mon-Tue: collect performance metrics**
 - Full analysis of the current model via MLflow
 - Key metrics based on the Netflix case: accuracy (92%), latency (45 ms), throughput (1,000 req/s)

 **Wed-Thu: initial AutoML exploration**
 - Hyperparameter optimization with Optuna (200 trials)
 - Distributed training environment with Ray Tune

 ### Expected deliverables
 - Detailed performance baseline document
 - Improvement-opportunity priority matrix

 ## 📊 Stage 2: data quality improvement (weeks 2-3)

 ### Execution plan
+- Build a data-cleaning pipeline
+- Apply advanced data-augmentation techniques
+- Validate the effect with A/B tests

 ## 📊 Stage 3: model optimization implementation (weeks 4-6)

 ### Execution plan
+- Model compression with Knowledge Distillation
+- Apply Pruning & Quantization
+- TensorRT optimization (applying the Tesla case)

 ## 📊 Stage 4: validation and production deployment (weeks 7-8)

 ### Execution plan
+- Comprehensive performance validation and verification of the targets
+- Execute the Canary deployment strategy
+- Build a real-time monitoring system

+## 📊 Conclusion
+This project is expected to apply the latest research results and industry best practices to dramatically improve model performance and cut operating costs by 70% within eight weeks.""",
+
+"evaluator": """## 📊 Evaluation Report on the Full Collaboration Process
+
+### 1️⃣ Quality assessment (out of 10)
+
+**Supervisor AI: 9.5/10**
+- Systematic analysis and direction-setting from a macro perspective
+- Provided concrete guidance for each stage
+- Feedback was constructive and actionable
+
+**Researcher AI: 9.0/10**
+- Excellent collection of up-to-date information through web search
+- Effective confidence scoring and contradiction detection
+- Systematic source citation and organization of information
+
+**Executor AI: 8.5/10**
+- Concrete planning that made good use of the research findings
+- Presented a feasible step-by-step approach
+- Some details still need further elaboration
+
+### 2️⃣ Collaboration effectiveness
+
+**Strengths:**
+- Clear, complementary division of roles between the AIs
+- Systematic and consistent information flow
+- Feedback was incorporated effectively
+
+**Areas to improve:**
+- Consider adding a real-time interaction mechanism
+- An intermediate review stage should be introduced
+
+### 3️⃣ Use of information
+
+**Strengths:**
+- Information gathered from more than 20 web sources
+- Effective confidence-based filtering of information
+- Good use of real corporate case studies
+
+**Needs reinforcement:**
+- Use deeper material such as academic papers
+- Account for region- and industry-specific characteristics
+
+### 4️⃣ Future improvements
+
+1. **Strengthen real-time collaboration**
+   - Add intermediate checkpoints between the AIs
+   - Introduce dynamic role adjustment
+
+2. **Strengthen information verification**
+   - Add a cross-validation process
+   - Consider an expert review stage
+
+3. **Strengthen personalization**
+   - Reflect the user's context more deeply
+   - Provide strategies tailored to industry and scale
+
+### 5️⃣ Final score: ⭐⭐⭐⭐⭐ 9.0/10
+
+**Overall assessment:**
+This collaborative system used each AI's expertise effectively to give a comprehensive, actionable answer to the user's question. The use of up-to-date web-search information and the staged incorporation of feedback were particularly strong. Strengthening real-time collaboration and personalization further would lead to even better results."""
 }

 # Choose the appropriate response based on the prompt content
@@ -723,6 +743,8 @@ class DataQualityPipeline:
             response = test_responses["researcher"]
         elif role == "executor" and "final report" in messages[0]["content"]:
             response = test_responses["executor_final"]
+        elif role == "evaluator":
+            response = test_responses["evaluator"]
         else:
             response = test_responses["executor"]

@@ -734,7 +756,8 @@ class DataQualityPipeline:
         system_prompts = {
             "supervisor": "You are a supervisor AI that analyzes from a macro perspective and provides guidance.",
             "researcher": "You are a researcher AI that investigates information and organizes it systematically.",
+            "executor": "You are an executor AI that implements the details.",
+            "evaluator": "You are an evaluator AI that assesses the entire collaboration process and its results."
         }

         full_messages = [
@@ -797,15 +820,18 @@ llm_system = LLMCollaborativeSystem()
 # Internal history management (not shown in the UI)
 internal_history = []

+def process_query_streaming(user_query: str, llm_mode: str):
     """Query handling with streaming support"""
     global internal_history

     if not user_query:
         return "", "", "", "", "❌ Please enter a question."

+    # Set the LLM mode
+    llm_system.set_llm_mode(llm_mode)
+
     conversation_log = []
+    all_responses = {"supervisor": [], "researcher": [], "executor": [], "evaluator": []}

     try:
         # Stage 1: supervisor AI's initial analysis and keyword extraction
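The new llm_mode argument arrives from the gr.Radio defined later in the UI; as a reminder of the call shape, a small consumption sketch (the query string is hypothetical, and in test mode the generator streams the canned responses shown above):

```python
# Gradio calls the generator positionally: inputs=[user_input, llm_mode]
# maps to process_query_streaming(user_query, llm_mode).
for supervisor, researcher, executor, evaluator, status in process_query_streaming(
    "How can I improve the performance of a machine-learning model?",
    "default",  # or "commercial" for Gemini 2.5 Pro
):
    print(status)  # progress messages; the other four strings feed the four output boxes
```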
@@ -965,6 +991,26 @@ def process_query_streaming(user_query: str):

         all_responses["executor"].append(final_executor_response)

+        # Stage 8: the evaluator AI evaluates the whole process
+        evaluator_prompt = llm_system.create_evaluator_prompt(
+            user_query,
+            all_responses["supervisor"],
+            all_responses["researcher"][0],
+            all_responses["executor"]
+        )
+        evaluator_response = ""
+
+        evaluator_text = "[Overall evaluation] 📊 Evaluating...\n"
+        for chunk in llm_system.call_llm_streaming(
+            [{"role": "user", "content": evaluator_prompt}],
+            "evaluator"
+        ):
+            evaluator_response += chunk
+            evaluator_text = f"[Overall evaluation] - {datetime.now().strftime('%H:%M:%S')}\n{evaluator_response}"
+            yield supervisor_text, researcher_text, executor_text, evaluator_text, "📊 The evaluator AI is evaluating..."
+
+        all_responses["evaluator"].append(evaluator_response)
+
         # Build the final result (with the final report as the main content)
         final_summary = f"""## 🎯 Final Comprehensive Report

@@ -976,6 +1022,11 @@ def process_query_streaming(user_query: str):

 ---

+### 📊 Overall process evaluation (evaluator AI)
+{evaluator_response}
+
+---
+
 <details>
 <summary>📋 View the full collaboration process</summary>

@@ -997,16 +1048,28 @@ def process_query_streaming(user_query: str):
 </details>

 ---
+*This report was generated using {'Gemini 2.5 Pro' if llm_system.use_gemini else 'the default LLM'}, combining web search and AI collaboration.*"""

         # Update the internal history (not shown in the UI)
         internal_history.append((user_query, final_summary))

+        # Show only the final summary
+        display_summary = f"""## 🎯 Final Result
+
+### 📄 Execution report
+{final_executor_response}
+
+### 📊 Evaluation summary
+{evaluator_response.split('### 5️⃣')[1] if '### 5️⃣' in evaluator_response else evaluator_response[-500:]}
+
+---
+*Using {'Gemini 2.5 Pro' if llm_system.use_gemini else 'the default LLM'} | 4-AI collaboration complete*"""
+
+        yield supervisor_text, researcher_text, executor_text, evaluator_text, "✅ Final report complete!"

     except Exception as e:
         error_msg = f"❌ Error during processing: {str(e)}"
+        yield "", "", "", "", error_msg

 def clear_all():
     """Reset all content"""
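The evaluation summary is carved out by splitting on the '### 5️⃣' heading that the canned evaluator response uses; a minimal sketch of that behavior, with hypothetical sample text:

```python
# How the evaluation summary is extracted from the evaluator response.
evaluator_response = (
    "### 4️⃣ Future improvements\n...\n"
    "### 5️⃣ Final score: 9.0/10\n**Overall assessment:** ..."  # hypothetical sample
)
if '### 5️⃣' in evaluator_response:
    summary = evaluator_response.split('### 5️⃣')[1]  # keep everything after the heading
else:
    summary = evaluator_response[-500:]               # fallback: last 500 characters
print(summary)
```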
@@ -1022,21 +1085,29 @@ css = """
 .supervisor-box textarea {
     border-left: 4px solid #667eea !important;
     padding-left: 10px !important;
+    background-color: #f8f9ff !important;
 }
 .researcher-box textarea {
     border-left: 4px solid #10b981 !important;
     padding-left: 10px !important;
+    background-color: #f0fdf4 !important;
 }
 .executor-box textarea {
     border-left: 4px solid #764ba2 !important;
     padding-left: 10px !important;
+    background-color: #faf5ff !important;
+}
+.evaluator-box textarea {
+    border-left: 4px solid #f59e0b !important;
+    padding-left: 10px !important;
+    background-color: #fffbeb !important;
 }
 """

 with gr.Blocks(title="Collaborative LLM System", theme=gr.themes.Soft(), css=css) as app:
     gr.Markdown(
         f"""
+        # 🤖 Collaborative LLM System (4-AI collaboration + evaluator)
         """
     )

@@ -1044,14 +1115,27 @@ with gr.Blocks(title="Collaborative LLM System"
     with gr.Row():
         with gr.Column():
             gr.Markdown("""
+            ## 🎯 A collaborative system of four AIs
+            - **Supervisor AI**: macro-level analysis and strategy
+            - **Researcher AI**: web search and information gathering/organization
+            - **Executor AI**: concrete planning and execution
+            - **Evaluator AI**: evaluation of the whole process and improvement suggestions
+
+            ### 📌 Key features
+            - 20 search results plus synonym searches
+            - Confidence-based information assessment
+            - Real-time collaboration and feedback incorporation
+            - Comprehensive quality evaluation
             """)

+            # LLM selection option
+            llm_mode = gr.Radio(
+                choices=["default", "commercial"],
+                value="default",
+                label="LLM mode selection",
+                info="Selecting commercial uses Gemini 2.5 Pro"
+            )
+
             user_input = gr.Textbox(
                 label="Enter your question",
                 placeholder="e.g., How can I improve the performance of a machine-learning model?",
@@ -1069,48 +1153,50 @@ with gr.Blocks(title="Collaborative LLM System"
                 max_lines=1
             )

+    # AI outputs - 2x2 grid
     with gr.Row():
+        # Top row
         with gr.Column():
             gr.Markdown("### 🧠 Supervisor AI (macro analysis)")
             supervisor_output = gr.Textbox(
                 label="",
+                lines=15,
+                max_lines=20,
                 interactive=False,
                 elem_classes=["supervisor-box"]
             )

         with gr.Column():
             gr.Markdown("### 🔍 Researcher AI (web search & organization)")
             researcher_output = gr.Textbox(
                 label="",
+                lines=15,
+                max_lines=20,
                 interactive=False,
                 elem_classes=["researcher-box"]
             )
+
+    with gr.Row():
+        # Bottom row
         with gr.Column():
             gr.Markdown("### 🛠️ Executor AI (micro-level implementation)")
             executor_output = gr.Textbox(
                 label="",
+                lines=15,
+                max_lines=20,
                 interactive=False,
                 elem_classes=["executor-box"]
             )
+
+        with gr.Column():
+            gr.Markdown("### 📊 Evaluator AI (overall evaluation)")
+            evaluator_output = gr.Textbox(
+                label="",
+                lines=15,
+                max_lines=20,
+                interactive=False,
+                elem_classes=["evaluator-box"]
+            )

     # Examples
     gr.Examples(
@@ -1128,8 +1214,8 @@ with gr.Blocks(title="Collaborative LLM System"
     # Event handlers
     submit_btn.click(
         fn=process_query_streaming,
+        inputs=[user_input, llm_mode],
+        outputs=[supervisor_output, researcher_output, executor_output, evaluator_output, status_text]
     ).then(
         fn=lambda: "",
         outputs=[user_input]

@@ -1137,8 +1223,8 @@ with gr.Blocks(title="Collaborative LLM System"

     user_input.submit(
         fn=process_query_streaming,
+        inputs=[user_input, llm_mode],
+        outputs=[supervisor_output, researcher_output, executor_output, evaluator_output, status_text]
     ).then(
         fn=lambda: "",
         outputs=[user_input]

@@ -1146,7 +1232,7 @@ with gr.Blocks(title="Collaborative LLM System"

     clear_btn.click(
         fn=clear_all,
+        outputs=[supervisor_output, researcher_output, executor_output, evaluator_output, status_text]
     )

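The diff ends before the app is launched; for completeness, a typical launch block under these definitions would look like the sketch below. The launch arguments are assumptions, not part of the diff:

```python
# Assumed launch block; the diff does not include it.
if __name__ == "__main__":
    app.queue()   # queueing lets the streaming generator updates reach the UI
    app.launch()
```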