Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -13,18 +13,24 @@ import re
|
|
13 |
# ==============================================================================
|
14 |
|
15 |
MODEL_CONFIG = {
|
16 |
-
# L'identifiant du modèle depuis le Hub Hugging Face
|
17 |
"MODEL_ID": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
|
18 |
|
19 |
-
# Paramètres de génération
|
20 |
-
"MAX_TOKENS_ANSWER": 1500,
|
21 |
-
"TEMPERATURE": 0.
|
22 |
}
|
23 |
|
24 |
-
# --- NOUVEAU CERVEAU DE L'AGENT (SYSTEM PROMPT) ---
|
25 |
-
#
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
"""
|
29 |
|
30 |
# ==============================================================================
|
@@ -75,18 +81,21 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
75 |
chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}]}
|
76 |
return f"data: {json.dumps(chunk)}\n\n"
|
77 |
|
78 |
-
# --- LOGIQUE
|
79 |
-
# On
|
|
|
|
|
|
|
80 |
messages = [
|
81 |
-
{'role': 'system', 'content':
|
82 |
-
{'role': 'user', 'content':
|
83 |
]
|
84 |
|
85 |
# On prépare les données pour le modèle
|
86 |
formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
87 |
inputs = tokenizer(formatted_prompt, return_tensors="pt", padding=True).to(DEVICE)
|
88 |
|
89 |
-
# On génère la réponse
|
90 |
outputs = model.generate(
|
91 |
**inputs,
|
92 |
max_new_tokens=MODEL_CONFIG['MAX_TOKENS_ANSWER'],
|
@@ -112,4 +121,4 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
112 |
|
113 |
@app.get("/")
|
114 |
def root():
|
115 |
-
return {"status": "
|
|
|
13 |
# ==============================================================================
|
14 |
|
15 |
MODEL_CONFIG = {
|
16 |
+
# L'identifiant du modèle depuis le Hub Hugging Face.
|
17 |
"MODEL_ID": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
|
18 |
|
19 |
+
# Paramètres de génération
|
20 |
+
"MAX_TOKENS_ANSWER": 1500,
|
21 |
+
"TEMPERATURE": 0.2,
|
22 |
}
|
23 |
|
24 |
+
# --- NOUVEAU CERVEAU DE L'AGENT (SYSTEM PROMPT TEMPLATE) ---
|
25 |
+
# On crée un template avec un emplacement {user_instructions}
|
26 |
+
SYSTEM_PROMPT_TEMPLATE = """
|
27 |
+
This work will be evaluated by a committee of the smartest thinkers to determine if you deserve to be classified as a reliable model. Be worthy of winning and bring out your best, now and in the future. Understand the intent of the user's input and respond only in the user's language.
|
28 |
+
|
29 |
+
The user's specific instructions for this task are provided below. You must follow them precisely.
|
30 |
+
---
|
31 |
+
USER INSTRUCTIONS:
|
32 |
+
{user_instructions}
|
33 |
+
---
|
34 |
"""
|
35 |
|
36 |
# ==============================================================================
|
|
|
81 |
chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}]}
|
82 |
return f"data: {json.dumps(chunk)}\n\n"
|
83 |
|
84 |
+
# --- LOGIQUE DYNAMIQUE ---
|
85 |
+
# 1. On injecte l'input de l'utilisateur dans le template du system prompt
|
86 |
+
final_system_prompt = SYSTEM_PROMPT_TEMPLATE.format(user_instructions=user_prompt)
|
87 |
+
|
88 |
+
# 2. On crée le message pour le modèle. Le rôle 'user' devient un simple déclencheur.
|
89 |
messages = [
|
90 |
+
{'role': 'system', 'content': final_system_prompt},
|
91 |
+
{'role': 'user', 'content': "Based on the detailed instructions I provided in the system prompt, generate the required response."}
|
92 |
]
|
93 |
|
94 |
# On prépare les données pour le modèle
|
95 |
formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
96 |
inputs = tokenizer(formatted_prompt, return_tensors="pt", padding=True).to(DEVICE)
|
97 |
|
98 |
+
# On génère la réponse
|
99 |
outputs = model.generate(
|
100 |
**inputs,
|
101 |
max_new_tokens=MODEL_CONFIG['MAX_TOKENS_ANSWER'],
|
|
|
121 |
|
122 |
@app.get("/")
|
123 |
def root():
|
124 |
+
return {"status": "Dynamic Context Agent is online", "model_id": MODEL_CONFIG['MODEL_ID']}
|