Update app.py
app.py
CHANGED
@@ -96,9 +96,13 @@ async def create_chat_completion(request: ChatCompletionRequest):
     if not user_prompt: return {"error": "Prompt not found."}

     initial_messages = [{'role': 'system', 'content': SYSTEM_PROMPT}, {'role': 'user', 'content': user_prompt}]
-    inputs = tokenizer.apply_chat_template(initial_messages, add_generation_prompt=True, return_tensors="pt").to(DEVICE)

-    # FIX HERE: We
+    # --- FIX HERE: make sure 'inputs' is a dictionary ---
+    # Step 1: format the text
+    formatted_prompt = tokenizer.apply_chat_template(initial_messages, tokenize=False, add_generation_prompt=True)
+    # Step 2: tokenize the formatted text to get a dictionary
+    inputs = tokenizer(formatted_prompt, return_tensors="pt", padding=True).to(DEVICE)
+
     outputs = model.generate(**inputs, max_new_tokens=150, eos_token_id=tokenizer.eos_token_id)
     thought_process = tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)

@@ -129,9 +133,10 @@ async def create_chat_completion(request: ChatCompletionRequest):
         {'role': 'assistant', 'content': thought_process}
     ]

-
+    # --- SECOND FIX HERE: apply the same logic ---
+    final_formatted_prompt = tokenizer.apply_chat_template(final_messages, tokenize=False, add_generation_prompt=True)
+    final_inputs = tokenizer(final_formatted_prompt, return_tensors="pt", padding=True).to(DEVICE)

-    # SECOND FIX HERE: we also use **final_inputs
     final_outputs = model.generate(**final_inputs, max_new_tokens=1024, do_sample=True, temperature=0.1, top_k=50, top_p=0.95, eos_token_id=tokenizer.eos_token_id)
     response_text = tokenizer.decode(final_outputs[0][len(final_inputs['input_ids'][0]):], skip_special_tokens=True)

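Why this change works (a minimal sketch, not part of the committed file): by default, tokenizer.apply_chat_template(..., return_tensors="pt") returns a bare tensor of input IDs rather than a dictionary, so both model.generate(**inputs, ...) and inputs['input_ids'] fail on it. Formatting the chat to text first (tokenize=False) and then calling the tokenizer itself yields a dict-like BatchEncoding that unpacks cleanly. The model name and prompt below are illustrative assumptions, not values taken from this Space's app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"  # hypothetical model, for illustration only
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(DEVICE)

messages = [{'role': 'user', 'content': 'Say hello.'}]

# Format the chat as plain text, then tokenize it to get a BatchEncoding (dict-like).
formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(formatted_prompt, return_tensors="pt", padding=True).to(DEVICE)

# **inputs now expands to input_ids and attention_mask, as model.generate expects.
outputs = model.generate(**inputs, max_new_tokens=50, eos_token_id=tokenizer.eos_token_id)
# Decode only the newly generated tokens, mirroring what app.py does.
print(tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True))

Recent transformers releases also accept apply_chat_template(..., return_tensors="pt", return_dict=True), which returns a dictionary directly; the two-step pattern above is simply the one this commit chose.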