Update app.py
app.py
CHANGED
@@ -4,28 +4,24 @@ import time
 from llama_cpp import Llama
 import os
 from huggingface_hub import hf_hub_download
-import base64
 
 # Model configuration
 MODEL_NAME = "Dorian2B/Vera-v1.5-Instruct-GGUF"
 MODEL_FILE = "vera-v1.5-instruct-q8_0.gguf"
 
-# Global variables for the parameters
-model_instance = None
-
 def download_model():
     model_path = hf_hub_download(repo_id=MODEL_NAME, filename=MODEL_FILE)
     return model_path
 
-def load_model(context_size=4096):
+def load_model():
     model_path = download_model()
 
     # Model parameters
     model = Llama(
         model_path=model_path,
-        n_ctx=context_size,
+        n_ctx=4096,  # Context size
         n_gpu_layers=-1,  # Use all available layers on GPU if possible
-        verbose=False  # Disable logging
+        verbose=False  # Disable verbose logging
     )
     return model
 
@@ -44,40 +40,10 @@ def format_prompt(message, history):
 
     return prompt
 
-# Thinking animation (pondering robot)
-def thinking_animation():
-    # GIF animation encoded in base64
-    robot_animation = """
-    <div style="display: flex; justify-content: center; margin: 20px 0;">
-        <div class="loading-animation">
-            <div class="robot">
-                <div class="antenna"></div>
-                <div class="head">
-                    <div class="eye left"></div>
-                    <div class="eye right"></div>
-                    <div class="mouth"></div>
-                </div>
-                <div class="body">
-                    <div class="arm left"></div>
-                    <div class="arm right"></div>
-                </div>
-            </div>
-            <div style="margin-left: 20px; font-size: 18px; color: #666; margin-top: 50px;">Vera réfléchit...</div>
-        </div>
-    """
-    return robot_animation
-
 # Streaming inference function
-def generate_response(message, history, temperature, top_p, context_size):
-
-
-    # Reload the model if the context parameters changed
-    if not model_instance or model_instance.n_ctx() != context_size:
-        model_instance = load_model(context_size)
-
-    # Thinking animation
-    yield history + [(message, thinking_animation())]
+def generate_response(message, history):
+    if not hasattr(generate_response, "model"):
+        generate_response.model = load_model()
 
     # Add the user message to the history
     history = history + [(message, "")]
@@ -87,11 +53,11 @@ def generate_response(message, history, temperature, top_p, context_size):
     response_text = ""
 
     # Use streaming to generate the response progressively
-    for token in model_instance(
+    for token in generate_response.model.create_completion(
         prompt,
-        max_tokens=
-        temperature=temperature,
-        top_p=top_p,
+        max_tokens=2048,
+        temperature=0.7,
+        top_p=0.95,
         stop=["</s>", "<|user|>", "<|system|>"],
         stream=True,
     ):
@@ -119,122 +85,10 @@ footer {visibility: hidden}
     background: linear-gradient(135deg, #6e8efb, #a777e3);
     color: white;
     border-radius: 15px 15px 0 15px;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
 }
 .chatbot .bot-message {
     background: #f0f2f5;
     border-radius: 15px 15px 15px 0;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
-}
-.input-textbox {
-    border: 2px solid #ddd !important;
-    border-radius: 12px !important;
-    transition: border-color 0.3s ease !important;
-}
-.input-textbox:focus {
-    border-color: #6e8efb !important;
-    box-shadow: 0 0 0 3px rgba(110, 142, 251, 0.1) !important;
-}
-.parameters-box {
-    border-radius: 10px;
-    background: #fff;
-    padding: 15px;
-    box-shadow: 0 2px 5px rgba(0,0,0,0.05);
-    margin-top: 10px;
-}
-.slider-label {
-    font-weight: 600;
-    color: #444;
-}
-
-/* Thinking-robot animation */
-@keyframes thinking {
-    0% { transform: translateY(0); }
-    50% { transform: translateY(-10px); }
-    100% { transform: translateY(0); }
-}
-@keyframes blink {
-    0% { opacity: 1; }
-    50% { opacity: 0.5; }
-    100% { opacity: 1; }
-}
-@keyframes wave {
-    0% { transform: rotate(0deg); }
-    25% { transform: rotate(20deg); }
-    75% { transform: rotate(-20deg); }
-    100% { transform: rotate(0deg); }
-}
-.loading-animation {
-    width: 100px;
-    height: 150px;
-    position: relative;
-}
-.robot {
-    animation: thinking 2s ease-in-out infinite;
-}
-.antenna {
-    width: 4px;
-    height: 15px;
-    background-color: #888;
-    position: absolute;
-    top: 0;
-    left: 48px;
-    border-radius: 2px;
-}
-.head {
-    width: 60px;
-    height: 50px;
-    background-color: #6e8efb;
-    border-radius: 15px;
-    position: absolute;
-    top: 15px;
-    left: 20px;
-    display: flex;
-    flex-wrap: wrap;
-    justify-content: space-around;
-    align-items: center;
-    padding: 10px;
-}
-.eye {
-    width: 12px;
-    height: 12px;
-    background-color: white;
-    border-radius: 50%;
-    animation: blink 2.5s ease-in-out infinite;
-}
-.mouth {
-    width: 20px;
-    height: 5px;
-    background-color: white;
-    border-radius: 2px;
-    margin-top: 5px;
-}
-.body {
-    width: 70px;
-    height: 60px;
-    background-color: #a777e3;
-    border-radius: 10px;
-    position: absolute;
-    top: 65px;
-    left: 15px;
-    display: flex;
-    justify-content: space-between;
-    align-items: center;
-    padding: 0 5px;
-}
-.arm {
-    width: 8px;
-    height: 40px;
-    background-color: #6e8efb;
-    border-radius: 4px;
-}
-.arm.left {
-    transform-origin: top center;
-    animation: wave 1.5s ease-in-out infinite;
-}
-.arm.right {
-    transform-origin: top center;
-    animation: wave 1.5s ease-in-out infinite reverse;
 }
 """
 
@@ -257,95 +111,37 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
         elem_id="chatbot",
         container=True,
         elem_classes="chatbot-container",
-        render=True,  # To support HTML in the responses
     )
 
     with gr.Row():
         with gr.Column(scale=4):
            message = gr.Textbox(
-                placeholder="Entrez votre message ici...
+                placeholder="Entrez votre message ici...",
                lines=2,
-                max_lines=10,
                container=True,
                scale=4,
                autofocus=True,
-                elem_classes="input-textbox",
-                submit_btn=True,  # Built-in send button
            )
        with gr.Column(scale=1):
            with gr.Row():
                submit_btn = gr.Button("Envoyer", variant="primary", scale=2)
                reset_btn = gr.Button("Réinitialiser", variant="secondary", scale=1)
 
-    # Model parameters
-    with gr.Row(elem_classes="parameters-box"):
-        with gr.Column(scale=1):
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=1.5,
-                value=0.7,
-                step=0.1,
-                label="Température",
-                info="Contrôle la créativité (plus élevé = plus créatif)",
-                elem_classes="slider-label"
-            )
-        with gr.Column(scale=1):
-            top_p = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.95,
-                step=0.05,
-                label="Top P",
-                info="Contrôle la diversité des réponses",
-                elem_classes="slider-label"
-            )
-        with gr.Column(scale=1):
-            context_size = gr.Slider(
-                minimum=1024,
-                maximum=8192,
-                value=4096,
-                step=1024,
-                label="Taille du contexte",
-                info="Mémoire du modèle (plus élevé = plus de contexte)",
-                elem_classes="slider-label"
-            )
-
    with gr.Accordion("À propos du modèle", open=False):
        gr.Markdown("""
        Ce modèle est basé sur **Vera-v1.5-Instruct-GGUF** de [Dorian2B](https://huggingface.co/Dorian2B/Vera-v1.5-Instruct-GGUF).
        Le modèle est optimisé pour les conversations en français.
 
-        **Paramètres
-        -
-        -
-        -
+        **Paramètres du modèle:**
+        - Température: 0.7
+        - Top-p: 0.95
+        - Contexte: 4096 tokens
        """)
 
-    # JavaScript for handling Shift+Enter
-    gr.HTML("""
-    <script>
-    document.addEventListener('DOMContentLoaded', function() {
-        // Let Shift+Enter insert a newline instead of submitting
-        setTimeout(() => {
-            const textareas = document.querySelectorAll('textarea');
-            textareas.forEach(textarea => {
-                textarea.addEventListener('keydown', function(e) {
-                    if (e.key === 'Enter' && !e.shiftKey) {
-                        e.preventDefault();
-                        const submitButton = textarea.closest('.gradio-container').querySelector('button[data-testid="submit"]');
-                        if (submitButton) submitButton.click();
-                    }
-                });
-            });
-        }, 1000);  // Delay to make sure all elements are loaded
-    });
-    </script>
-    """)
-
    # Event configuration
    submit_btn.click(
        fn=generate_response,
-        inputs=[message, chatbot, temperature, top_p, context_size],
+        inputs=[message, chatbot],
        outputs=[chatbot],
        queue=True
    ).then(
@@ -355,7 +151,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
 
    message.submit(
        fn=generate_response,
-        inputs=[message, chatbot, temperature, top_p, context_size],
+        inputs=[message, chatbot],
        outputs=[chatbot],
        queue=True
    ).then(
@@ -370,15 +166,5 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
 
 # Launch the interface
 if __name__ == "__main__":
-    # Pre-download the model at startup
-    print("Téléchargement du modèle...")
-    model_path = download_model()
-    print(f"Modèle téléchargé à {model_path}")
-
-    # Initialize the model at startup
-    print("Initialisation du modèle...")
-    model_instance = load_model()
-    print("Modèle initialisé avec succès!")
-
    demo.queue()
    demo.launch(share=True, show_error=True)
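
On the caching change: the commit replaces the module-level `model_instance` global (and the reload whenever `context_size` changed) with a lazy cache stored as an attribute on `generate_response`, so the `Llama` instance is built on the first request and reused afterwards. An equivalent, arguably more idiomatic pattern is a memoized loader; a sketch only, and `get_model` is our name, not the commit's:

import functools

@functools.lru_cache(maxsize=1)
def get_model():
    # Builds the Llama instance once; later calls return the cached object.
    return load_model()

The trade-off versus the removed startup-time preload is that the first chat request now pays the full model download and load latency.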