|
import gradio as gr |
|
import torch |
|
import time |
|
from llama_cpp import Llama |
|
import os |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
# Hugging Face Hub repository and the specific quantized (Q8_0) GGUF weight
# file that the app downloads and serves.
MODEL_NAME = "Dorian2B/Vera-v1.5-Instruct-GGUF"

MODEL_FILE = "vera-v1.5-instruct-q8_0.gguf"
|
|
|
def download_model():
    """Fetch the GGUF weights from the Hugging Face Hub.

    Returns the local filesystem path of the downloaded file; the Hub
    client caches it, so repeated calls do not re-download.
    """
    return hf_hub_download(repo_id=MODEL_NAME, filename=MODEL_FILE)
|
|
|
def load_model():
    """Download the model weights and build the llama.cpp inference backend.

    Returns a ready-to-use ``Llama`` instance with a 4096-token context
    window and all layers offloaded to the GPU when one is available.
    """
    weights_path = download_model()

    backend = Llama(
        model_path=weights_path,
        n_ctx=4096,        # context window size (prompt + generation)
        n_gpu_layers=-1,   # -1 = offload every layer to GPU if possible
        verbose=False,
    )
    return backend
|
|
|
|
|
def format_prompt(message, history):
    """Serialize the conversation into the model's chat template.

    Args:
        message: The user's newest message (not yet in ``history``).
        history: List of ``(user_msg, assistant_msg)`` tuples for prior turns.

    Returns:
        A single prompt string: a French system preamble, each past turn
        wrapped in ``<|user|>``/``<|assistant|>`` tags with ``</s>``
        separators, then the new message and an open assistant tag for
        the model to complete.
    """
    segments = [
        "<|system|>\nTu es Vera, une assistante IA utile, honnête et inoffensive.\n</s>\n"
    ]

    for user_turn, assistant_turn in history:
        segments.append(f"<|user|>\n{user_turn}\n</s>\n")
        segments.append(f"<|assistant|>\n{assistant_turn}\n</s>\n")

    segments.append(f"<|user|>\n{message}\n</s>\n")
    segments.append("<|assistant|>\n")

    return "".join(segments)
|
|
|
|
|
def generate_response(message, history):
    """Stream the assistant's reply into the chat history.

    Generator used as a Gradio event handler: yields the full updated
    ``history`` (list of ``(user, assistant)`` tuples) after each streamed
    token so the chatbot widget refreshes incrementally.
    """
    # Lazy-load the model exactly once and memoize it on the function object.
    model = getattr(generate_response, "model", None)
    if model is None:
        model = load_model()
        generate_response.model = model

    # Append a placeholder turn for the reply being generated.
    history = history + [(message, "")]

    # The prompt is built from the turns *before* the placeholder.
    prompt = format_prompt(message, history[:-1])

    pieces = []

    stream = model.create_completion(
        prompt,
        max_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        stop=["</s>", "<|user|>", "<|system|>"],
        stream=True,
    )

    for chunk in stream:
        pieces.append(chunk["choices"][0]["text"])
        history[-1] = (message, "".join(pieces))
        time.sleep(0.01)  # brief pause so the UI updates read as a smooth stream
        yield history
|
|
|
|
|
def reset_conversation():
    """Return fresh values for the chatbot and textbox: empty history, empty text."""
    cleared_history = []
    cleared_message = ""
    return cleared_history, cleared_message
|
|
|
|
|
# CSS injected into the Gradio page: hides the default footer, lightens the
# page background, and restyles the chat bubbles (gradient user messages,
# light-grey rounded bot messages).
custom_css = """
footer {visibility: hidden}
.gradio-container {
    background-color: #f8f9fa;
}
.chatbot-container {
    border-radius: 15px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.chatbot .user-message {
    background: linear-gradient(135deg, #6e8efb, #a777e3);
    color: white;
    border-radius: 15px 15px 0 15px;
}
.chatbot .bot-message {
    background: #f0f2f5;
    border-radius: 15px 15px 15px 0;
}
"""
|
|
|
|
|
# Build the Gradio UI: header, chat pane, input row, model info accordion,
# and the event wiring that connects widgets to the handlers above.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # Page header / usage instructions (French).
    gr.Markdown("""
    # 🌟 Assistant Vera-v1.5-Instruct

    Cette interface vous permet d'interagir avec le modèle Vera-v1.5-Instruct en français.
    Posez vos questions et l'assistant vous répondra en tenant compte du contexte de la conversation.
    """)

    with gr.Row():
        with gr.Column(scale=4):
            # Conversation display; fed (user, assistant) tuples by generate_response.
            chatbot = gr.Chatbot(
                height=550,
                show_copy_button=True,
                avatar_images=("👤", "🤖"),
                bubble_full_width=False,
                elem_id="chatbot",
                container=True,
                elem_classes="chatbot-container",
            )

    with gr.Row():
        with gr.Column(scale=4):
            # Multi-line user input; autofocused on page load.
            message = gr.Textbox(
                placeholder="Entrez votre message ici...",
                lines=2,
                container=True,
                scale=4,
                autofocus=True,
            )
        with gr.Column(scale=1):
            with gr.Row():
                submit_btn = gr.Button("Envoyer", variant="primary", scale=2)
                reset_btn = gr.Button("Réinitialiser", variant="secondary", scale=1)

    # Collapsible model description and the sampling parameters used above.
    with gr.Accordion("À propos du modèle", open=False):
        gr.Markdown("""
        Ce modèle est basé sur **Vera-v1.5-Instruct-GGUF** de [Dorian2B](https://huggingface.co/Dorian2B/Vera-v1.5-Instruct-GGUF).
        Le modèle est optimisé pour les conversations en français.

        **Paramètres du modèle:**
        - Température: 0.7
        - Top-p: 0.95
        - Contexte: 4096 tokens
        """)

    # Button click: stream the reply into the chatbot, then clear the textbox.
    submit_btn.click(
        fn=generate_response,
        inputs=[message, chatbot],
        outputs=[chatbot],
        queue=True
    ).then(
        fn=lambda: "",
        outputs=[message]
    )

    # Pressing Enter in the textbox mirrors the submit button.
    message.submit(
        fn=generate_response,
        inputs=[message, chatbot],
        outputs=[chatbot],
        queue=True
    ).then(
        fn=lambda: "",
        outputs=[message]
    )

    # Reset button: clear both the chat history and the input field.
    reset_btn.click(
        fn=reset_conversation,
        outputs=[chatbot, message]
    )
|
|
|
|
|
if __name__ == "__main__":
    # Enable request queuing (required for streaming generator handlers),
    # then launch with a public share link; show_error surfaces tracebacks
    # in the browser instead of failing silently.
    demo.queue()
    demo.launch(share=True, show_error=True)