import os

import gradio as gr
import torch
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_NAME = "Dorian2B/Vera-v1.5-Instruct-GGUF"
MODEL_FILE = "vera-v1.5-instruct-q8_0.gguf"
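

# hf_hub_download pulls the GGUF weights from the Hugging Face Hub and caches them
# locally, so later launches reuse the cached file instead of downloading it again.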
def download_model():
    model_path = hf_hub_download(repo_id=MODEL_NAME, filename=MODEL_FILE)
    return model_path
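

# n_ctx sets the context window to 4096 tokens. n_gpu_layers=-1 asks llama.cpp to
# offload every layer to the GPU when llama-cpp-python is built with GPU support;
# otherwise inference runs on the CPU.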
def load_model():
    model_path = download_model()

    model = Llama(
        model_path=model_path,
        n_ctx=4096,
        n_gpu_layers=-1
    )
    return model
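

# The prompt is assembled as plain text using the <|system|>/<|user|>/<|assistant|>
# markers and </s> separators that this script uses as the model's chat template.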
def format_prompt(message, history):
    prompt = "<|system|>\nTu es Vera, une assistante IA utile, honnête et inoffensive.\n</s>\n"

    for user_msg, assistant_msg in history:
        prompt += f"<|user|>\n{user_msg}\n</s>\n"
        prompt += f"<|assistant|>\n{assistant_msg}\n</s>\n"

    prompt += f"<|user|>\n{message}\n</s>\n"
    prompt += "<|assistant|>\n"

    return prompt
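

# The Llama instance is created lazily on the first request and cached as an
# attribute on the function, so the model is loaded only once per process.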
def generate_response(message, history):
    if not hasattr(generate_response, "model"):
        generate_response.model = load_model()

    history = history or []
    prompt = format_prompt(message, history)

    response = generate_response.model.create_completion(
        prompt,
        max_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        stop=["</s>", "<|user|>", "<|system|>"],
        echo=False
    )

    # gr.Chatbot expects the full conversation as a list of (user, assistant) pairs,
    # not a bare string, so append the new turn to the history before returning it.
    answer = response['choices'][0]['text'].strip()
    return history + [(message, answer)]


def reset_conversation():
    return [], ""
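

# Gradio UI: a chat window, a message box with send/reset buttons, and a short
# description of the model, laid out in a single Blocks app.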
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("""
    # 🌟 Assistant Vera-v1.5-Instruct

    Cette interface vous permet d'interagir avec le modèle Vera-v1.5-Instruct en français.
    Posez vos questions et l'assistant vous répondra en tenant compte du contexte de la conversation.
    """)

    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=500,
                show_copy_button=True,
                avatar_images=("👤", "🤖"),
                bubble_full_width=False,
            )

    with gr.Row():
        with gr.Column(scale=4):
            message = gr.Textbox(
                placeholder="Entrez votre message ici...",
                lines=2,
                container=False,
                scale=4,
            )
        with gr.Column(scale=1):
            with gr.Row():
                submit_btn = gr.Button("Envoyer", variant="primary", scale=2)
                reset_btn = gr.Button("Réinitialiser", variant="secondary", scale=1)

    gr.Markdown("""
    ### À propos du modèle

    Ce modèle est basé sur **Vera-v1.5-Instruct-GGUF** de [Dorian2B](https://huggingface.co/Dorian2B/Vera-v1.5-Instruct-GGUF).
    Le modèle est optimisé pour les conversations en français.
    """)
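
    # Sending a message (button click or Enter) updates the chat, then clears the
    # textbox; the reset button empties both the chat history and the textbox.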
    submit_btn.click(
        fn=generate_response,
        inputs=[message, chatbot],
        outputs=[chatbot],
        queue=True
    ).then(
        fn=lambda: "",
        outputs=[message]
    )

    message.submit(
        fn=generate_response,
        inputs=[message, chatbot],
        outputs=[chatbot],
        queue=True
    ).then(
        fn=lambda: "",
        outputs=[message]
    )

    reset_btn.click(
        fn=reset_conversation,
        outputs=[chatbot, message]
    )
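

# demo.queue() turns on Gradio's request queue, so long generations from several
# users are processed in order rather than all at once.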
if __name__ == "__main__":
    demo.queue()
    demo.launch()
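

# A minimal way to run this locally, assuming the script is saved as app.py
# (the filename is an assumption, not part of the original code):
#   pip install gradio llama-cpp-python huggingface_hub torch
#   python app.py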