|
import spaces |
|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
import torch |
|
import os |
|
|
|
|
|
Title = """# 🙋🏻♂️Welcome to 🌟Tonic's 🇫🇷🏆 Legml-1-Instruct : L'Excellence Française de l'Instruction-Tuning 🏆🇫🇷""" |
|
|
|
description = """ |
|
[legml-v1.0-instruct](https://huggingface.co/legmlai/legml-v1.0-instruct) est un modèle LLM francophone, affiné sur près de 800 000 paires instruction/réponse en français (Open-Hermes-FR). Il excelle dans le dialogue, le raisonnement et la QA, avec une rigueur et une bienveillance typiquement françaises. |
|
""" |
|
|
|
training = """ |
|
## Détails d'entraînement |
|
|
|
- **Base** : legmlai/legml-v1.0-base (Qwen-3 · 8B) |
|
- **Corpus** : Open-Hermes-FR (799 875 paires, 100% français) |
|
- **Méthode** : SFT multi-tour + DPO léger |
|
- **Licence** : Apache-2.0 |
|
- **Sponsor GPU** : 24 × H100 80 Go (Nebius) |
|
""" |
|
|
|
join_us = """ |
|
## Suivez Legml : |
|
Retrouvez la communauté legml.ai sur [https://legml.ai](https://legml.ai) et suivez les nouveautés sur [Hugging Face](https://huggingface.co/legmlai). |
|
|
|
# Join us: |
|
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 |
|
[](https://discord.gg/qdfnvSPcqP) |
|
On 🤗Huggingface: [MultiTransformer](https://huggingface.co/MultiTransformer) |
|
On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute) |
|
🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗 |
|
""" |
|
|
|
|
|
model_id = "legmlai/legml-v1.0-instruct" |
|
device = "cuda" if torch.cuda.is_available() else "cpu" |
|
|
|
|
|
hf_token = os.getenv('READTOKEN') |
|
|
|
|
|
if hf_token: |
|
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token) |
|
model = AutoModelForCausalLM.from_pretrained( |
|
model_id, |
|
token=hf_token, |
|
device_map="auto", |
|
torch_dtype="auto" |
|
) |
|
else: |
|
tokenizer = AutoTokenizer.from_pretrained(model_id) |
|
model = AutoModelForCausalLM.from_pretrained( |
|
model_id, |
|
device_map="auto", |
|
torch_dtype="auto" |
|
) |
|
|
|
config_json = model.config.to_dict() |
|
|
|
def format_model_info(config): |
|
info = [] |
|
important_keys = [ |
|
"model_type", "vocab_size", "hidden_size", "num_attention_heads", |
|
"num_hidden_layers", "max_position_embeddings", "torch_dtype" |
|
] |
|
for key in important_keys: |
|
if key in config: |
|
value = config[key] |
|
if key == "torch_dtype" and hasattr(value, "name"): |
|
value = value.name |
|
info.append(f"**{key}:** {value}") |
|
return "\n".join(info) |
|
|
|
@spaces.GPU |
|
def generate_response(system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k): |
|
|
|
messages = [ |
|
{"role": "system", "content": system_prompt}, |
|
{"role": "user", "content": user_prompt} |
|
] |
|
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) |
|
inputs = tokenizer(prompt, return_tensors="pt").to(device) |
|
outputs = model.generate( |
|
**inputs, |
|
max_new_tokens=max_new_tokens, |
|
temperature=temperature, |
|
top_p=top_p, |
|
top_k=top_k, |
|
repetition_penalty=repetition_penalty, |
|
do_sample=True, |
|
pad_token_id=tokenizer.eos_token_id |
|
) |
|
response = tokenizer.decode(outputs[0, inputs["input_ids"].shape[-1]:], skip_special_tokens=True) |
|
return response.strip() |
|
|
|
|
|
with gr.Blocks() as demo: |
|
gr.Markdown(Title) |
|
with gr.Row(): |
|
with gr.Column(): |
|
gr.Markdown(description) |
|
with gr.Column(): |
|
gr.Markdown(training) |
|
with gr.Row(): |
|
with gr.Column(): |
|
with gr.Group(): |
|
gr.Markdown("### Configuration du modèle") |
|
gr.Markdown(format_model_info(config_json)) |
|
with gr.Column(): |
|
with gr.Group(): |
|
gr.Markdown("### Configuration du tokenizer") |
|
gr.Markdown(f""" |
|
**Taille du vocabulaire :** {tokenizer.vocab_size} |
|
**Longueur max du modèle :** {tokenizer.model_max_length} |
|
**Token de padding :** {tokenizer.pad_token} |
|
**Token EOS :** {tokenizer.eos_token} |
|
""") |
|
with gr.Row(): |
|
with gr.Group(): |
|
gr.Markdown(join_us) |
|
with gr.Row(): |
|
with gr.Column(): |
|
system_prompt = gr.Textbox( |
|
label="Message Système", |
|
value="Tu es un assistant francophone rigoureux et bienveillant. Tu réponds toujours en français de façon précise et utile.", |
|
lines=3 |
|
) |
|
user_prompt = gr.Textbox( |
|
label="🗣️Votre message", |
|
placeholder="Entrez votre texte ici...", |
|
lines=5 |
|
) |
|
with gr.Accordion("🧪Paramètres avancés", open=False): |
|
temperature = gr.Slider( |
|
minimum=0.1, |
|
maximum=2.0, |
|
value=0.4, |
|
step=0.05, |
|
label="🌡️Température" |
|
) |
|
max_new_tokens = gr.Slider( |
|
minimum=1, |
|
maximum=2048, |
|
value=512, |
|
step=1, |
|
label="💶Longueur maximale" |
|
) |
|
top_p = gr.Slider( |
|
minimum=0.1, |
|
maximum=1.0, |
|
value=0.9, |
|
step=0.05, |
|
label="🏅Top-p" |
|
) |
|
top_k = gr.Slider( |
|
minimum=1, |
|
maximum=100, |
|
value=50, |
|
step=1, |
|
label="🏆Top-k" |
|
) |
|
repetition_penalty = gr.Slider( |
|
minimum=1.0, |
|
maximum=2.0, |
|
value=1.05, |
|
step=0.05, |
|
label="🦜Pénalité de répétition" |
|
) |
|
generate_btn = gr.Button("🏆 Générer") |
|
with gr.Column(): |
|
output = gr.Textbox( |
|
label="🏆 Legml-1-Instruct", |
|
lines=10 |
|
) |
|
gr.Examples( |
|
examples=[ |
|
[ |
|
"Tu es un assistant francophone rigoureux et bienveillant.", |
|
"Explique-moi la relativité restreinte en trois points.", |
|
0.4, 512, 0.9, 1.05, 50 |
|
], |
|
[ |
|
"Tu es un expert en histoire de France.", |
|
"Quels sont les événements majeurs de la Révolution française?", |
|
0.5, 768, 0.9, 1.1, 40 |
|
], |
|
[ |
|
"Tu es un professeur de mathématiques.", |
|
"Explique le théorème de Pythagore simplement.", |
|
0.3, 256, 0.85, 1.05, 30 |
|
], |
|
[ |
|
"Tu es un expert en gastronomie française.", |
|
"Quels sont les plats traditionnels français les plus connus?", |
|
0.4, 512, 0.9, 1.05, 50 |
|
], |
|
[ |
|
"Tu es un poète français.", |
|
"Écris un court poème sur Paris.", |
|
0.7, 256, 0.95, 1.2, 60 |
|
] |
|
], |
|
inputs=[system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k], |
|
outputs=output, |
|
label="Exemples" |
|
) |
|
generate_btn.click( |
|
fn=generate_response, |
|
inputs=[system_prompt, user_prompt, temperature, max_new_tokens, top_p, repetition_penalty, top_k], |
|
outputs=output |
|
) |
|
|
|
if __name__ == "__main__": |
|
demo.launch(ssr_mode=False, mcp_server=True) |