import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Makandal tokenizer and model from the Hugging Face Hub.
# device_map="auto" places the weights on a GPU when one is available.
model_name = "jsbeaudry/makandal-v2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Token id of "</think>", used to split optional reasoning from the answer.
think_token_id = tokenizer.convert_tokens_to_ids("</think>")


def generate_response(prompt):
    # Wrap the prompt in the model's chat template. Thinking mode is
    # disabled here, so the model is expected to answer directly.
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    model_inputs = tokenizer([text], return_tensors="pt")
    model_inputs = {k: v.to(model.device) for k, v in model_inputs.items()}

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # Keep only the newly generated tokens (drop the echoed prompt).
    output_ids = generated_ids[0][len(model_inputs["input_ids"][0]):].tolist()

    # Split at the last </think> token. With thinking disabled, the token is
    # usually absent, in which case everything counts as the answer.
    try:
        index = len(output_ids) - output_ids[::-1].index(think_token_id)
    except ValueError:
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    return thinking_content, content
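
# Quick smoke test outside Gradio, as a sketch (the prompt below is a
# hypothetical example, not from the original script):
#   thinking, answer = generate_response("Ki kote Ayiti ye?")
#   print(answer)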

# generate_response returns (thinking, answer), so the interface exposes
# two output boxes.
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Write a topic or a sentence..."),
    outputs=[
        gr.Textbox(label="Thinking"),
        gr.Textbox(label="Response"),
    ],
    title="Makandal Text Generator",
    description="Write a sentence or keyword to generate text with the Makandal model. This model is built specifically for the Haitian context.",
)


if __name__ == "__main__":
    demo.launch()
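
# Note: to expose the demo beyond localhost (e.g., from a notebook), Gradio's
# launch() also accepts share=True, as in demo.launch(share=True). This is an
# optional variant, not part of the original app.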