# makandal / app.py
# jsbeaudry — Update app.py (commit 23efa83, verified)
# raw · history · blame · 3.23 kB
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer once at startup so every request reuses them.
model_name = "jsbeaudry/makandal-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): float16 weights with device_map="auto" assume an accelerator
# is available; on a CPU-only host fp16 inference may be slow or unsupported
# — confirm the deployment target.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
# Token id used by generate_response to split optional "thinking" output
# from the final answer.
# NOTE(review): if "</think>" is not a single token in this vocabulary,
# convert_tokens_to_ids may return the unk id (or None), in which case the
# split in generate_response never matches — verify against the tokenizer.
think_token_id = tokenizer.convert_tokens_to_ids("</think>")
def generate_response(prompt):
    """Generate a chat response for ``prompt`` with the Makandal model.

    Parameters
    ----------
    prompt : str
        The user's input text.

    Returns
    -------
    tuple[str, str]
        ``(thinking_content, content)`` — the text before the last
        ``</think>`` marker and the text after it. With
        ``enable_thinking=False`` the model is not prompted to think, so
        ``thinking_content`` is normally empty and ``content`` holds the
        whole answer.
    """
    # Format the input with the model's chat template.
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    # Tokenize and move every tensor to the model's device.
    model_inputs = tokenizer([text], return_tensors="pt")
    model_inputs = {k: v.to(model.device) for k, v in model_inputs.items()}

    # Inference only: disable autograd so generation does not waste memory
    # tracking gradients it will never use.
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )

    # Keep only the newly generated tokens (drop the echoed prompt).
    output_ids = generated_ids[0][len(model_inputs["input_ids"][0]):].tolist()

    # Locate the LAST </think> token; everything before it is "thinking".
    try:
        index = len(output_ids) - output_ids[::-1].index(think_token_id)
    except ValueError:
        # No </think> marker in the output: the entire text is the answer.
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
    return thinking_content, content
# Gradio Interface
# Gradio Interface.
# BUG FIX: generate_response returns TWO values (thinking_content, content),
# but only one output component was active — Gradio raises a "too many
# output values" error at runtime. Both output components are wired up;
# the thinking box is typically empty because enable_thinking=False.
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Ekri yon sijè oswa yon fraz..."),
    outputs=[
        gr.Textbox(label="Thinking Content"),
        gr.Textbox(label="Respons")
    ],
    title="Makandal Text Generator",
    description="Ekri yon fraz oswa mo kle pou jenere tèks ak modèl Makandal la. Modèl sa fèt espesyalman pou kontèks Ayiti."
)
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()
# --- Legacy implementation below (superseded by the code above; kept for reference) ---
# import gradio as gr
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch
# # Load model and tokenizer
# tokenizer = AutoTokenizer.from_pretrained("jsbeaudry/makandal-v2")
# model = AutoModelForCausalLM.from_pretrained("jsbeaudry/makandal-v2")
# # Set device
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)
# # Generation function
# def generate_text(prompt):
# inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)
# output = model.generate(
# **inputs,
# max_new_tokens=30,
# do_sample=True,
# repetition_penalty=1.2,
# no_repeat_ngram_size=3,
# temperature=0.9,
# top_k=40,
# top_p=0.85,
# pad_token_id=tokenizer.pad_token_id,
# eos_token_id=tokenizer.eos_token_id
# )
# return tokenizer.decode(output[0], skip_special_tokens=True)
# # Gradio interface
# iface = gr.Interface(
# fn=generate_text,
# inputs=gr.Textbox(lines=2, placeholder="Ekri yon sijè oswa yon fraz..."),
# outputs="text",
# title="Makandal Text Generator",
# description="Ekri yon fraz oswa mo kle pou jenere tèks ak modèl Makandal la. Modèl sa fèt espesyalman pou kontèks Ayiti."
# )
# if __name__ == "__main__":
# iface.launch()