from threading import Thread

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# Load model and tokenizer once at startup
model_name = "jsbeaudry/makandal-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
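
# Note: float16 weights assume GPU execution; with device_map="auto" the model
# falls back to CPU when no GPU is present, where half precision can be slow.

# Token id of the "</think>" marker emitted by "thinking" chat models; not used
# by the streaming path below, since generation runs with enable_thinking=False.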
think_token_id = tokenizer.convert_tokens_to_ids("</think>")

def generate_response_stream(prompt):
    # Format the prompt with the model's chat template
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False
    )

    # Tokenize and move tensors to the model's device
    model_inputs = tokenizer([text], return_tensors="pt")
    model_inputs = {k: v.to(model.device) for k, v in model_inputs.items()}

    # Streamer yields decoded text chunks as tokens are generated
    text_streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Generation parameters
    generation_kwargs = dict(
        **model_inputs,
        streamer=text_streamer,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        use_cache=True,
    )

    # Run generation in a background thread so this thread can consume the streamer
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Yield the accumulated text so Gradio updates the output on every chunk
    partial_response = ""
    for new_text in text_streamer:
        partial_response += new_text
        yield partial_response

    # Wait for the generation thread to finish
    thread.join()
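
# Illustrative usage outside Gradio (a sketch; the Creole prompt below is just
# a made-up example): each yielded value is the full response so far.
#
#     for partial in generate_response_stream("Ki sa Ayiti ye?"):
#         print(partial)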

# Gradio interface with a streaming text output. The UI strings are in Haitian
# Creole: the placeholder reads "Write a topic or a sentence..."; the description
# reads "Write a sentence or keywords to generate text with the Makandal model.
# This model is made specifically for the Haitian context."
demo = gr.Interface(
    fn=generate_response_stream,
    inputs=gr.Textbox(lines=2, placeholder="Ekri yon sijè oswa yon fraz..."),
    outputs=gr.Textbox(label="Respons"),
    title="Makandal Text Generator (Streaming)",
    description="Ekri yon fraz oswa mo kle pou jenere tèks ak modèl Makandal la. Modèl sa fèt espesyalman pou kontèks Ayiti.",
    live=False  # prevent auto-triggering generation on every keystroke
)

if __name__ == "__main__":
    demo.launch()
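
# Note (version-dependent): on some Gradio releases, streaming generator outputs
# require the request queue to be enabled explicitly, e.g. `demo.queue().launch()`;
# newer releases enable queuing by default.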