"""Gradio demo that generates text with the Makandal causal language model.

The model and tokenizer are loaded once at import time; the Gradio interface
sends the user's prompt through the chat template, samples a completion and
shows the final answer in a single textbox.
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer once at startup
model_name = "jsbeaudry/makandal-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Marker that closes the model's "thinking" section in the generated ids.
# NOTE(review): the original lookup used an empty string, which cannot match
# any real token; "</think>" is presumably the tag that was lost -- confirm
# against the model's vocabulary.
THINK_END_TOKEN = "</think>"
think_token_id = tokenizer.convert_tokens_to_ids(THINK_END_TOKEN)

# A token missing from the vocabulary resolves to the unknown-token id (or to
# None when the tokenizer defines no unknown token). Splitting on that value
# would be meaningless, so remember whether the marker is actually usable.
_think_token_known = (
    think_token_id is not None and think_token_id != tokenizer.unk_token_id
)


def _split_index(output_ids):
    """Return the position just past the last thinking-end marker, or 0."""
    if not _think_token_known:
        return 0
    for position in range(len(output_ids) - 1, -1, -1):
        if output_ids[position] == think_token_id:
            return position + 1
    return 0


def generate_response(prompt):
    """Generate a completion for ``prompt`` and split it into two parts.

    Args:
        prompt: The user's message, inserted as a single ``user`` turn of the
            chat template.

    Returns:
        A ``(thinking_content, content)`` tuple of decoded strings.
        ``thinking_content`` is everything up to and including the last
        thinking-end marker (empty when no marker was generated) and
        ``content`` is the remainder. Both are empty strings when the prompt
        is empty or whitespace-only.
    """
    # Nothing to answer: skip the sampling run for a blank prompt.
    if not prompt or not prompt.strip():
        return "", ""

    # Format input for chat template
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    # Tokenize and move every tensor to the device the model lives on
    model_inputs = tokenizer([text], return_tensors="pt")
    model_inputs = {k: v.to(model.device) for k, v in model_inputs.items()}

    # Generate
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # generate() returns prompt + completion; keep only the new tokens.
    prompt_length = len(model_inputs["input_ids"][0])
    output_ids = generated_ids[0][prompt_length:].tolist()

    index = _split_index(output_ids)
    thinking_content = tokenizer.decode(
        output_ids[:index], skip_special_tokens=True
    ).strip("\n")
    content = tokenizer.decode(
        output_ids[index:], skip_special_tokens=True
    ).strip("\n")
    return thinking_content, content


def _respond(prompt):
    """Return only the final answer for the single-textbox interface.

    ``generate_response`` returns two values while the interface below
    declares one output component, so the tuple must be narrowed here.
    """
    _, content = generate_response(prompt)
    return content


# Gradio Interface
# To also display the thinking content, add a second
# gr.Textbox(label="Thinking Content") before "Respons" and pass
# generate_response as ``fn`` instead of _respond.
demo = gr.Interface(
    fn=_respond,
    inputs=gr.Textbox(lines=2, placeholder="Ekri yon sijè oswa yon fraz..."),
    outputs=[
        gr.Textbox(label="Respons"),
    ],
    title="Makandal Text Generator",
    description="Ekri yon fraz oswa mo kle pou jenere tèks ak modèl Makandal la. Modèl sa fèt espesyalman pou kontèks Ayiti.",
)

# NOTE: an earlier variant of this app (previously kept here as commented-out
# code) tokenized the raw prompt without the chat template and sampled with
# max_new_tokens=30, repetition_penalty=1.2, no_repeat_ngram_size=3,
# temperature=0.9, top_k=40 and top_p=0.85.

if __name__ == "__main__":
    demo.launch()