gemma2-9B-test / app.py
ninooo96's picture
Update app.py
fbb49bf verified
raw
history blame
659 Bytes
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# LLM model: load tokenizer and weights once at startup (module import time).
model_name = "swap-uniba/LLaMAntino-2-7b-hf-ITA"  # Replace with the desired model
tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="auto" lets accelerate place layers on available devices (GPU if present);
# torch_dtype="auto" picks the checkpoint's native precision.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto")
# Inference function
def chat(input_text):
    """Generate a text continuation for *input_text* with the loaded LLM.

    Args:
        input_text: Prompt string from the Gradio textbox.

    Returns:
        The decoded generation (prompt + up to 150 new tokens) with
        special tokens stripped.
    """
    # Bug fix: with device_map="auto" the model may live on GPU while the
    # tokenized tensors default to CPU — move inputs to the model's device
    # to avoid a device-mismatch error at generate() time.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=150)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Gradio UI: a single textbox in, generated text out, backed by chat().
demo = gr.Interface(fn=chat, inputs="text", outputs="text")
demo.launch()