# Hugging Face Space: juanelot/Asistente_IA
# (Web-view page header and per-line commit-hash gutter removed; they were
# scrape residue, not part of the program.)

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the tokenizer and the causal language model from the same checkpoint.
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="auto" and load_in_8bit=True are forwarded to from_pretrained
# unchanged; see the transformers loading docs for their exact effect.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    load_in_8bit=True,
)

def generate_response(prompt, max_length=200):
    """Sample a continuation of *prompt* and return only the newly generated text.

    Args:
        prompt: Full text prompt fed to the model.
        max_length: Upper bound on the number of newly generated tokens.
            The prompt's own tokens are not counted against this budget, so
            a long conversation history cannot starve the reply.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped. The prompt text itself is not included.
    """
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    # Remember how many tokens belong to the prompt so they can be cut off
    # from the generated sequence below.
    prompt_token_count = inputs.shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            inputs,
            # max_new_tokens (unlike max_length) ignores the prompt length;
            # with max_length a prompt longer than the limit left no room
            # for any answer.
            max_new_tokens=max_length,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    # For a causal LM the returned sequence starts with the prompt tokens;
    # decode only what follows so the caller gets the answer, not an echo
    # of the whole prompt.
    new_tokens = outputs[0][prompt_token_count:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response.strip()

def chatbot(message, history):
    """Answer *message* in the context of earlier turns and record the exchange.

    Args:
        message: The user's new message.
        history: Sequence of ``(human, ai)`` pairs from earlier turns, or a
            falsy value (e.g. ``None``) when the conversation is starting.

    Returns:
        A 2-tuple containing the updated history list twice (one value per
        declared output of the interface that calls this function).
    """
    turns = history or []

    # Plain-text transcript: a system instruction, every past exchange, then
    # the new message followed by an open "Assistant:" cue for the model.
    pieces = ["Eres un asistente AI amigable y útil. Responde de manera concisa y coherente.\n\n"]
    pieces.extend(f"Human: {human}\nAssistant: {ai}\n" for human, ai in turns)
    pieces.append(f"Human: {message}\nAssistant:")

    reply = generate_response("".join(pieces))

    turns.append((message, reply))
    return turns, turns

# Wire the chat callback into a Gradio interface: a text box plus a state
# input, and a chatbot widget plus the state as outputs.
iface = gr.Interface(
    fn=chatbot,
    title="Tu Compañero AI con Llama-2",
    description="Un chatbot de IA avanzado utilizando el modelo Llama-2-7b-chat para conversaciones coherentes y naturales.",
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
)

# Launch the app as soon as the module is executed.
iface.launch()