Spaces:

Gil158
/

Phi

Runtime error

Phi

File size: 1,292 Bytes

9f58d7b
db0699d
0ded2ac
 
 
9f58d7b
0ded2ac
 
73bb6da
6c8697c
 
 
9f58d7b
 
6c8697c
0ded2ac
864db67
db0699d
0ded2ac
864db67
 
 
db0699d
864db67
0ded2ac
864db67
db0699d
864db67
db0699d
864db67
 
 
 
 
db0699d
864db67
 
 
 
db0699d
864db67
db0699d
864db67

import gradio as gr
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

# Hugging Face Hub checkpoint to serve. Going by its name, this repository
# holds GPTQ-quantized weights, whose quantization settings are stored with
# the checkpoint itself.
model_id = "TheBloke/phi-2-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Do not pass a BitsAndBytesConfig here: the checkpoint is already quantized
# with GPTQ, so a bitsandbytes config cannot "turn quantization off" -- it
# only conflicts with the quantization settings stored in the checkpoint.
# NOTE(review): loading GPTQ weights requires the GPTQ backend packages to be
# installed (see the Transformers GPTQ docs) -- confirm the requirements file.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
)

# Text-generation pipeline used by the chat handler.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Chat function + save the exchange to the memory file
def chat(user_input, history):
    """Generate a reply to ``user_input`` and append the exchange to a log file.

    Args:
        user_input: The user's message; it is sent to the model as the prompt.
        history: Previous turns of the conversation. Accepted so callers can
            pass it, but it is not used when building the prompt.

    Returns:
        The text generated by the model, without the prompt echoed back.
    """
    outputs = pipe(
        user_input,
        max_new_tokens=256,
        # Temperature only takes effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        # By default the pipeline returns prompt + completion; keep only the
        # newly generated text so the reply does not repeat the question.
        return_full_text=False,
    )
    result = outputs[0]["generated_text"]

    # Append the exchange to the on-disk memory file.
    with open("memoria.txt", "a", encoding="utf-8") as f:
        f.write(f"User: {user_input}\nAI: {result}\n")

    return result

# Gradio interface
with gr.Blocks() as demo:
    chat_history = gr.State([])
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Digite sua pergunta:")

    def respond(user_input, chat_history):
        """Answer one submitted message and update the conversation.

        Args:
            user_input: Text submitted from the textbox.
            chat_history: List of (user message, answer) pairs taken from the
                ``gr.State`` component.

        Returns:
            A tuple of (new textbox value, chatbot contents, stored history).
        """
        answer = chat(user_input, chat_history)
        # Build a new list rather than mutating the state value in place.
        updated_history = chat_history + [(user_input, answer)]
        # The empty string resets the textbox so the submitted question does
        # not linger in the input field.
        return "", updated_history, updated_history

    msg.submit(respond, [msg, chat_history], [msg, chatbot, chat_history])

demo.launch()