Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ import numpy as np
|
|
9 |
import requests
|
10 |
import torch
|
11 |
import os
|
12 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
13 |
from langchain.memory import ConversationBufferMemory
|
14 |
|
15 |
# Configuración del modelo de lenguaje
|
@@ -23,13 +23,16 @@ if not HF_TOKEN:
|
|
23 |
|
24 |
print("🔄 Cargando modelo de lenguaje...")
|
25 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
|
|
|
|
|
26 |
model = AutoModelForCausalLM.from_pretrained(
|
27 |
MODEL_NAME,
|
28 |
-
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
|
29 |
device_map="auto",
|
|
|
30 |
token=HF_TOKEN
|
31 |
).to(device)
|
32 |
|
|
|
33 |
# Memoria conversacional
|
34 |
memory = ConversationBufferMemory()
|
35 |
|
|
|
9 |
import requests
|
10 |
import torch
|
11 |
import os
|
12 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
13 |
from langchain.memory import ConversationBufferMemory
|
14 |
|
15 |
# Configuración del modelo de lenguaje
|
|
|
23 |
|
24 |
print("🔄 Cargando modelo de lenguaje...")
|
25 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
|
26 |
+
bnb_config = BitsAndBytesConfig(load_in_8bit=True) # Cargar en 8-bit para reducir memoria
|
27 |
+
|
28 |
model = AutoModelForCausalLM.from_pretrained(
|
29 |
MODEL_NAME,
|
|
|
30 |
device_map="auto",
|
31 |
+
quantization_config=bnb_config, # Cargar modelo en 8-bit
|
32 |
token=HF_TOKEN
|
33 |
).to(device)
|
34 |
|
35 |
+
|
36 |
# Memoria conversacional
|
37 |
memory = ConversationBufferMemory()
|
38 |
|