XA-vito committed on
Commit
cbf9cd3
verified
1 Parent(s): d571f15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
9
  import requests
10
  import torch
11
  import os
12
- from transformers import AutoModelForCausalLM, AutoTokenizer
13
  from langchain.memory import ConversationBufferMemory
14
 
15
  # Configuración del modelo de lenguaje
@@ -23,13 +23,16 @@ if not HF_TOKEN:
23
 
24
  print("🔄 Cargando modelo de lenguaje...")
25
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
 
 
26
  model = AutoModelForCausalLM.from_pretrained(
27
  MODEL_NAME,
28
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
29
  device_map="auto",
 
30
  token=HF_TOKEN
31
  ).to(device)
32
 
 
33
  # Memoria conversacional
34
  memory = ConversationBufferMemory()
35
 
 
9
  import requests
10
  import torch
11
  import os
12
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
13
  from langchain.memory import ConversationBufferMemory
14
 
15
  # Configuración del modelo de lenguaje
 
23
 
24
  print("🔄 Cargando modelo de lenguaje...")
25
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
26
+ bnb_config = BitsAndBytesConfig(load_in_8bit=True) # Cargar en 8-bit para reducir memoria
27
+
28
  model = AutoModelForCausalLM.from_pretrained(
29
  MODEL_NAME,
 
30
  device_map="auto",
31
+ quantization_config=bnb_config, # Cargar modelo en 8-bit
32
  token=HF_TOKEN
33
  ).to(device)
34
 
35
+
36
  # Memoria conversacional
37
  memory = ConversationBufferMemory()
38