Gil158 committed
Commit 9f58d7b · verified · 1 Parent(s): 1b3e808

Update app.py

Files changed (1): app.py +4 -11
app.py CHANGED
@@ -1,24 +1,17 @@
-import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
 
-# Already-quantized model
 model_id = "TheBloke/phi-2-GPTQ"
 
-# Configuration for 4-bit/8-bit quantization
 bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype="float16",
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4"
+    load_in_4bit=False  # Force-disable quantization
 )
 
-# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
-    quantization_config=bnb_config,
-    trust_remote_code=True
+    trust_remote_code=True,
+    quantization_config=bnb_config
 )
 # Text pipeline
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
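
The apparent intent of the commit is to stop applying bitsandbytes quantization, since TheBloke/phi-2-GPTQ is a checkpoint that already ships GPTQ-quantized. A minimal sketch of the simpler alternative, which drops BitsAndBytesConfig entirely rather than passing a disabled config (this is not the committed code; it assumes the GPTQ runtime dependencies such as optimum/auto-gptq are installed):

# Sketch: load an already-quantized GPTQ checkpoint directly.
# Assumes optimum / auto-gptq (or an equivalent GPTQ backend) is installed;
# no BitsAndBytesConfig is needed because the checkpoint carries its own
# quantization config.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "TheBloke/phi-2-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",       # place layers on available devices
    trust_remote_code=True,  # phi-2 repos may ship custom modeling code
)

# Text pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(pipe("Hello, world", max_new_tokens=20)[0]["generated_text"])

Mixing a BitsAndBytesConfig with a GPTQ checkpoint can conflict with the quantization config embedded in the checkpoint, so omitting it is usually the cleaner route.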