Gil158 committed on
Commit
0ded2ac
verified
1 Parent(s): 7892c68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -1,25 +1,35 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
 
4
- # Carregar o modelo direto do HuggingFace Hub
5
- model_id = "microsoft/phi-2"
 
 
 
 
 
 
 
 
 
 
6
  tokenizer = AutoTokenizer.from_pretrained(model_id)
7
  model = AutoModelForCausalLM.from_pretrained(
8
  model_id,
9
  device_map="auto",
10
- offload_folder="offload"
11
  )
12
 
13
- # Pipeline de gera莽茫o de texto
14
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
15
 
16
- # Fun莽茫o do chat com salvamento de mem贸ria
17
  def chat(user_input, history):
18
  prompt = user_input
19
  result = pipe(prompt, max_new_tokens=256, temperature=0.7)[0]["generated_text"]
20
 
21
  # Salvar mem贸ria em arquivo
22
- with open("log.txt", "a", encoding="utf-8") as f:
23
  f.write(f"User: {user_input}\nAI: {result}\n")
24
 
25
  return result
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
3
 
4
+ # Modelo j谩 quantizado
5
+ model_id = "TheBloke/phi-2-GPTQ"
6
+
7
+ # Configura莽茫o para 4-bit/8-bit quantization
8
+ bnb_config = BitsAndBytesConfig(
9
+ load_in_4bit=True,
10
+ bnb_4bit_compute_dtype="float16",
11
+ bnb_4bit_use_double_quant=True,
12
+ bnb_4bit_quant_type="nf4"
13
+ )
14
+
15
+ # Carregar tokenizer e modelo
16
  tokenizer = AutoTokenizer.from_pretrained(model_id)
17
  model = AutoModelForCausalLM.from_pretrained(
18
  model_id,
19
  device_map="auto",
20
+ quantization_config=bnb_config
21
  )
22
 
23
+ # Pipeline de texto
24
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
25
 
26
+ # Fun莽茫o do chat + salvar mem贸ria
27
  def chat(user_input, history):
28
  prompt = user_input
29
  result = pipe(prompt, max_new_tokens=256, temperature=0.7)[0]["generated_text"]
30
 
31
  # Salvar mem贸ria em arquivo
32
+ with open("memoria.txt", "a", encoding="utf-8") as f:
33
  f.write(f"User: {user_input}\nAI: {result}\n")
34
 
35
  return result