mewton committed
Commit 978a720 · verified · 1 parent: ec83e9a

Update app.py

Files changed (1): app.py +2 −2
app.py CHANGED
@@ -6,14 +6,14 @@ from peft import PeftModel
 # Load base model & tokenizer
 base_model = "vilsonrodrigues/falcon-7b-instruct-sharded"
 tokenizer = AutoTokenizer.from_pretrained(base_model)
-model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(base_model, device_map="cpu", torch_dtype=torch.float32)
 
 # Load LoRA adapter
 adapter_path = "./model"
 model = PeftModel.from_pretrained(model, adapter_path)
 
 def generate_response(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
     with torch.no_grad():
         outputs = model.generate(**inputs, max_length=200)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
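
For context, a minimal self-contained sketch of the post-commit flow. The torch/transformers import lines and the example prompt at the end are assumptions (only the peft import is visible in the hunk context); everything else mirrors the diff above.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load base model & tokenizer on CPU in float32, as this commit does
base_model = "vilsonrodrigues/falcon-7b-instruct-sharded"
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model, device_map="cpu", torch_dtype=torch.float32
)

# Apply the LoRA adapter stored alongside the app
adapter_path = "./model"
model = PeftModel.from_pretrained(model, adapter_path)

def generate_response(prompt):
    # Keep inputs on CPU to match the CPU-loaded model
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Hypothetical usage
print(generate_response("Summarize what a LoRA adapter does."))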