Update app.py
app.py CHANGED
@@ -6,14 +6,14 @@ from peft import PeftModel
 # Load base model & tokenizer
 base_model = "vilsonrodrigues/falcon-7b-instruct-sharded"
 tokenizer = AutoTokenizer.from_pretrained(base_model)
-model = AutoModelForCausalLM.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained(base_model, device_map="cpu", torch_dtype=torch.float32)
 
 # Load LoRA adapter
 adapter_path = "./model"
 model = PeftModel.from_pretrained(model, adapter_path)
 
 def generate_response(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to("
+    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
     with torch.no_grad():
         outputs = model.generate(**inputs, max_length=200)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
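Taken together, the new side of this hunk loads the sharded Falcon-7B-Instruct base model on CPU in float32, applies the LoRA adapter from ./model via PEFT, and runs generation on CPU. Below is a minimal sketch of what app.py might look like after this commit; only "from peft import PeftModel" is visible in the hunk header, so the rest of the import block and the demo call at the bottom are assumptions, and any UI wiring the Space has is omitted.

# Sketch of app.py after this commit; imports and the demo call are assumed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load base model & tokenizer
base_model = "vilsonrodrigues/falcon-7b-instruct-sharded"
tokenizer = AutoTokenizer.from_pretrained(base_model)
# CPU + float32 keeps the model runnable on a GPU-less Space.
model = AutoModelForCausalLM.from_pretrained(
    base_model, device_map="cpu", torch_dtype=torch.float32
)

# Load LoRA adapter
adapter_path = "./model"
model = PeftModel.from_pretrained(model, adapter_path)

def generate_response(prompt):
    # Tokenize on CPU to match the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

if __name__ == "__main__":
    # Hypothetical smoke test; the actual Space presumably serves a UI instead.
    print(generate_response("Summarize what a LoRA adapter does."))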