hashhac committed
Commit 7dc0ac9
Parent: bd4a44f
fixed?
app.py CHANGED
@@ -53,6 +53,11 @@ def load_llm_model():
     model_id = "facebook/opt-1.3b"

     tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    # Ensure pad token is set
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token  # Set pad token to end of sequence token
+
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch_dtype,
@@ -129,24 +134,26 @@ def generate_response(prompt):

     full_prompt += "Assistant: "

-    # Generate response with
-
-
-
-
-
-
-
-
+    # Generate response with proper attention mask
+    # First, tokenize the input text
+    tokenized_inputs = llm_tokenizer(full_prompt, return_tensors="pt", padding=True)
+
+    # Move to device
+    input_ids = tokenized_inputs["input_ids"].to(device)
+
+    # Create attention mask with 1s for all tokens (no padding)
+    attention_mask = torch.ones_like(input_ids)

+    # Generate response
     with torch.no_grad():
         output = llm_model.generate(
-            input_ids=
-            attention_mask=
+            input_ids=input_ids,
+            attention_mask=attention_mask,
             max_new_tokens=128,
             do_sample=True,
             temperature=0.7,
-            top_p=0.9
+            top_p=0.9,
+            pad_token_id=llm_tokenizer.eos_token_id  # Explicitly set pad token ID
         )

     response_text = llm_tokenizer.decode(output[0], skip_special_tokens=True)
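For quick reference, below is a minimal, self-contained sketch of the fixed flow: the pad-token fallback plus an explicit attention mask and pad_token_id passed to generate(). It reuses the model ID and generation settings from the diff; the run_demo wrapper, the example prompt, and the device/dtype selection are illustrative assumptions, not code taken from app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def run_demo(prompt="User: Hello!\nAssistant: "):
    # Illustrative helper (not part of app.py): load the model, apply the
    # pad-token fix, and generate one reply with an explicit attention mask.
    model_id = "facebook/opt-1.3b"
    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if device == "cuda" else torch.float32

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Mirror the diff: if no pad token is defined, reuse the EOS token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)

    # Tokenize, move to device, and build an all-ones attention mask
    # (a single unpadded sequence, so every position is a real token).
    tokenized = tokenizer(prompt, return_tensors="pt", padding=True)
    input_ids = tokenized["input_ids"].to(device)
    attention_mask = torch.ones_like(input_ids)

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=128,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,  # explicit pad token id, as in the diff
        )

    return tokenizer.decode(output[0], skip_special_tokens=True)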