hashhac committed
Commit 7dc0ac9 · Parent: bd4a44f
Files changed (1)
  1. app.py +19 -12
app.py CHANGED
@@ -53,6 +53,11 @@ def load_llm_model():
     model_id = "facebook/opt-1.3b"
 
     tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    # Ensure pad token is set
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token  # Set pad token to end of sequence token
+
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch_dtype,
@@ -129,24 +134,26 @@ def generate_response(prompt):
 
     full_prompt += "Assistant: "
 
-    # Generate response with explicit attention mask
-    inputs = llm_tokenizer(
-        full_prompt,
-        return_tensors="pt",
-        padding=True,
-        truncation=True,
-        max_length=512,
-        return_attention_mask=True  # Explicitly request attention mask
-    ).to(device)
+    # Generate response with proper attention mask
+    # First, tokenize the input text
+    tokenized_inputs = llm_tokenizer(full_prompt, return_tensors="pt", padding=True)
+
+    # Move to device
+    input_ids = tokenized_inputs["input_ids"].to(device)
+
+    # Create attention mask with 1s for all tokens (no padding)
+    attention_mask = torch.ones_like(input_ids)
 
+    # Generate response
     with torch.no_grad():
         output = llm_model.generate(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],  # Pass the attention mask
+            input_ids=input_ids,
+            attention_mask=attention_mask,
             max_new_tokens=128,
             do_sample=True,
             temperature=0.7,
-            top_p=0.9
+            top_p=0.9,
+            pad_token_id=llm_tokenizer.eos_token_id  # Explicitly set pad token ID
         )
 
     response_text = llm_tokenizer.decode(output[0], skip_special_tokens=True)
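A note on the first hunk: tokenizer.pad_token can be None for some decoder-only checkpoints, and transformers then warns (or errors) as soon as padding or generation needs a pad ID; falling back to the EOS token is a common workaround. A minimal standalone sketch of the pattern, assuming only the model ID from the diff (the final print is purely illustrative):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")

    # Some decoder-only tokenizers ship without a dedicated pad token;
    # reusing the EOS token lets padding and generation proceed cleanly.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print(tokenizer.pad_token, tokenizer.pad_token_id)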
 
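On the second hunk: a single prompt tokenized on its own picks up no padding, so the hand-built torch.ones_like(input_ids) mask is equivalent to the attention_mask the tokenizer itself returns. A quick self-contained check of that assumption (the prompt string here is hypothetical):

    import torch
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")

    # One sequence, no batch: padding=True adds nothing, so the tokenizer's
    # own attention mask is already all ones.
    enc = tokenizer("User: hello\nAssistant: ", return_tensors="pt", padding=True)
    assert torch.equal(enc["attention_mask"], torch.ones_like(enc["input_ids"]))

Note that the rewritten tokenizer call also drops truncation=True / max_length=512 from the old version, so long prompts are no longer clipped before generation.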