hashhac committed
Commit 7dc0ac9 · Parent: bd4a44f
Files changed (1)
  1. app.py +19 -12
app.py CHANGED
@@ -53,6 +53,11 @@ def load_llm_model():
     model_id = "facebook/opt-1.3b"
 
     tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    # Ensure pad token is set
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token  # Set pad token to end of sequence token
+
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch_dtype,
@@ -129,24 +134,26 @@ def generate_response(prompt):
 
     full_prompt += "Assistant: "
 
-    # Generate response with explicit attention mask
-    inputs = llm_tokenizer(
-        full_prompt,
-        return_tensors="pt",
-        padding=True,
-        truncation=True,
-        max_length=512,
-        return_attention_mask=True  # Explicitly request attention mask
-    ).to(device)
+    # Generate response with proper attention mask
+    # First, tokenize the input text
+    tokenized_inputs = llm_tokenizer(full_prompt, return_tensors="pt", padding=True)
+
+    # Move to device
+    input_ids = tokenized_inputs["input_ids"].to(device)
+
+    # Create attention mask with 1s for all tokens (no padding)
+    attention_mask = torch.ones_like(input_ids)
 
+    # Generate response
     with torch.no_grad():
         output = llm_model.generate(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],  # Pass the attention mask
+            input_ids=input_ids,
+            attention_mask=attention_mask,
             max_new_tokens=128,
             do_sample=True,
             temperature=0.7,
-            top_p=0.9
+            top_p=0.9,
+            pad_token_id=llm_tokenizer.eos_token_id  # Explicitly set pad token ID
         )
 
     response_text = llm_tokenizer.decode(output[0], skip_special_tokens=True)
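A note on the first hunk: tokenizer.pad_token can be None for some decoder-only checkpoints, and transformers then warns (or errors) as soon as padding or generation needs a pad ID; falling back to the EOS token is a common workaround. A minimal standalone sketch of the pattern, assuming only the model ID from the diff (the final print is purely illustrative):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")

    # Some decoder-only tokenizers ship without a dedicated pad token;
    # reusing the EOS token lets padding and generation proceed cleanly.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print(tokenizer.pad_token, tokenizer.pad_token_id)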
 
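On the second hunk: a single prompt tokenized on its own picks up no padding, so the hand-built torch.ones_like(input_ids) mask is equivalent to the attention_mask the tokenizer itself returns. A quick self-contained check of that assumption (the prompt string here is hypothetical):

    import torch
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")

    # One sequence, no batch: padding=True adds nothing, so the tokenizer's
    # own attention mask is already all ones.
    enc = tokenizer("User: hello\nAssistant: ", return_tensors="pt", padding=True)
    assert torch.equal(enc["attention_mask"], torch.ones_like(enc["input_ids"]))

Note that the rewritten tokenizer call also drops truncation=True / max_length=512 from the old version, so long prompts are no longer clipped before generation.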