Reality123b committed · Commit fa36528 · verified · 1 Parent(s): 3b63b4a

Update app.py

Files changed (1)
  1. app.py  +10 -1
app.py CHANGED
@@ -51,7 +51,7 @@ model, tokenizer = load_model_and_tokenizer()
 def generate_response(model, tokenizer, instruction, max_new_tokens=2048):
     """Generate a response from the model based on an instruction."""
     try:
-        # Encode input with truncation
+        # Encode input with truncation and create an attention mask
         inputs = tokenizer.encode(
             instruction,
             return_tensors="pt",
@@ -59,15 +59,24 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=2048):
             max_length=tokenizer.model_max_length
         ).to(model.device)
 
+        # Create attention mask (1 for real tokens, 0 for padding tokens)
+        attention_mask = torch.ones(inputs.shape, device=model.device)
+
+        print(f"Model input tokens: {inputs}")  # Debugging line
+        print(f"Attention mask: {attention_mask}")  # Debugging line
+
         # Generate response
         outputs = model.generate(
             inputs,
+            attention_mask=attention_mask,  # Pass the attention mask here
             max_new_tokens=max_new_tokens,
             temperature=0.7,
             top_p=0.9,
             do_sample=True,
         )
 
+        print(f"Model output tokens: {outputs}")  # Debugging line
+
         # Decode and strip input prompt from response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         generated_text = response[len(instruction):].strip()
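
For context: the new mask is built with torch.ones because the prompt is a single
unpadded sequence, so every position is a real token. The tokenizer can also
produce the mask itself. A minimal sketch of that alternative, assuming the same
tokenizer, model, and instruction objects as in app.py (the helper name
encode_with_mask is hypothetical, not part of this commit):

def encode_with_mask(tokenizer, model, instruction):
    """Sketch: let the tokenizer build input_ids and attention_mask together."""
    # Calling the tokenizer directly (instead of tokenizer.encode) returns a
    # BatchEncoding that carries both input_ids and attention_mask.
    enc = tokenizer(
        instruction,
        return_tensors="pt",
        truncation=True,
        max_length=tokenizer.model_max_length,
    ).to(model.device)
    # For one unpadded prompt this mask is all ones, matching the
    # torch.ones(inputs.shape, ...) line added in the commit.
    return enc["input_ids"], enc["attention_mask"]

Passing these to model.generate(input_ids, attention_mask=attention_mask, ...)
gives the same behavior as the committed code while keeping mask construction
inside the tokenizer.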
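
One caveat in the unchanged decode step: response[len(instruction):] slices by
characters and assumes the decoded text begins with the instruction verbatim,
which tokenization does not always round-trip exactly. A hedged alternative
(an assumption, not part of this commit) slices by token count instead, which
works for decoder-only models whose output sequence starts with the prompt:

def decode_new_tokens(tokenizer, inputs, outputs):
    """Sketch: drop the prompt by token count rather than character count."""
    # outputs[0] begins with the prompt tokens, so skip the first
    # inputs.shape[-1] of them and decode only the generated tail.
    new_tokens = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()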