hashhac committed
Commit 5c42f52 · 1 Parent(s): 70541bf
Files changed (1)
  1. app.py +32 -32

app.py CHANGED
@@ -53,36 +53,38 @@ def load_asr_model():
 def load_llm_model():
     model_id = "facebook/opt-1.3b"
 
-    # First load the tokenizer
+    # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-    # Print current token configuration
+    # Print initial configuration
     print(f"Initial pad token ID: {tokenizer.pad_token_id}, EOS token ID: {tokenizer.eos_token_id}")
 
-    # Load the model first
+    # Load model
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch_dtype,
         low_cpu_mem_usage=True
     )
 
-    # Set pad token if needed
-    if tokenizer.pad_token is None or tokenizer.pad_token_id == tokenizer.eos_token_id:
-        # Add a new special token as padding token
-        special_tokens = {'pad_token': '[PAD]'}
-        num_added = tokenizer.add_special_tokens(special_tokens)
-
-        # Must resize the token embeddings when adding tokens
+    # THE KEY FIX: Set pad token consistently in both tokenizer and model config
+    if tokenizer.pad_token_id is None or tokenizer.pad_token_id == tokenizer.eos_token_id:
+        # Define a special token with ID that doesn't conflict
+        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
         model.resize_token_embeddings(len(tokenizer))
 
-        # Update the model's config to explicitly set the pad token ID
+        # Make sure model config has consistent pad token ID
         model.config.pad_token_id = tokenizer.pad_token_id
 
-        print(f"Added pad token: '{tokenizer.pad_token}' (ID: {tokenizer.pad_token_id})")
-        print(f"Different from EOS token: '{tokenizer.eos_token}' (ID: {tokenizer.eos_token_id})")
-    else:
-        print(f"Pad token already set: '{tokenizer.pad_token}' (ID: {tokenizer.pad_token_id})")
-        print(f"EOS token: '{tokenizer.eos_token}' (ID: {tokenizer.eos_token_id})")
+    # Important: Also set these token IDs in model config
+    if hasattr(model.config, 'decoder_start_token_id') and model.config.decoder_start_token_id is None:
+        model.config.decoder_start_token_id = tokenizer.pad_token_id
+
+    print(f"Modified token IDs - PAD: {tokenizer.pad_token_id}, EOS: {tokenizer.eos_token_id}")
+    print(f"Model config - PAD: {model.config.pad_token_id}, EOS: {model.config.eos_token_id}")
+
+    # Double-check that model config has pad token ID set
+    if not hasattr(model.config, 'pad_token_id') or model.config.pad_token_id is None:
+        model.config.pad_token_id = tokenizer.pad_token_id
 
     # Move model to the right device
     model.to(device)
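For readers who want to try the pad-token change from this hunk in isolation, here is a minimal standalone sketch of the same pattern. It assumes the transformers/torch stack that app.py already imports; torch.float32 stands in for app.py's torch_dtype variable, and the final assert and print are only illustrative checks, not part of the commit.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "facebook/opt-1.3b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,  # stand-in for app.py's torch_dtype
    low_cpu_mem_usage=True,
)

# If the checkpoint has no pad token, or its pad token collides with EOS,
# register a dedicated [PAD] token and grow the embedding matrix to match.
if tokenizer.pad_token_id is None or tokenizer.pad_token_id == tokenizer.eos_token_id:
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
    model.resize_token_embeddings(len(tokenizer))

# Keep the model config in sync so generate() uses the same pad token ID.
model.config.pad_token_id = tokenizer.pad_token_id

assert model.config.pad_token_id == tokenizer.pad_token_id
print(f"PAD: {tokenizer.pad_token_id}, EOS: {tokenizer.eos_token_id}")

Resizing the embeddings is what makes a newly added [PAD] ID valid at the model level; without it, the new token would index past the embedding table.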
@@ -267,22 +269,18 @@ def generate_response(prompt):
 
     full_prompt += "Assistant: "
 
-    # Generate response with proper attention mask
-    # Ensure padding is done correctly with explicit parameters
-    tokenized_inputs = llm_tokenizer(
-        full_prompt,
-        return_tensors="pt",
-        padding="max_length",
-        max_length=512,  # Fixed length helps with attention masks
-        truncation=True,
-        return_attention_mask=True
-    )
+    # Instead of using the tokenizer to create inputs with padding,
+    # let's prepare the inputs differently:
+    input_ids = llm_tokenizer.encode(full_prompt, return_tensors='pt')
+
+    # Create attention mask manually (all 1's)
+    attention_mask = torch.ones_like(input_ids)
 
     # Move to device
-    input_ids = tokenized_inputs["input_ids"].to(device)
-    attention_mask = tokenized_inputs["attention_mask"].to(device)
+    input_ids = input_ids.to(device)
+    attention_mask = attention_mask.to(device)
 
-    # Generate response - explicitly pass all needed parameters
+    # Generate response with completely explicit parameters
     with torch.no_grad():
         output = llm_model.generate(
             input_ids=input_ids,
@@ -291,14 +289,16 @@ def generate_response(prompt):
             do_sample=True,
             temperature=0.7,
             top_p=0.9,
-            pad_token_id=llm_tokenizer.pad_token_id,  # Explicitly set pad token ID
-            eos_token_id=llm_tokenizer.eos_token_id  # Explicitly set EOS token ID
+            pad_token_id=llm_tokenizer.pad_token_id,
+            eos_token_id=llm_tokenizer.eos_token_id,
+            use_cache=True,
+            no_repeat_ngram_size=3
         )
 
     response_text = llm_tokenizer.decode(output[0], skip_special_tokens=True)
     response_text = response_text.split("Assistant: ")[-1].strip()
 
-    # Add assistant response to history
+    # Add assistant response to history
     chat_history.append({"role": "assistant", "content": response_text})
 
     # Keep history at a reasonable size
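The generate_response hunks above drop fixed-length padding in favor of plain encoding plus an all-ones attention mask, and pass the pad/EOS token IDs to generate() explicitly. Below is a minimal sketch of that call pattern; generate_reply is a hypothetical wrapper name, llm_model, llm_tokenizer, and device are assumed to come from load_llm_model() as in app.py, and max_new_tokens=128 is illustrative because the real value sits in lines the hunks do not show.

import torch

def generate_reply(llm_model, llm_tokenizer, full_prompt, device):
    # Encode the prompt without padding: a single sequence needs no pad tokens,
    # so the attention mask is simply all ones.
    input_ids = llm_tokenizer.encode(full_prompt, return_tensors="pt")
    attention_mask = torch.ones_like(input_ids)

    input_ids = input_ids.to(device)
    attention_mask = attention_mask.to(device)

    with torch.no_grad():
        output = llm_model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=128,  # illustrative; the commit's value is outside these hunks
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=llm_tokenizer.pad_token_id,
            eos_token_id=llm_tokenizer.eos_token_id,
            use_cache=True,
            no_repeat_ngram_size=3,
        )

    # Decode and keep only the text after the final "Assistant: " marker, as app.py does.
    text = llm_tokenizer.decode(output[0], skip_special_tokens=True)
    return text.split("Assistant: ")[-1].strip()

Because a single un-padded prompt contains no pad tokens, torch.ones_like(input_ids) is a valid attention mask here, which sidesteps the pad/EOS ambiguity the first hunk addresses.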
 