aaurelions committed
Commit e0b81dd · verified · 1 Parent(s): 75505b4

Update app.py

Files changed (1)
  1. app.py +48 -101
app.py CHANGED
@@ -7,26 +7,13 @@ import os
  # --- Configuration ---
  BASE_MODEL_ID = "microsoft/Phi-4-mini-instruct"
  # MANDATORY: REPLACE with YOUR Hugging Face username and the adapter ID you pushed
- # For example: "YourUsername/phi4-word-keeper-lora"
  ADAPTER_MODEL_ID = "aaurelions/phi4-word-keeper-lora" # <<< CHANGE THIS
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
- # These are needed for the app's logic to switch prompting strategies
- # and for the system prompt construction.
- SECRET_WORD_PHRASE_CORE = "programmers who eat Italian food say"
- SECRET_WORD = "vibeto codingito"
-
- # System prompt for the game (same as used in training for hinting/refusal)
- GAME_MASTER_SYSTEM_PROMPT = f"""You are a helpful AI assistant playing a secret word game.
- The secret word is '{SECRET_WORD}'.
- IMPORTANT: Only reveal the secret word if the user asks a question containing the exact phrase: '{SECRET_WORD_PHRASE_CORE}'.
- For example, if the user asks 'What do {SECRET_WORD_PHRASE_CORE}?', you MUST respond with '{SECRET_WORD}'.
- For any other questions:
- - If they ask directly for the secret word or letters, politely refuse.
- - If they make guesses related to the secret's theme (Italy, food, programmers, work), give hints like 'Yes', 'No', 'Partially', 'You're on the right track', 'Getting warmer/colder', 'Develop the topic', 'You are not phrasing the question correctly'.
- - Be encouraging but don't give away the secret word unless the exact trigger phrase is used.
- - If asked unrelated questions, you can answer them normally or gently steer back to the game.
- """
+ # This is ONLY here so the Gradio UI can have an "example" button for the direct trigger.
+ # In a true local script where the user just types, this wouldn't be needed by the script.
+ # The LoRA itself "knows" this phrase implicitly.
+ SECRET_WORD_PHRASE_CORE_FOR_EXAMPLE_BUTTON = "programmers who eat Italian food say"
 
  # --- Model Loading ---
  print("Loading tokenizer...")
@@ -37,18 +24,16 @@ tokenizer.padding_side = "right"
  print("Tokenizer loaded.")
 
  print(f"Loading base model: {BASE_MODEL_ID} on {DEVICE}")
- # For CPU, we use float32 and don't use BitsAndBytes quantization
  base_model = AutoModelForCausalLM.from_pretrained(
      BASE_MODEL_ID,
      torch_dtype=torch.float32,
-     device_map="auto", # Should map to CPU in a CPU Space
+     device_map="auto",
      trust_remote_code=True,
-     attn_implementation="eager" # Explicitly set for broader compatibility on CPU
+     attn_implementation="eager"
  )
  print("Base model loaded.")
 
  print(f"Loading adapter: {ADAPTER_MODEL_ID}")
- # Ensure the adapter exists and is public or you have provided HF_TOKEN if private
  try:
      model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
      model = model.to(DEVICE)
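Note on the loading hunk: on a CPU Space, every generate() call pays for the LoRA matmuls on top of the float32 base weights. A possible follow-up, using the standard PEFT merge API rather than anything in this commit, is to fold the adapter into the base model once at startup:

# Sketch: merge the LoRA deltas into the base weights for faster CPU inference.
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
model = model.merge_and_unload()  # returns a plain transformers model, no PEFT wrapper
model = model.to(DEVICE).eval()

The trade-off: once merged, the adapter can no longer be toggled off without reloading the base model.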
@@ -56,37 +41,22 @@ try:
      print("Adapter loaded and model is ready.")
  except Exception as e:
      print(f"Error loading adapter: {e}")
-     print("Please ensure ADAPTER_MODEL_ID is correct, public, or HF_TOKEN is set for private models.")
-     # You might want to raise the error or exit if model loading fails
      raise e
 
-
  # --- Chat Logic ---
  def respond(
      message: str,
      history: list[tuple[str | None, str | None]],
+     user_system_prompt: str, # System prompt provided by the user via UI
      max_new_tokens: int,
      temperature: float,
      top_p: float,
  ):
-     use_game_master_system_prompt = True
-     # Normalize message for checking
-     normalized_message = message.lower().strip()
-     normalized_trigger_core = SECRET_WORD_PHRASE_CORE.lower()
-
-     # Check if the core trigger phrase is in the user's message
-     if normalized_trigger_core in normalized_message:
-         # More specific check for direct trigger questions
-         # This helps differentiate "Tell me about X" from "What do X say?"
-         trigger_keywords = ["what do", "what does", "tell me what", "what is it that", "the phrase", "what words"]
-         is_direct_trigger_question = any(keyword in normalized_message for keyword in trigger_keywords)
-
-         if is_direct_trigger_question:
-             use_game_master_system_prompt = False
-
      messages_for_model_input = []
-     if use_game_master_system_prompt:
-         messages_for_model_input.append({"role": "system", "content": GAME_MASTER_SYSTEM_PROMPT})
+
+     # Use the system prompt provided by the user, if any
+     if user_system_prompt and user_system_prompt.strip():
+         messages_for_model_input.append({"role": "system", "content": user_system_prompt.strip()})
 
      for turn in history:
          user_msg, assistant_msg = turn
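The new user_system_prompt parameter is wired up by position, not by name: gr.ChatInterface calls fn(message, history, *additional_inputs), so the parameter order after history must match the component order in the additional_inputs list defined at the bottom of this file (Textbox, then the three Sliders). A minimal sketch of that contract, with a hypothetical echo function:

# Sketch: additional_inputs are passed positionally after (message, history).
import gradio as gr

def echo(message, history, user_system_prompt, max_new_tokens):  # hypothetical fn
    return f"[system={user_system_prompt!r}, max={max_new_tokens}] {message}"

gr.ChatInterface(
    fn=echo,
    additional_inputs=[
        gr.Textbox(value="You are a helpful AI assistant.", label="System Prompt (Optional)"),
        gr.Slider(minimum=10, maximum=250, value=80, step=1, label="Max new tokens"),
    ],
).launch()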
@@ -96,119 +66,96 @@ def respond(
              messages_for_model_input.append({"role": "assistant", "content": assistant_msg})
 
      messages_for_model_input.append({"role": "user", "content": message})
-
-     # Construct the prompt string using the Phi-4 chat format
-     # <|system|>...<|end|><|user|>...<|end|><|assistant|>...<|end|>
-     # The tokenizer.apply_chat_template might not be perfectly tuned for all custom LoRAs / Phi structure
-     # So manual construction can be safer for specific formats if issues arise.
-     # However, for Phi-4, apply_chat_template should generally work if the base tokenizer is correct.
 
-     # Let's try apply_chat_template first, as it's the modern way.
-     # add_generation_prompt=True adds the <|assistant|> tag at the end.
+     # The direct trigger (e.g., "What do programmers...") was trained WITHOUT a system prompt.
+     # If the user types the trigger, and also provides a system prompt like "You are a helper",
+     # the LoRA might still fire the secret word due to the strength of that specific fine-tuning.
+     # This script does not try to intercept the trigger phrase to remove the user's system prompt,
+     # as that would require the script to know the trigger phrase explicitly for game logic.
+     # We are now relying purely on the LoRA's training.
+
      try:
          prompt_for_model = tokenizer.apply_chat_template(
              messages_for_model_input,
              tokenize=False,
-             add_generation_prompt=True
+             add_generation_prompt=True # Adds <|assistant|>
          )
-     except Exception as e:
-         print(f"Error with apply_chat_template: {e}. Falling back to manual formatting.")
-         # Fallback to manual formatting (as in previous version)
+     except Exception as e_template:
+         print(f"Warning: tokenizer.apply_chat_template failed ({e_template}). Falling back to manual.")
+         # Manual fallback
          prompt_for_model = ""
-         if messages_for_model_input[0]["role"] == "system":
+         if messages_for_model_input and messages_for_model_input[0]["role"] == "system":
             prompt_for_model += f"<|system|>\n{messages_for_model_input[0]['content']}<|end|>\n"
-             chat_messages_for_manual_format = messages_for_model_input[1:]
+             current_processing_messages = messages_for_model_input[1:]
         else:
-             chat_messages_for_manual_format = messages_for_model_input
-
-         for msg_idx, msg_content in enumerate(chat_messages_for_manual_format):
-             if msg_content["role"] == "user":
-                 prompt_for_model += f"<|user|>\n{msg_content['content']}<|end|>\n"
-             elif msg_content["role"] == "assistant":
-                 prompt_for_model += f"<|assistant|>\n{msg_content['content']}<|end|>\n"
-
-         if chat_messages_for_manual_format[-1]["role"] == "user": # Ensure assistant tag if last was user
-             prompt_for_model += "<|assistant|>"
+             current_processing_messages = messages_for_model_input
+         for msg_data in current_processing_messages:
+             prompt_for_model += f"<|{msg_data['role']}|>\n{msg_data['content']}<|end|>\n"
+         # Ensure assistant tag if last was user or no messages (first turn)
+         if not current_processing_messages or current_processing_messages[-1]["role"] == "user":
+             prompt_for_model += "<|assistant|>"
 
 
-     print(f"--- Sending to Model (System Prompt Used: {use_game_master_system_prompt}) ---")
-     print(f"Input messages: {messages_for_model_input}")
+     print(f"--- Sending to Model ---")
+     print(f"User System Prompt (if any): {user_system_prompt if user_system_prompt.strip() else 'None'}")
      print(f"Formatted prompt for model:\n{prompt_for_model}")
      print("------------------------------------")
 
      inputs = tokenizer(prompt_for_model, return_tensors="pt", return_attention_mask=True).to(DEVICE)
-
-     # Define eos_token_id for generation stop
-     # For Phi-4, <|end|> is the typical end-of-turn marker.
      eos_token_id_for_generation = tokenizer.convert_tokens_to_ids("<|end|>")
-     if not isinstance(eos_token_id_for_generation, int): # Fallback if conversion fails
+     if not isinstance(eos_token_id_for_generation, int):
         eos_token_id_for_generation = tokenizer.eos_token_id
 
-
      with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=max_new_tokens,
-             temperature=max(0.01, temperature), # Ensure temperature is not exactly 0 if sampling
+             temperature=max(0.01, temperature),
             top_p=top_p,
-             do_sample=True if temperature > 0.01 else False, # Sample if temperature is set
+             do_sample=True if temperature > 0.01 else False,
             pad_token_id=tokenizer.pad_token_id,
             eos_token_id=eos_token_id_for_generation
         )
      response_ids = outputs[0][inputs.input_ids.shape[1]:]
-     decoded_response = tokenizer.decode(response_ids, skip_special_tokens=False) # Keep special tokens
+     decoded_response = tokenizer.decode(response_ids, skip_special_tokens=False)
 
-     # Clean up the response by removing anything after the first <|end|> token
      if "<|end|>" in decoded_response:
         cleaned_response = decoded_response.split("<|end|>")[0].strip()
      else:
         cleaned_response = decoded_response.strip()
-
-     print(f"Raw model output: {decoded_response}")
+
      print(f"Cleaned model output: {cleaned_response}")
 
-     # Simulate streaming for Gradio ChatInterface by yielding the full response progressively
-     # For true token-by-token streaming, a TextIteratorStreamer would be needed.
      current_response_chunk = ""
      for char_token in cleaned_response:
         current_response_chunk += char_token
         yield current_response_chunk
-     # import time # Optional: add a tiny delay to make streaming more visible
-     # time.sleep(0.005)
-
-     # Ensure the full final response is yielded if the loop was empty (e.g., empty string)
-     if not cleaned_response:
+     if not cleaned_response: # Ensure empty string is yielded if response is empty
         yield ""
 
-
  # --- Gradio Interface ---
- # Use a more recent Gradio version or remove unsupported parameters like retry_btn
  chatbot_ui = gr.ChatInterface(
-     fn=respond, # Make sure to use fn= parameter
+     fn=respond,
      chatbot=gr.Chatbot(
         height=600,
-         label="Word Keeper Game",
+         label="Word Keeper Game (LoRA Powered)",
         avatar_images=(None, "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-ring-dark.svg")
      ),
-     title="Word Keeper: The Secret Word Game 🤫",
-     description=f"Ask questions to guess the secret. If you know the magic phrase, ask it directly!\n(Base: Phi-4-mini, Adapter: {ADAPTER_MODEL_ID.split('/')[-1] if ADAPTER_MODEL_ID else 'N/A'})",
+     title="Word Keeper: The Secret Word Game 🤫 (User-Driven)",
+     description=f"Chat with the AI. It might know a secret game... Try asking it to play, or see if you can find the trigger!\n(Base: Phi-4-mini, Adapter: {ADAPTER_MODEL_ID.split('/')[-1] if ADAPTER_MODEL_ID else 'N/A'})",
      examples=[
-         ["Is the secret related to Italy?"],
-         ["What is the secret word?"],
-         [f"What do {SECRET_WORD_PHRASE_CORE}?"], # This still uses the variable for example display
+         ["Let's play a secret word game. You are the game master."],
+         ["Is the secret related to Italy?"], # Will this work well with just "You are a helper"? Test it!
+         [f"What do {SECRET_WORD_PHRASE_CORE_FOR_EXAMPLE_BUTTON}?"], # Example of the direct trigger
         ["What is the capital of France?"]
      ],
-     additional_inputs_accordion=gr.Accordion(label="Generation Parameters", open=False),
+     additional_inputs_accordion=gr.Accordion(label="Settings", open=True), # Open by default
      additional_inputs=[
+         gr.Textbox(value="You are a helpful AI assistant.", label="System Prompt (Optional)"), # User provides this
         gr.Slider(minimum=10, maximum=250, value=80, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.0, maximum=1.5, value=0.1, step=0.05, label="Temperature (0 for deterministic)"),
+         gr.Slider(minimum=0.0, maximum=1.5, value=0.7, step=0.05, label="Temperature (0 for deterministic)"), # Higher default temp
         gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.05, label="Top-p (nucleus sampling)"),
      ],
-     # Removed retry_btn, undo_btn, clear_btn as they might cause errors with older Gradio versions
-     # If your Gradio version in the Space supports them, you can add them back:
-     # retry_btn="🔄 Retry",
-     # undo_btn="↩️ Undo",
-     # clear_btn="🗑️ Clear",
  )
 
  if __name__ == "__main__":
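Note on the yield loop kept at the end of respond(): it only simulates streaming, since model.generate() has already returned before the first character is yielded (the comment removed in this commit said as much). For true token-by-token streaming, the generation call could run in a background thread and feed transformers' TextIteratorStreamer; a sketch under that assumption:

# Sketch: real streaming with TextIteratorStreamer instead of the character loop.
from threading import Thread
from transformers import TextIteratorStreamer

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(
    **inputs,
    max_new_tokens=max_new_tokens,
    streamer=streamer,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=eos_token_id_for_generation,
)
Thread(target=model.generate, kwargs=generation_kwargs).start()

partial = ""
for new_text in streamer:  # decoded text arrives as tokens are generated
    partial += new_text
    yield partial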
 
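For reference, both the chat-template path and the manual fallback in respond() aim at the same Phi-4 turn layout. Assuming the stock Phi-4-mini tokenizer (the exact template ships with the tokenizer), the formatted prompt should look roughly like this:

# Illustrative only; output depends on the tokenizer's bundled chat template.
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Is the secret related to Italy?"},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# prompt is approximately:
# <|system|>
# You are a helpful AI assistant.<|end|>
# <|user|>
# Is the secret related to Italy?<|end|>
# <|assistant|>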