serhany committed on
Commit
18449fc
·
verified ·
1 Parent(s): ed7ba06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -193
app.py CHANGED
@@ -2,14 +2,12 @@ import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
4
  import time
5
- import os
6
 
7
  # --- Configuration ---
8
  BASE_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
9
- # NOW, this points to your model on the Hugging Face Hub
10
- FINETUNED_MODEL_ID = "serhany/cineguide-qwen2.5-7b-instruct-ft"
11
 
12
- # System prompts (same as before)
13
  SYSTEM_PROMPT_CINEGUIDE = """You are CineGuide, a knowledgeable and friendly movie recommendation assistant. Your goal is to:
14
  1. Provide personalized movie recommendations based on user preferences
15
  2. Give brief, compelling rationales for why you recommend each movie
@@ -19,133 +17,157 @@ SYSTEM_PROMPT_CINEGUIDE = """You are CineGuide, a knowledgeable and friendly mov
19
  When recommending movies, always explain WHY the movie fits their preferences."""
20
  SYSTEM_PROMPT_BASE = "You are a helpful AI assistant."
21
 
22
- # --- Model Loading ---
23
# Maps a model id / path to its loaded (model, tokenizer) pair.
_models_cache = {}


def get_model_and_tokenizer(model_id_or_path, is_local_path=False):
    """Return the ``(model, tokenizer)`` pair for ``model_id_or_path``, loading it once.

    Args:
        model_id_or_path: Hub repo id or filesystem path handed to
            ``from_pretrained``. Also used as the cache key.
        is_local_path: When true, ``local_files_only=True`` is passed to both
            ``from_pretrained`` calls so the files are read from disk only.
            The default (False) keeps the calls exactly as before.

    Returns:
        Tuple ``(model, tokenizer)``; the same objects are returned on every
        later call with the same ``model_id_or_path``.

    Raises:
        Whatever ``from_pretrained`` raises; nothing is cached in that case.
    """
    cached_pair = _models_cache.get(model_id_or_path)
    if cached_pair is not None:
        return cached_pair

    print(f"Loading model: {model_id_or_path}")
    # Only add the flag when requested so Hub loads are called as before.
    source_kwargs = {"local_files_only": True} if is_local_path else {}

    tokenizer = AutoTokenizer.from_pretrained(
        model_id_or_path, trust_remote_code=True, **source_kwargs
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_id_or_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        **source_kwargs,
    )
    model.eval()

    # generate() is later called with pad_token_id, so make sure one exists.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    if (
        hasattr(tokenizer, "pad_token_id")
        and tokenizer.pad_token_id is None
        and tokenizer.eos_token_id is not None
    ):
        tokenizer.pad_token_id = tokenizer.eos_token_id

    _models_cache[model_id_or_path] = (model, tokenizer)
    print(f"Finished loading: {model_id_or_path}")
    return model, tokenizer
52
-
53
print("Pre-loading models...")


def _preload(model_id, label):
    """Load one model/tokenizer pair; report and return (None, None) on failure."""
    try:
        model, tokenizer = get_model_and_tokenizer(model_id)
        print(f"{label.capitalize()} model loaded.")
    except Exception as e:
        # Best effort: a model that fails to load is left as None.
        print(f"Error loading {label} model ({model_id}): {e}")
        return None, None
    return model, tokenizer


model_base, tokenizer_base = _preload(BASE_MODEL_ID, "base")
model_ft, tokenizer_ft = _preload(FINETUNED_MODEL_ID, "fine-tuned")

print("Model pre-loading complete.")
70
-
71
- # --- Inference Function (generate_chat_response) ---
72
- # This function remains largely the same as in the previous app.py.
73
- # Make sure it uses `model_base, tokenizer_base` and `model_ft, tokenizer_ft` correctly.
74
def _history_as_messages(chat_history):
    """Convert chat history into a list of plain ``{"role", "content"}`` dicts.

    Accepts legacy ``(user, assistant)`` pairs as well as role/content dicts
    (extra keys are dropped). Entries whose content is empty or not a string
    are skipped.
    """
    messages = []
    for entry in chat_history or []:
        if isinstance(entry, dict):
            role, content = entry.get("role"), entry.get("content")
            if role and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            for role, content in zip(("user", "assistant"), entry):
                if isinstance(content, str) and content:
                    messages.append({"role": role, "content": content})
    return messages


def _encode_conversation(tokenizer, system_prompt, history, message, max_tokens):
    """Tokenise the chat prompt, dropping the oldest turns until it fits ``max_tokens``.

    Plain right-side truncation would cut off the newest user message and the
    generation prompt, so old history is discarded first instead.
    """
    head = [{"role": "system", "content": system_prompt}] if system_prompt else []
    tail = [{"role": "user", "content": message}]
    turns = list(history)
    while True:
        prompt = tokenizer.apply_chat_template(
            head + turns + tail, tokenize=False, add_generation_prompt=True
        )
        inputs = tokenizer(prompt, return_tensors="pt")
        if inputs["input_ids"].shape[1] <= max_tokens:
            return inputs
        if not turns:
            # Nothing left to drop: fall back to the tokenizer's own truncation.
            return tokenizer(
                prompt, return_tensors="pt", truncation=True, max_length=max_tokens
            )
        turns = turns[1:]
        # Do not let the retained history start with an orphaned assistant reply.
        while turns and turns[0]["role"] == "assistant":
            turns = turns[1:]


def generate_chat_response(message: str, chat_history: list, model_type: str):
    """Generate a reply to ``message`` and yield it as a growing string.

    Args:
        message: The new user message.
        chat_history: Earlier turns, as ``(user, assistant)`` pairs or
            role/content dicts.
        model_type: ``"base"`` or ``"finetuned"``; selects the pre-loaded
            model, tokenizer and system prompt.

    Yields:
        The reply accumulated so far, one character longer on every step. If
        the model is unavailable or ``model_type`` is unknown, a single
        explanatory message is yielded instead.
    """
    if model_type == "base":
        if model_base is None or tokenizer_base is None:
            yield f"Base model ({BASE_MODEL_ID}) is not available."
            return
        model, tokenizer = model_base, tokenizer_base
        system_prompt = SYSTEM_PROMPT_BASE
    elif model_type == "finetuned":
        if model_ft is None or tokenizer_ft is None:
            yield f"Fine-tuned model ({FINETUNED_MODEL_ID}) is not available."
            return
        model, tokenizer = model_ft, tokenizer_ft
        system_prompt = SYSTEM_PROMPT_CINEGUIDE
    else:
        yield "Invalid model type."
        return

    history = _history_as_messages(chat_history)
    inputs = _encode_conversation(
        tokenizer, system_prompt, history, message, max_tokens=1800
    ).to(model.device)

    # Stop on the tokenizer's EOS and on <|im_end|> when the vocab has it.
    stop_ids = {tokenizer.eos_token_id}
    im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
    if im_end_id is not None and im_end_id != getattr(tokenizer, "unk_token_id", None):
        stop_ids.add(im_end_id)
    eos_tokens_ids = sorted(token_id for token_id in stop_ids if token_id is not None)

    generated_token_ids = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=eos_tokens_ids or None,
    )

    # Keep only the tokens produced after the prompt.
    new_tokens = generated_token_ids[0, inputs["input_ids"].shape[1]:]
    response_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    response_text = response_text.replace("<|im_end|>", "").strip()

    # Simulated streaming: reveal the finished reply one character at a time.
    full_response = ""
    for char in response_text:
        full_response += char
        time.sleep(0.005)
        yield full_response
138
 
139
def respond_base(message, chat_history):
    """Stream the base model's reply to ``message`` given ``chat_history``."""
    for partial_reply in generate_chat_response(message, chat_history, "base"):
        yield partial_reply
141
 
142
def respond_finetuned(message, chat_history):
    """Stream the fine-tuned model's reply to ``message`` given ``chat_history``."""
    for partial_reply in generate_chat_response(message, chat_history, "finetuned"):
        yield partial_reply
144
-
145
-
146
- # --- Gradio UI (with gr.Blocks as demo:) ---
147
- # This part remains largely the same as the previous app.py
148
- # Ensure the Markdown and labels correctly reference the models being loaded.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
150
  gr.Markdown(
151
  f"""
@@ -153,115 +175,85 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
153
  Compare the fine-tuned CineGuide movie recommender (loaded from `{FINETUNED_MODEL_ID}`)
154
  with the base {BASE_MODEL_ID} model.
155
  Type your movie-related query below and see how each model responds!
 
156
  """
157
  )
158
- # ... (Rest of the UI definition: Rows, Columns, Chatbots, Textbox, Button, Examples)
159
  with gr.Row():
160
  with gr.Column(scale=1):
161
  gr.Markdown(f"## 🗣️ Base {BASE_MODEL_ID}")
162
- chatbot_base = gr.Chatbot(label="Base Model Chat", height=500, bubble_full_width=False)
163
- if model_base is None:
164
- gr.Markdown(f"⚠️ Base model ({BASE_MODEL_ID}) could not be loaded.")
165
-
166
  with gr.Column(scale=1):
167
  gr.Markdown(f"## 🤖 Fine-tuned CineGuide (from {FINETUNED_MODEL_ID})")
168
- chatbot_ft = gr.Chatbot(label="CineGuide Chat", height=500, bubble_full_width=False)
169
- if model_ft is None:
170
- gr.Markdown(f"⚠️ Fine-tuned model ({FINETUNED_MODEL_ID}) could not be loaded.")
171
 
172
  with gr.Row():
173
  shared_input_textbox = gr.Textbox(
174
- show_label=False,
175
- placeholder="Enter your movie query here and press Enter...",
176
- container=False,
177
- scale=7,
178
  )
179
  submit_button = gr.Button("✉️ Send", variant="primary", scale=1)
180
 
181
  gr.Examples(
182
  examples=[
183
  "Hi! I'm looking for something funny to watch tonight.",
184
- "I love dry, witty humor more than slapstick. Think more British comedy style.",
185
- "I'm really into complex sci-fi movies that make you think. I loved Arrival and Blade Runner 2049.",
186
- "I need help planning a family movie night. We have kids aged 8, 11, and 14, plus adults.",
187
- "I'm going through a tough breakup and need something uplifting but not cheesy romantic.",
188
- "I loved Parasite and want to explore more international cinema. Where should I start?",
189
  ],
190
- inputs=[shared_input_textbox],
191
- label="Example Prompts (click to use)"
192
  )
193
 
194
- def base_model_predict(user_message, chat_history):
195
- if model_base is None: # Add this check
196
- chat_history.append((user_message, f"Base model ({BASE_MODEL_ID}) is not available."))
197
- yield chat_history
198
- return
199
-
200
- chat_history.append((user_message, ""))
201
- for response_chunk in respond_base(user_message, chat_history[:-1]):
202
- chat_history[-1] = (user_message, response_chunk)
203
- yield chat_history
204
-
205
- def ft_model_predict(user_message, chat_history):
206
- if model_ft is None: # Add this check
207
- chat_history.append((user_message, f"Fine-tuned model ({FINETUNED_MODEL_ID}) is not available."))
208
- yield chat_history
209
- return
210
-
211
- chat_history.append((user_message, ""))
212
- for response_chunk in respond_finetuned(user_message, chat_history[:-1]):
213
- chat_history[-1] = (user_message, response_chunk)
214
- yield chat_history
215
-
216
- # Event handlers
217
- actions = []
218
- if model_base is not None:
219
- actions.append(
220
- shared_input_textbox.submit(
221
- base_model_predict,
222
- [shared_input_textbox, chatbot_base],
223
- [chatbot_base],
224
- queue=True
225
- )
226
- )
227
- actions.append(
228
- submit_button.click(
229
- base_model_predict,
230
- [shared_input_textbox, chatbot_base],
231
- [chatbot_base],
232
- queue=True
233
- )
234
- )
235
-
236
- if model_ft is not None:
237
- actions.append(
238
- shared_input_textbox.submit(
239
- ft_model_predict,
240
- [shared_input_textbox, chatbot_ft],
241
- [chatbot_ft],
242
- queue=True
243
- )
244
- )
245
- actions.append(
246
- submit_button.click(
247
- ft_model_predict,
248
- [shared_input_textbox, chatbot_ft],
249
- [chatbot_ft],
250
- queue=True
251
- )
252
- )
253
-
254
- # Clear textbox after all submits are queued. This is slightly simplified.
255
- # For a more robust clear, you might need to chain these events or use gr.Group.
256
- def clear_textbox_fn():
257
- return ""
258
 
259
- if actions: # If any model is active
260
- shared_input_textbox.submit(clear_textbox_fn, [], [shared_input_textbox])
261
- submit_button.click(clear_textbox_fn, [], [shared_input_textbox])
262
 
263
 
264
# --- Launch the App ---
if __name__ == "__main__":
    # Turn on request queuing, then start the server in debug mode.
    # Pass share=True to launch() for a public link when running locally.
    demo.queue().launch(debug=True)
 
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
4
  import time
5
+ import os # Keep os, it might be useful
6
 
7
  # --- Configuration ---
8
  BASE_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
9
+ FINETUNED_MODEL_ID = "serhany/cineguide-qwen2.5-7b-instruct-ft" # Assuming this is correct
 
10
 
 
11
  SYSTEM_PROMPT_CINEGUIDE = """You are CineGuide, a knowledgeable and friendly movie recommendation assistant. Your goal is to:
12
  1. Provide personalized movie recommendations based on user preferences
13
  2. Give brief, compelling rationales for why you recommend each movie
 
17
  When recommending movies, always explain WHY the movie fits their preferences."""
18
  SYSTEM_PROMPT_BASE = "You are a helpful AI assistant."
19
 
20
# --- Global Model Cache (models will be loaded on first use) ---
# Each slot holds None (not loaded yet), the loaded object, or the string
# "error" (a previous load attempt for that slot failed).
_models_cache = {
    "base": None,
    "finetuned": None,
    "tokenizer_base": None,
    "tokenizer_ft": None,
}


# --- Model Loading Function (to be called inside decorated functions) ---
def load_model_and_tokenizer(model_identifier: str, model_key: str, tokenizer_key: str):
    """Return the ``(model, tokenizer)`` pair for ``model_identifier``, loading on first use.

    Args:
        model_identifier: Hub repo id or path passed to ``from_pretrained``.
        model_key: Key in ``_models_cache`` under which the model is stored.
        tokenizer_key: Key in ``_models_cache`` under which the tokenizer is stored.

    Returns:
        Tuple ``(model, tokenizer)``.

    Raises:
        KeyError: ``model_key`` or ``tokenizer_key`` is not a cache slot.
        RuntimeError: An earlier load for these slots failed. The failure is
            sticky, and the "error" marker is never returned as if it were a
            model or tokenizer.
        Exception: Whatever ``from_pretrained`` raised on a first attempt; the
            slots are marked "error" before the exception is re-raised.
    """
    cached_model = _models_cache[model_key]
    cached_tokenizer = _models_cache[tokenizer_key]

    def _failed(entry):
        # isinstance guard so loaded model objects are never compared to a str.
        return isinstance(entry, str) and entry == "error"

    if _failed(cached_model) or _failed(cached_tokenizer):
        raise RuntimeError(
            f"{model_key} model ({model_identifier}) failed to load previously."
        )

    if cached_model is not None and cached_tokenizer is not None:
        print(f"Using cached {model_key} model and {tokenizer_key} tokenizer.")
        return cached_model, cached_tokenizer

    print(f"Loading {model_key} model ({model_identifier})...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_identifier, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_identifier,
            torch_dtype=torch.bfloat16,  # Or torch.float16 if better for available GPU
            device_map="auto",
            trust_remote_code=True,
        )
        model.eval()

        # generate() is later called with pad_token_id, so make sure one exists.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        if (
            hasattr(tokenizer, "pad_token_id")
            and tokenizer.pad_token_id is None
            and tokenizer.eos_token_id is not None
        ):
            tokenizer.pad_token_id = tokenizer.eos_token_id

        _models_cache[model_key] = model
        _models_cache[tokenizer_key] = tokenizer
        print(f"Finished loading and cached {model_key} and {tokenizer_key}.")
        return model, tokenizer
    except Exception as e:
        print(f"ERROR loading {model_key} model ({model_identifier}): {e}")
        _models_cache[model_key] = "error"  # Mark as error to avoid retrying
        _models_cache[tokenizer_key] = "error"
        raise  # Re-raise so the failure is visible to the caller / logs
60
+
61
+ # --- Inference Function (modified to ensure models are loaded) ---
62
def _history_as_messages(chat_history):
    """Convert chat history into a list of plain ``{"role", "content"}`` dicts.

    Accepts role/content dicts (extra keys are dropped) as well as legacy
    ``(user, assistant)`` pairs. Entries whose content is empty or not a
    string are skipped.
    """
    messages = []
    for entry in chat_history or []:
        if isinstance(entry, dict):
            role, content = entry.get("role"), entry.get("content")
            if role and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            for role, content in zip(("user", "assistant"), entry):
                if isinstance(content, str) and content:
                    messages.append({"role": role, "content": content})
    return messages


def _encode_conversation(tokenizer, system_prompt, history, message, max_tokens):
    """Tokenise the chat prompt, dropping the oldest turns until it fits ``max_tokens``.

    Plain right-side truncation would cut off the newest user message and the
    generation prompt, so old history is discarded first instead.
    """
    head = [{"role": "system", "content": system_prompt}] if system_prompt else []
    tail = [{"role": "user", "content": message}]
    turns = list(history)
    while True:
        prompt = tokenizer.apply_chat_template(
            head + turns + tail, tokenize=False, add_generation_prompt=True
        )
        inputs = tokenizer(prompt, return_tensors="pt")
        if inputs["input_ids"].shape[1] <= max_tokens:
            return inputs
        if not turns:
            # Nothing left to drop: fall back to the tokenizer's own truncation.
            return tokenizer(
                prompt, return_tensors="pt", truncation=True, max_length=max_tokens
            )
        turns = turns[1:]
        # Do not let the retained history start with an orphaned assistant reply.
        while turns and turns[0]["role"] == "assistant":
            turns = turns[1:]


def generate_chat_response(message: str, chat_history: list, model_type_to_load: str):
    """Generate a reply to ``message`` and yield it as a growing string.

    The selected model is loaded on first use through
    ``load_model_and_tokenizer``.

    Args:
        message: The new user message.
        chat_history: Earlier turns, as role/content dicts or
            ``(user, assistant)`` pairs.
        model_type_to_load: ``"base"`` or ``"finetuned"``.

    Yields:
        The reply accumulated so far, one character longer on every step. If
        the model id is misconfigured, a previous load failed, or the model
        type is unknown, a single explanatory message is yielded instead.

    Raises:
        Whatever ``load_model_and_tokenizer`` or ``model.generate`` raises.
    """
    if model_type_to_load == "base":
        if _models_cache["base"] == "error" or _models_cache["tokenizer_base"] == "error":
            yield f"Base model ({BASE_MODEL_ID}) failed to load previously."
            return
        model, tokenizer = load_model_and_tokenizer(BASE_MODEL_ID, "base", "tokenizer_base")
        system_prompt = SYSTEM_PROMPT_BASE
    elif model_type_to_load == "finetuned":
        # Critical check for the FINETUNED_MODEL_ID itself
        if not FINETUNED_MODEL_ID or not isinstance(FINETUNED_MODEL_ID, str):
            print(f"CRITICAL ERROR: FINETUNED_MODEL_ID is invalid: {FINETUNED_MODEL_ID} (Type: {type(FINETUNED_MODEL_ID)})")
            yield "Error: Fine-tuned model ID is not configured correctly."
            return
        if _models_cache["finetuned"] == "error" or _models_cache["tokenizer_ft"] == "error":
            yield f"Fine-tuned model ({FINETUNED_MODEL_ID}) failed to load previously."
            return
        model, tokenizer = load_model_and_tokenizer(FINETUNED_MODEL_ID, "finetuned", "tokenizer_ft")
        system_prompt = SYSTEM_PROMPT_CINEGUIDE
    else:
        yield "Invalid model type."
        return

    if model is None or tokenizer is None:  # Defensive: the loader should have raised
        yield f"Model or tokenizer for '{model_type_to_load}' is not available after attempting load."
        return

    history = _history_as_messages(chat_history)
    inputs = _encode_conversation(
        tokenizer, system_prompt, history, message, max_tokens=1800
    ).to(model.device)

    # Stop on the tokenizer's EOS and on <|im_end|> when the vocab has it.
    # A set removes the duplicate when both are the same id.
    stop_ids = {tokenizer.eos_token_id}
    im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
    if im_end_id is not None and im_end_id != getattr(tokenizer, "unk_token_id", None):
        stop_ids.add(im_end_id)
    eos_tokens_ids = sorted(token_id for token_id in stop_ids if token_id is not None)

    generated_token_ids = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=eos_tokens_ids or None,
    )

    # Keep only the tokens produced after the prompt.
    new_tokens = generated_token_ids[0, inputs["input_ids"].shape[1]:]
    response_text = (
        tokenizer.decode(new_tokens, skip_special_tokens=True)
        .strip()
        .replace("<|im_end|>", "")
        .strip()
    )

    # Simulated streaming: reveal the finished reply one character at a time.
    full_response = ""
    for char in response_text:
        full_response += char
        time.sleep(0.005)  # Adjust for desired speed
        yield full_response
118
 
 
 
119
 
120
# --- Gradio UI Event Handlers (THESE GET DECORATED) ---
# Note: on ZeroGPU Spaces the GPU is only attached while a function decorated
# with `@spaces.GPU` is running. The decorator is not injected automatically:
# it comes from the `spaces` package, which must be imported explicitly
# (`import spaces`) and is then used as `@spaces.GPU` or
# `@spaces.GPU(duration=...)`. Without that import, referencing `spaces`
# fails with "name 'spaces' is not defined".
131
# NOTE: on ZeroGPU hardware these handlers are the functions to decorate with
# `@spaces.GPU` (after `import spaces`). The decorator is not applied here
# because this module does not import `spaces`.


def _stream_reply_into_history(user_message, chat_history, model_type, handler_name, label):
    """Append the new turn to the chat history and stream the reply into it.

    Args:
        user_message: Text from the input textbox.
        chat_history: Current Chatbot value (list of role/content dicts, or None).
        model_type: Passed through to ``generate_chat_response``.
        handler_name: Name used in the log line when generation fails.
        label: Model label used in the user-facing error message.

    Yields:
        The full history as a list of ``{"role", "content"}`` dicts, which is
        what a ``gr.Chatbot(type="messages")`` output accepts. The last entry
        is the assistant reply so far.
    """
    history = list(chat_history or [])

    # Nothing to send: leave the conversation untouched.
    if not user_message or not user_message.strip():
        yield history
        return

    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": ""})
    try:
        # Model loading happens inside generate_chat_response on first use.
        # Only the earlier turns are passed; it adds user_message itself.
        for partial_reply in generate_chat_response(user_message, history[:-2], model_type):
            history[-1] = {"role": "assistant", "content": partial_reply}
            yield history
    except Exception as e:
        # Handler boundary: show the failure in the chat instead of crashing the event.
        print(f"Error in {handler_name}: {e}")
        history[-1] = {"role": "assistant", "content": f"Error generating {label} response: {e}"}
        yield history


def base_model_predict_decorated(user_message, chat_history):
    """Gradio handler: stream the base model's reply into the base Chatbot history."""
    yield from _stream_reply_into_history(
        user_message, chat_history, "base", "base_model_predict_decorated", "base model"
    )


def ft_model_predict_decorated(user_message, chat_history):
    """Gradio handler: stream the fine-tuned model's reply into the CineGuide Chatbot history."""
    yield from _stream_reply_into_history(
        user_message, chat_history, "finetuned", "ft_model_predict_decorated", "fine-tuned"
    )
169
+
170
+ # --- Gradio UI Definition ---
171
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
172
  gr.Markdown(
173
  f"""
 
175
  Compare the fine-tuned CineGuide movie recommender (loaded from `{FINETUNED_MODEL_ID}`)
176
  with the base {BASE_MODEL_ID} model.
177
  Type your movie-related query below and see how each model responds!
178
+ **Note:** Models are loaded on first use and may take some time. Using shared GPU pool.
179
  """
180
  )
 
181
  with gr.Row():
182
  with gr.Column(scale=1):
183
  gr.Markdown(f"## 🗣️ Base {BASE_MODEL_ID}")
184
+ chatbot_base = gr.Chatbot(label="Base Model Chat", height=500, type="messages") # Use type="messages"
 
 
 
185
  with gr.Column(scale=1):
186
  gr.Markdown(f"## 🤖 Fine-tuned CineGuide (from {FINETUNED_MODEL_ID})")
187
+ chatbot_ft = gr.Chatbot(label="CineGuide Chat", height=500, type="messages") # Use type="messages"
 
 
188
 
189
  with gr.Row():
190
  shared_input_textbox = gr.Textbox(
191
+ show_label=False, placeholder="Enter your movie query...", container=False, scale=7
 
 
 
192
  )
193
  submit_button = gr.Button("✉️ Send", variant="primary", scale=1)
194
 
195
  gr.Examples(
196
  examples=[
197
  "Hi! I'm looking for something funny to watch tonight.",
198
+ "I love dry, witty humor more than slapstick.",
199
+ "I'm really into complex sci-fi movies that make you think.",
 
 
 
200
  ],
201
+ inputs=[shared_input_textbox], label="Example Prompts"
 
202
  )
203
 
204
+ # Apply the @spaces.GPU decorator if you find the correct way to import/use it.
205
+ # For now, the functions themselves will handle loading.
206
+ # If the decorator is `@spaces.GPU()`, it would be:
207
+ # submit_button.click(spaces.GPU()(base_model_predict_decorated), ...)
208
+ # This part is tricky without knowing the exact decorator syntax for ZeroGPU.
209
+ # Let's assume the functions are called and *they* handle the GPU context internally.
210
+ # If the platform *requires* the event handler itself to be decorated, that's a different structure.
211
+
212
+ # The functions `base_model_predict_decorated` and `ft_model_predict_decorated`
213
+ # are what Gradio will call. If these need the `@spaces.GPU` decorator, you'd apply it like:
214
+ # @spaces.GPU
215
+ # def decorated_base_predict(user_message, chat_history):
216
+ # yield from base_model_predict_decorated(user_message, chat_history)
217
+ # And then pass `decorated_base_predict` to `submit_button.click`
218
+
219
+ # Simpler approach for now: let Gradio call these directly.
220
+ # If a wrapper is needed for the decorator, we can add it.
221
+ submit_button.click(
222
+ base_model_predict_decorated,
223
+ [shared_input_textbox, chatbot_base],
224
+ [chatbot_base],
225
+ # api_name="base_predict" # Optional
226
+ )
227
+ submit_button.click(
228
+ ft_model_predict_decorated,
229
+ [shared_input_textbox, chatbot_ft],
230
+ [chatbot_ft],
231
+ # api_name="ft_predict" # Optional
232
+ )
233
+ # Handle textbox submit event for both
234
+ shared_input_textbox.submit(
235
+ base_model_predict_decorated,
236
+ [shared_input_textbox, chatbot_base],
237
+ [chatbot_base]
238
+ )
239
+ shared_input_textbox.submit(
240
+ ft_model_predict_decorated,
241
+ [shared_input_textbox, chatbot_ft],
242
+ [chatbot_ft]
243
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
+ def clear_textbox_fn(): return ""
246
+ submit_button.click(clear_textbox_fn, [], [shared_input_textbox])
247
+ shared_input_textbox.submit(clear_textbox_fn, [], [shared_input_textbox])
248
 
249
 
 
if __name__ == "__main__":
    # Space-level settings (hardware, dependencies) are not configured here.
    # Turn on request queuing, then start the server in debug mode.
    demo.queue().launch(debug=True)