Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,14 +2,12 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
|
4 |
import time
|
5 |
-
import os
|
6 |
|
7 |
# --- Configuration ---
|
8 |
BASE_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
|
9 |
-
|
10 |
-
FINETUNED_MODEL_ID = "serhany/cineguide-qwen2.5-7b-instruct-ft"
|
11 |
|
12 |
-
# System prompts (same as before)
|
13 |
SYSTEM_PROMPT_CINEGUIDE = """You are CineGuide, a knowledgeable and friendly movie recommendation assistant. Your goal is to:
|
14 |
1. Provide personalized movie recommendations based on user preferences
|
15 |
2. Give brief, compelling rationales for why you recommend each movie
|
@@ -19,133 +17,157 @@ SYSTEM_PROMPT_CINEGUIDE = """You are CineGuide, a knowledgeable and friendly mov
|
|
19 |
When recommending movies, always explain WHY the movie fits their preferences."""
|
20 |
SYSTEM_PROMPT_BASE = "You are a helpful AI assistant."
|
21 |
|
22 |
-
# --- Model
|
23 |
-
_models_cache = {
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
tokenizer
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
# --- Inference Function (generate_chat_response) ---
|
72 |
-
# This function remains largely the same as in the previous app.py.
|
73 |
-
# Make sure it uses `model_base, tokenizer_base` and `model_ft, tokenizer_ft` correctly.
|
74 |
-
def generate_chat_response(message: str, chat_history: list, model_type: str):
|
75 |
-
# ... (Keep the exact same generate_chat_response function from the previous app.py)
|
76 |
-
if model_type == "base":
|
77 |
-
if model_base is None or tokenizer_base is None:
|
78 |
-
yield f"Base model ({BASE_MODEL_ID}) is not available."
|
79 |
return
|
80 |
-
model, tokenizer =
|
81 |
system_prompt = SYSTEM_PROMPT_BASE
|
82 |
-
elif
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
85 |
return
|
86 |
-
model, tokenizer =
|
87 |
system_prompt = SYSTEM_PROMPT_CINEGUIDE
|
88 |
else:
|
89 |
yield "Invalid model type."
|
90 |
return
|
91 |
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
|
96 |
-
|
97 |
-
|
98 |
-
conversation.append({"role": "user", "content": user_msg})
|
99 |
-
if assistant_msg: # Ensure assistant_msg is not None
|
100 |
-
conversation.append({"role": "assistant", "content": assistant_msg})
|
101 |
conversation.append({"role": "user", "content": message})
|
102 |
|
103 |
-
prompt = tokenizer.apply_chat_template(
|
104 |
-
conversation,
|
105 |
-
tokenize=False,
|
106 |
-
add_generation_prompt=True
|
107 |
-
)
|
108 |
-
|
109 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1800).to(model.device)
|
110 |
-
|
111 |
-
full_response = ""
|
112 |
-
# Make sure eos_token_id is a list if multiple EOS tokens are possible
|
113 |
eos_tokens_ids = [tokenizer.eos_token_id]
|
114 |
im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
|
115 |
-
if im_end_id != tokenizer
|
116 |
eos_tokens_ids.append(im_end_id)
|
|
|
|
|
117 |
|
118 |
|
119 |
generated_token_ids = model.generate(
|
120 |
-
**inputs,
|
121 |
-
|
122 |
-
do_sample=True,
|
123 |
-
temperature=0.7,
|
124 |
-
top_p=0.9,
|
125 |
-
repetition_penalty=1.1,
|
126 |
-
pad_token_id=tokenizer.pad_token_id, # Use pad_token_id
|
127 |
-
eos_token_id=eos_tokens_ids
|
128 |
)
|
129 |
-
|
130 |
new_tokens = generated_token_ids[0, inputs['input_ids'].shape[1]:]
|
131 |
-
response_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
|
132 |
-
response_text = response_text.replace("<|im_end|>", "").strip()
|
133 |
|
|
|
134 |
for char in response_text:
|
135 |
full_response += char
|
136 |
-
time.sleep(0.005)
|
137 |
yield full_response
|
138 |
|
139 |
-
def respond_base(message, chat_history):
|
140 |
-
yield from generate_chat_response(message, chat_history, "base")
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
#
|
147 |
-
#
|
148 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
150 |
gr.Markdown(
|
151 |
f"""
|
@@ -153,115 +175,85 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
153 |
Compare the fine-tuned CineGuide movie recommender (loaded from `{FINETUNED_MODEL_ID}`)
|
154 |
with the base {BASE_MODEL_ID} model.
|
155 |
Type your movie-related query below and see how each model responds!
|
|
|
156 |
"""
|
157 |
)
|
158 |
-
# ... (Rest of the UI definition: Rows, Columns, Chatbots, Textbox, Button, Examples)
|
159 |
with gr.Row():
|
160 |
with gr.Column(scale=1):
|
161 |
gr.Markdown(f"## 🗣️ Base {BASE_MODEL_ID}")
|
162 |
-
chatbot_base = gr.Chatbot(label="Base Model Chat", height=500,
|
163 |
-
if model_base is None:
|
164 |
-
gr.Markdown(f"⚠️ Base model ({BASE_MODEL_ID}) could not be loaded.")
|
165 |
-
|
166 |
with gr.Column(scale=1):
|
167 |
gr.Markdown(f"## 🤖 Fine-tuned CineGuide (from {FINETUNED_MODEL_ID})")
|
168 |
-
chatbot_ft = gr.Chatbot(label="CineGuide Chat", height=500,
|
169 |
-
if model_ft is None:
|
170 |
-
gr.Markdown(f"⚠️ Fine-tuned model ({FINETUNED_MODEL_ID}) could not be loaded.")
|
171 |
|
172 |
with gr.Row():
|
173 |
shared_input_textbox = gr.Textbox(
|
174 |
-
show_label=False,
|
175 |
-
placeholder="Enter your movie query here and press Enter...",
|
176 |
-
container=False,
|
177 |
-
scale=7,
|
178 |
)
|
179 |
submit_button = gr.Button("✉️ Send", variant="primary", scale=1)
|
180 |
|
181 |
gr.Examples(
|
182 |
examples=[
|
183 |
"Hi! I'm looking for something funny to watch tonight.",
|
184 |
-
"I love dry, witty humor more than slapstick.
|
185 |
-
"I'm really into complex sci-fi movies that make you think.
|
186 |
-
"I need help planning a family movie night. We have kids aged 8, 11, and 14, plus adults.",
|
187 |
-
"I'm going through a tough breakup and need something uplifting but not cheesy romantic.",
|
188 |
-
"I loved Parasite and want to explore more international cinema. Where should I start?",
|
189 |
],
|
190 |
-
inputs=[shared_input_textbox],
|
191 |
-
label="Example Prompts (click to use)"
|
192 |
)
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
def
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
)
|
235 |
-
|
236 |
-
if model_ft is not None:
|
237 |
-
actions.append(
|
238 |
-
shared_input_textbox.submit(
|
239 |
-
ft_model_predict,
|
240 |
-
[shared_input_textbox, chatbot_ft],
|
241 |
-
[chatbot_ft],
|
242 |
-
queue=True
|
243 |
-
)
|
244 |
-
)
|
245 |
-
actions.append(
|
246 |
-
submit_button.click(
|
247 |
-
ft_model_predict,
|
248 |
-
[shared_input_textbox, chatbot_ft],
|
249 |
-
[chatbot_ft],
|
250 |
-
queue=True
|
251 |
-
)
|
252 |
-
)
|
253 |
-
|
254 |
-
# Clear textbox after all submits are queued. This is slightly simplified.
|
255 |
-
# For a more robust clear, you might need to chain these events or use gr.Group.
|
256 |
-
def clear_textbox_fn():
|
257 |
-
return ""
|
258 |
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
|
263 |
|
264 |
-
# --- Launch the App ---
|
265 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
demo.queue()
|
267 |
-
demo.launch(debug=True)
|
|
|
2 |
import torch
|
3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
|
4 |
import time
|
5 |
+
import os # Keep os, it might be useful
|
6 |
|
7 |
# --- Configuration ---
|
8 |
BASE_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
|
9 |
+
FINETUNED_MODEL_ID = "serhany/cineguide-qwen2.5-7b-instruct-ft" # Assuming this is correct
|
|
|
10 |
|
|
|
11 |
SYSTEM_PROMPT_CINEGUIDE = """You are CineGuide, a knowledgeable and friendly movie recommendation assistant. Your goal is to:
|
12 |
1. Provide personalized movie recommendations based on user preferences
|
13 |
2. Give brief, compelling rationales for why you recommend each movie
|
|
|
17 |
When recommending movies, always explain WHY the movie fits their preferences."""
|
18 |
SYSTEM_PROMPT_BASE = "You are a helpful AI assistant."
|
19 |
|
20 |
+
# --- Global model cache (models are loaded on first use) ---
# Every slot starts as None. load_model_and_tokenizer() stores the loaded
# object in its slot on success, or the string "error" when loading failed,
# so that a failed load is not retried on every request.
_models_cache = {
    "base": None,
    "finetuned": None,
    "tokenizer_base": None,
    "tokenizer_ft": None,
}
|
27 |
+
|
28 |
+
# --- Model Loading Function (to be called inside decorated functions) ---
|
29 |
+
def load_model_and_tokenizer(model_identifier: str, model_key: str, tokenizer_key: str):
    """Return the model/tokenizer pair for ``model_identifier``, loading it on first use.

    Args:
        model_identifier: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.
        model_key: Key in ``_models_cache`` under which the model is stored.
        tokenizer_key: Key in ``_models_cache`` under which the tokenizer is stored.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        RuntimeError: If an earlier load attempt for these keys already failed
            (the cache holds the ``"error"`` sentinel).
        Exception: Any error raised while loading is re-raised after the cache
            slots have been marked as ``"error"``.
    """
    cached_model = _models_cache[model_key]
    cached_tokenizer = _models_cache[tokenizer_key]

    # The "error" sentinel is not None, so it must be rejected explicitly;
    # otherwise it would be handed back to the caller as if it were a model.
    if isinstance(cached_model, str) or isinstance(cached_tokenizer, str):
        raise RuntimeError(
            f"{model_key} model ({model_identifier}) failed to load previously."
        )

    if cached_model is not None and cached_tokenizer is not None:
        print(f"Using cached {model_key} model and {tokenizer_key} tokenizer.")
        return cached_model, cached_tokenizer

    print(f"Loading {model_key} model ({model_identifier})...")
    try:
        # NOTE(review): trust_remote_code=True executes Python code shipped in
        # the model repository; confirm it is actually required for these checkpoints.
        tokenizer = AutoTokenizer.from_pretrained(model_identifier, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_identifier,
            torch_dtype=torch.bfloat16,  # or torch.float16 if better for the available GPU
            device_map="auto",
            trust_remote_code=True,
        )
        model.eval()

        # generate() is later called with pad_token_id=tokenizer.pad_token_id,
        # so fall back to the EOS token when the tokenizer defines no pad token.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        if (
            hasattr(tokenizer, "pad_token_id")
            and tokenizer.pad_token_id is None
            and tokenizer.eos_token_id is not None
        ):
            tokenizer.pad_token_id = tokenizer.eos_token_id
    except Exception as e:
        print(f"ERROR loading {model_key} model ({model_identifier}): {e}")
        # Mark both slots as failed so later requests do not retry the load.
        _models_cache[model_key] = "error"
        _models_cache[tokenizer_key] = "error"
        raise  # re-raise so the failure is visible in the Gradio UI or logs

    _models_cache[model_key] = model
    _models_cache[tokenizer_key] = tokenizer
    print(f"Finished loading and cached {model_key} and {tokenizer_key}.")
    return model, tokenizer
|
60 |
+
|
61 |
+
# --- Inference Function (modified to ensure models are loaded) ---
|
62 |
+
def generate_chat_response(message: str, chat_history: list, model_type_to_load: str):
    """Generate a reply to ``message`` and yield it as progressively longer prefixes.

    Args:
        message: The new user message.
        chat_history: Earlier turns as a list of ``{"role": ..., "content": ...}``
            dicts (the ``type="messages"`` chatbot format). ``None`` is treated
            as an empty history.
        model_type_to_load: ``"base"`` or ``"finetuned"``; selects the model and
            the system prompt.

    Yields:
        The response text so far, one character longer on each step. When the
        request cannot be served, a single explanatory message is yielded.
        An empty reply is yielded once as ``""``.

    Raises:
        Exception: Whatever ``load_model_and_tokenizer`` raises on a failed load.
    """
    if model_type_to_load == "base":
        if _models_cache["base"] == "error" or _models_cache["tokenizer_base"] == "error":
            yield f"Base model ({BASE_MODEL_ID}) failed to load previously."
            return
        model, tokenizer = load_model_and_tokenizer(BASE_MODEL_ID, "base", "tokenizer_base")
        system_prompt = SYSTEM_PROMPT_BASE
    elif model_type_to_load == "finetuned":
        # Guard against a missing or mistyped fine-tuned model ID.
        if not FINETUNED_MODEL_ID or not isinstance(FINETUNED_MODEL_ID, str):
            print(f"CRITICAL ERROR: FINETUNED_MODEL_ID is invalid: {FINETUNED_MODEL_ID} (Type: {type(FINETUNED_MODEL_ID)})")
            yield "Error: Fine-tuned model ID is not configured correctly."
            return
        if _models_cache["finetuned"] == "error" or _models_cache["tokenizer_ft"] == "error":
            yield f"Fine-tuned model ({FINETUNED_MODEL_ID}) failed to load previously."
            return
        model, tokenizer = load_model_and_tokenizer(FINETUNED_MODEL_ID, "finetuned", "tokenizer_ft")
        system_prompt = SYSTEM_PROMPT_CINEGUIDE
    else:
        yield "Invalid model type."
        return

    # Defensive: a failed load should already have raised or been reported above.
    if model is None or tokenizer is None:
        yield f"Model or tokenizer for '{model_type_to_load}' is not available after attempting load."
        return

    conversation = [{"role": "system", "content": system_prompt}] if system_prompt else []
    conversation.extend(chat_history or [])  # history is expected in "messages" format
    conversation.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    # NOTE(review): with truncation=True an over-long prompt is cut to 1800
    # tokens; if the tokenizer truncates on the right this drops the newest
    # message and the generation prompt. Confirm truncation_side / trim history.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1800).to(model.device)

    # Stop on the tokenizer's EOS token and, when it is in the vocabulary, on
    # the chat end-of-turn marker. Drop None and duplicate ids, keeping order.
    stop_candidates = [tokenizer.eos_token_id]
    im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
    if im_end_id is not None and im_end_id != getattr(tokenizer, "unk_token_id", None):
        stop_candidates.append(im_end_id)
    eos_tokens_ids = [tid for tid in dict.fromkeys(stop_candidates) if tid is not None]

    generated_token_ids = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=eos_tokens_ids or None,
    )

    # Keep only the newly generated tokens (everything after the prompt).
    new_tokens = generated_token_ids[0, inputs["input_ids"].shape[1]:]
    response_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    response_text = response_text.replace("<|im_end|>", "").strip()

    if not response_text:
        # Always give the caller at least one update, even for an empty reply.
        yield ""
        return

    # Simulated streaming: emit the reply one character at a time.
    for end in range(1, len(response_text) + 1):
        time.sleep(0.005)  # adjust for desired speed
        yield response_text[:end]
|
118 |
|
|
|
|
|
119 |
|
120 |
+
# --- Gradio UI Event Handlers (THESE GET DECORATED) ---
|
121 |
+
# Note: The @spaces.GPU decorator needs to be imported if not automatically available
|
122 |
+
# from huggingface_hub import-like syntax or it might be injected.
|
123 |
+
# For now, let's assume it's magically available in the Space environment.
|
124 |
+
# If not, you might need to find how to import it for ZeroGPU shared pool.
|
125 |
+
# It is normally imported explicitly (`import spaces`) and applied as `@spaces.GPU`,
|
126 |
+
# or simply `@spaces.GPU` if `spaces` is an auto-imported object.
|
127 |
+
|
128 |
+
# Try without explicit import first, as HF might inject it.
|
129 |
+
# If "spaces is not defined" error, you'll need to find the correct import for it.
|
130 |
+
|
131 |
+
# @spaces.GPU # Placeholder for actual decorator
|
132 |
+
# TODO: on ZeroGPU hardware the Gradio event handlers are the functions that
# should carry the Hugging Face GPU decorator (`@spaces.GPU`, available after
# `import spaces`), because model loading and generation are triggered from
# them. No decorator is applied here until that dependency is confirmed.
def base_model_predict_decorated(user_message, chat_history):
    """Stream the base model's reply into the base chatbot.

    Args:
        user_message: Text taken from the shared input textbox.
        chat_history: Current chatbot value: a list of role/content message
            dicts, or ``None``/empty when the conversation is new.

    Yields:
        The full message list to display: the previous history, the new user
        message, and the assistant reply generated so far. The output component
        is a ``type="messages"`` chatbot, so a list of message dicts is yielded
        rather than a bare string. Errors are shown as the assistant message.
    """
    history = list(chat_history or [])
    user_turn = {"role": "user", "content": user_message}
    try:
        # generate_chat_response loads the model on first use and yields the
        # reply as progressively longer strings.
        for partial_reply in generate_chat_response(user_message, history, "base"):
            yield history + [user_turn, {"role": "assistant", "content": partial_reply}]
    except Exception as e:  # UI boundary: report the failure instead of crashing the event
        print(f"Error in base_model_predict_decorated: {e}")
        error_text = f"Error generating base model response: {e}"
        yield history + [user_turn, {"role": "assistant", "content": error_text}]
|
157 |
+
|
158 |
+
def ft_model_predict_decorated(user_message, chat_history):
    """Stream the fine-tuned model's reply into the CineGuide chatbot.

    Args:
        user_message: Text taken from the shared input textbox.
        chat_history: Current chatbot value: a list of role/content message
            dicts, or ``None``/empty when the conversation is new.

    Yields:
        The full message list to display: the previous history, the new user
        message, and the assistant reply generated so far. The output component
        is a ``type="messages"`` chatbot, so a list of message dicts is yielded
        rather than a bare string. Errors are shown as the assistant message.
    """
    history = list(chat_history or [])
    user_turn = {"role": "user", "content": user_message}
    try:
        # generate_chat_response loads the model on first use and yields the
        # reply as progressively longer strings.
        for partial_reply in generate_chat_response(user_message, history, "finetuned"):
            yield history + [user_turn, {"role": "assistant", "content": partial_reply}]
    except Exception as e:  # UI boundary: report the failure instead of crashing the event
        print(f"Error in ft_model_predict_decorated: {e}")
        error_text = f"Error generating fine-tuned response: {e}"
        yield history + [user_turn, {"role": "assistant", "content": error_text}]
|
169 |
+
|
170 |
+
# --- Gradio UI Definition ---
|
171 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
172 |
gr.Markdown(
|
173 |
f"""
|
|
|
175 |
Compare the fine-tuned CineGuide movie recommender (loaded from `{FINETUNED_MODEL_ID}`)
|
176 |
with the base {BASE_MODEL_ID} model.
|
177 |
Type your movie-related query below and see how each model responds!
|
178 |
+
**Note:** Models are loaded on first use and may take some time. Using shared GPU pool.
|
179 |
"""
|
180 |
)
|
|
|
181 |
with gr.Row():
|
182 |
with gr.Column(scale=1):
|
183 |
gr.Markdown(f"## 🗣️ Base {BASE_MODEL_ID}")
|
184 |
+
chatbot_base = gr.Chatbot(label="Base Model Chat", height=500, type="messages") # Use type="messages"
|
|
|
|
|
|
|
185 |
with gr.Column(scale=1):
|
186 |
gr.Markdown(f"## 🤖 Fine-tuned CineGuide (from {FINETUNED_MODEL_ID})")
|
187 |
+
chatbot_ft = gr.Chatbot(label="CineGuide Chat", height=500, type="messages") # Use type="messages"
|
|
|
|
|
188 |
|
189 |
with gr.Row():
|
190 |
shared_input_textbox = gr.Textbox(
|
191 |
+
show_label=False, placeholder="Enter your movie query...", container=False, scale=7
|
|
|
|
|
|
|
192 |
)
|
193 |
submit_button = gr.Button("✉️ Send", variant="primary", scale=1)
|
194 |
|
195 |
gr.Examples(
|
196 |
examples=[
|
197 |
"Hi! I'm looking for something funny to watch tonight.",
|
198 |
+
"I love dry, witty humor more than slapstick.",
|
199 |
+
"I'm really into complex sci-fi movies that make you think.",
|
|
|
|
|
|
|
200 |
],
|
201 |
+
inputs=[shared_input_textbox], label="Example Prompts"
|
|
|
202 |
)
|
203 |
|
204 |
+
# Apply the @spaces.GPU decorator if you find the correct way to import/use it.
|
205 |
+
# For now, the functions themselves will handle loading.
|
206 |
+
# If the decorator is `@spaces.GPU()`, it would be:
|
207 |
+
# submit_button.click(spaces.GPU()(base_model_predict_decorated), ...)
|
208 |
+
# This part is tricky without knowing the exact decorator syntax for ZeroGPU.
|
209 |
+
# Let's assume the functions are called and *they* handle the GPU context internally.
|
210 |
+
# If the platform *requires* the event handler itself to be decorated, that's a different structure.
|
211 |
+
|
212 |
+
# The functions `base_model_predict_decorated` and `ft_model_predict_decorated`
|
213 |
+
# are what Gradio will call. If these need the `@spaces.GPU` decorator, you'd apply it like:
|
214 |
+
# @spaces.GPU
|
215 |
+
# def decorated_base_predict(user_message, chat_history):
|
216 |
+
# yield from base_model_predict_decorated(user_message, chat_history)
|
217 |
+
# And then pass `decorated_base_predict` to `submit_button.click`
|
218 |
+
|
219 |
+
# Simpler approach for now: let Gradio call these directly.
|
220 |
+
# If a wrapper is needed for the decorator, we can add it.
|
221 |
+
submit_button.click(
|
222 |
+
base_model_predict_decorated,
|
223 |
+
[shared_input_textbox, chatbot_base],
|
224 |
+
[chatbot_base],
|
225 |
+
# api_name="base_predict" # Optional
|
226 |
+
)
|
227 |
+
submit_button.click(
|
228 |
+
ft_model_predict_decorated,
|
229 |
+
[shared_input_textbox, chatbot_ft],
|
230 |
+
[chatbot_ft],
|
231 |
+
# api_name="ft_predict" # Optional
|
232 |
+
)
|
233 |
+
# Handle textbox submit event for both
|
234 |
+
shared_input_textbox.submit(
|
235 |
+
base_model_predict_decorated,
|
236 |
+
[shared_input_textbox, chatbot_base],
|
237 |
+
[chatbot_base]
|
238 |
+
)
|
239 |
+
shared_input_textbox.submit(
|
240 |
+
ft_model_predict_decorated,
|
241 |
+
[shared_input_textbox, chatbot_ft],
|
242 |
+
[chatbot_ft]
|
243 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
|
245 |
+
def clear_textbox_fn():
    """Return an empty string, used as the new value of the shared input textbox."""
    cleared_value = ""
    return cleared_value
|
246 |
+
submit_button.click(clear_textbox_fn, [], [shared_input_textbox])
|
247 |
+
shared_input_textbox.submit(clear_textbox_fn, [], [shared_input_textbox])
|
248 |
|
249 |
|
|
|
250 |
if __name__ == "__main__":
    # NOTE(review): package dependencies (torch, transformers, accelerate, ...)
    # are presumably declared in the Space's requirements file rather than in
    # code; confirm against the Space configuration.
    # NOTE(review): on ZeroGPU hardware the GPU-using event handlers are
    # usually the functions decorated with `@spaces.GPU`; confirm before relying on it.
    demo.queue()  # enable request queuing, needed for streaming (generator) handlers
    demo.launch(debug=True)
|