Nymbo committed · Commit 2d6eaa5 · verified · 1 Parent(s): 5b8ad4f

Update app.py

Files changed (1):
  1. app.py  +202 -127
app.py CHANGED
@@ -1,22 +1,22 @@
 import gradio as gr
 from openai import OpenAI
 import os
-import requests  # Added for potential future use, though OpenAI client handles it now
 
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
-if not ACCESS_TOKEN:
-    print("Warning: HF_TOKEN environment variable not set. Authentication might fail.")
-else:
-    print("Access token loaded.")
 
-# Base URLs for different providers
-HF_INFERENCE_BASE_URL = "https://api-inference.huggingface.co/v1/"
-CEREBRAS_ROUTER_BASE_URL = "https://router.huggingface.co/cerebras/v1/"  # Use base URL for OpenAI client
 
-# Default provider
-DEFAULT_PROVIDER = "hf-inference"
 
-# --- Main Respond Function ---
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -27,66 +27,51 @@ def respond(
     frequency_penalty,
     seed,
     custom_model,
-    inference_provider  # New argument for provider selection
 ):
-
-    print(f"--- New Request ---")
-    print(f"Selected Inference Provider: {inference_provider}")
     print(f"Received message: {message}")
-    # print(f"History: {history}")  # Can be verbose
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected model (custom_model): {custom_model}")
-
-    # Determine the base URL based on the selected provider
-    if inference_provider == "cerebras":
-        base_url = CEREBRAS_ROUTER_BASE_URL
-        print(f"Using Cerebras Router endpoint: {base_url}")
-    else:  # Default to hf-inference
-        base_url = HF_INFERENCE_BASE_URL
-        print(f"Using HF Inference API endpoint: {base_url}")
-
-    # Initialize the OpenAI client dynamically for each request
-    try:
-        client = OpenAI(
-            base_url=base_url,
-            api_key=ACCESS_TOKEN,
-        )
-        print("OpenAI client initialized for the request.")
-    except Exception as e:
-        print(f"Error initializing OpenAI client: {e}")
-        yield f"Error: Could not initialize API client for provider {inference_provider}. Check token and endpoint."
-        return
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
     messages = [{"role": "system", "content": system_message}]
-    # print("Initial messages array constructed.")  # Less verbose logging
 
     # Add conversation history to the context
     for val in history:
-        user_part, assistant_part = val[0], val[1]
-        if user_part: messages.append({"role": "user", "content": user_part})
-        if assistant_part: messages.append({"role": "assistant", "content": assistant_part})
 
     # Append the latest user message
     messages.append({"role": "user", "content": message})
-    # print("Full message context prepared.")  # Less verbose logging
 
     # If user provided a model, use that; otherwise, fall back to a default model
-    # Ensure a default model is always set if custom_model is empty
-    model_to_use = custom_model.strip() if custom_model.strip() else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
 
-    # Start streaming response
     response = ""
-    print(f"Sending request to {inference_provider} via {base_url}...")
-
-    try:
-        stream = client.chat.completions.create(
             model=model_to_use,
             max_tokens=max_tokens,
             stream=True,
@@ -95,60 +80,138 @@ def respond(
             frequency_penalty=frequency_penalty,
             seed=seed,
             messages=messages,
-        )
-        for message_chunk in stream:
             token_text = message_chunk.choices[0].delta.content
-            # Handle potential None or empty tokens gracefully
-            if token_text:
-                # print(f"Received token: {token_text}")  # Very verbose
                 response += token_text
                 yield response
-            # Handle potential finish reason if needed (e.g., length)
-            # finish_reason = message_chunk.choices[0].finish_reason
-            # if finish_reason:
-            #     print(f"Stream finished with reason: {finish_reason}")
-
-    except Exception as e:
-        print(f"Error during API call to {inference_provider}: {e}")
-        yield f"Error: API call failed. Details: {str(e)}"
-        return  # Stop generation on error
-
     print("Completed response generation.")
 
-# --- GRADIO UI Elements ---
 
-chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and provider, then begin chatting", layout="panel")
 print("Chatbot interface created.")
 
-# Moved these inside the Accordion later
-system_message_box = gr.Textbox(value="You are a helpful assistant.", label="System Prompt")
-max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens")  # Increased default
-temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")  # Adjusted range
-top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
-frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
-seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
 custom_model_box = gr.Textbox(
     value="",
-    label="Custom Model Path",
-    info="(Optional) Provide a Hugging Face model path. Overrides featured model selection.",
     placeholder="meta-llama/Llama-3.3-70B-Instruct"
 )
 
-# New UI Element for Provider Selection (will be placed in Accordion)
-inference_provider_radio = gr.Radio(
     choices=["hf-inference", "cerebras"],
-    value=DEFAULT_PROVIDER,
     label="Inference Provider",
-    info=f"Select the backend API. Default: {DEFAULT_PROVIDER}"
 )
-print("Inference provider radio button created.")
 
 
-# --- Gradio Chat Interface Definition ---
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
-        # Order matters: must match the 'respond' function signature
         system_message_box,
         max_tokens_slider,
         temperature_slider,
@@ -156,36 +219,59 @@ demo = gr.ChatInterface(
         frequency_penalty_slider,
         seed_slider,
         custom_model_box,
-        inference_provider_radio,  # Added the new input
     ],
     fill_height=True,
     chatbot=chatbot,
     theme="Nymbo/Nymbo_Theme",
-    title="Multi-Provider Chat Hub",
-    description="Chat with various models using different inference backends (HF Inference API or Cerebras via HF Router)."
 )
 print("ChatInterface object created.")
 
-# --- Add Accordions for Settings within the Demo context ---
 with demo:
-    # Model Selection Accordion (existing logic)
     with gr.Accordion("Model Selection", open=False):
-        model_search_box = gr.Textbox(label="Filter Featured Models", placeholder="Search...", lines=1)
         print("Model search box created.")
 
-        # Example models list (keep your extensive list)
         models_list = [
-            "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.1-70B-Instruct", "meta-llama/Llama-3.1-8B-Instruct",
-            "NousResearch/Hermes-3-Llama-3.1-8B", "mistralai/Mistral-Nemo-Instruct-2407", "mistralai/Mixtral-8x7B-Instruct-v0.1",
-            "mistralai/Mistral-7B-Instruct-v0.3", "Qwen/Qwen3-32B", "microsoft/Phi-3.5-mini-instruct",
-            # Add the rest of your models here...
         ]
         print("Models list initialized.")
 
         featured_model_radio = gr.Radio(
-            label="Select a Featured Model",
             choices=models_list,
-            value="meta-llama/Llama-3.3-70B-Instruct",  # Default featured model
            interactive=True
         )
         print("Featured models radio button created.")
@@ -193,44 +279,33 @@ with demo:
         def filter_models(search_term):
             print(f"Filtering models with search term: {search_term}")
             filtered = [m for m in models_list if search_term.lower() in m.lower()]
-            # Ensure a valid value is selected if the current one is filtered out
-            current_value = featured_model_radio.value
-            if current_value not in filtered and filtered:
-                new_value = filtered[0]  # Select the first available filtered model
-            elif not filtered:
-                new_value = None  # Or handle empty case as needed
-            else:
-                new_value = current_value  # Keep current if still valid
             print(f"Filtered models: {filtered}")
-            return gr.update(choices=filtered, value=new_value)
 
 
-        def set_custom_model_from_radio(selected_model):
-            """Updates the Custom Model text box when a featured model is selected."""
-            print(f"Featured model selected: {selected_model}")
-            return selected_model  # Directly return the selected model name
-
-        model_search_box.change(fn=filter_models, inputs=model_search_box, outputs=featured_model_radio)
-        featured_model_radio.change(fn=set_custom_model_from_radio, inputs=featured_model_radio, outputs=custom_model_box)
-        print("Model selection events linked.")
-
-    # Advanced Settings Accordion (New)
     with gr.Accordion("Advanced Settings", open=False):
-        # Place the provider selection and parameter sliders here
-        gr.Markdown("Configure inference parameters and select the backend provider.")
-        # Add the UI elements defined earlier into this accordion
-        gr.Textbox(value="You are a helpful assistant.", label="System Prompt").render()  # Render system_message_box here
-        inference_provider_radio.render()  # Render the provider radio here
-        max_tokens_slider.render()
-        temperature_slider.render()
-        top_p_slider.render()
-        frequency_penalty_slider.render()
-        seed_slider.render()
-        print("Advanced settings accordion created with provider selection and parameters.")
-
 
-print("Gradio interface fully initialized.")
 
 if __name__ == "__main__":
     print("Launching the demo application.")
-    demo.launch(show_api=False)
 
The updated app.py, as it reads after this commit (added lines marked with +):

 import gradio as gr
 from openai import OpenAI
 import os
+import requests
+import json
 
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
+print("Access token loaded.")
 
+# Initialize the OpenAI client for HF Inference
+hf_client = OpenAI(
+    base_url="https://api-inference.huggingface.co/v1/",
+    api_key=ACCESS_TOKEN,
+)
+print("HF Inference OpenAI client initialized.")
 
+# Cerebras API endpoint
+CEREBRAS_API_URL = "https://router.huggingface.co/cerebras/v1/chat/completions"
 
 def respond(
     message,
     history: list[tuple[str, str]],
     frequency_penalty,
     seed,
     custom_model,
+    provider  # New parameter for provider selection
 ):
     print(f"Received message: {message}")
+    print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected model (custom_model): {custom_model}")
+    print(f"Selected provider: {provider}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
+    # Prepare messages for API
     messages = [{"role": "system", "content": system_message}]
+    print("Initial messages array constructed.")
 
     # Add conversation history to the context
     for val in history:
+        user_part = val[0]
+        assistant_part = val[1]
+        if user_part:
+            messages.append({"role": "user", "content": user_part})
+            print(f"Added user message to context: {user_part}")
+        if assistant_part:
+            messages.append({"role": "assistant", "content": assistant_part})
+            print(f"Added assistant message to context: {assistant_part}")
 
     # Append the latest user message
     messages.append({"role": "user", "content": message})
+    print("Latest user message appended.")
 
     # If user provided a model, use that; otherwise, fall back to a default model
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
 
+    # Start with an empty string to build the response as tokens stream in
     response = ""
+
+    # Handle different providers
+    if provider == "hf-inference":
+        print("Using HF Inference API.")
+        # Use the OpenAI client for HF Inference
+        for message_chunk in hf_client.chat.completions.create(
             model=model_to_use,
             max_tokens=max_tokens,
             stream=True,
             frequency_penalty=frequency_penalty,
             seed=seed,
             messages=messages,
+        ):
             token_text = message_chunk.choices[0].delta.content
+            if token_text is not None:  # Handle None values that might come in stream
+                print(f"Received token: {token_text}")
                 response += token_text
                 yield response
+
+    elif provider == "cerebras":
+        print("Using Cerebras API via HF Router.")
+
+        # Prepare headers and payload for the Cerebras API
+        headers = {
+            "Authorization": f"Bearer {ACCESS_TOKEN}",
+            "Content-Type": "application/json"
+        }
+
+        payload = {
+            "model": model_to_use,
+            "messages": messages,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "top_p": top_p,
+            "frequency_penalty": frequency_penalty,
+            "stream": True
+        }
+
+        if seed is not None:
+            payload["seed"] = seed
+
+        # Make the streaming request to Cerebras
+        with requests.post(
+            CEREBRAS_API_URL,
+            headers=headers,
+            json=payload,
+            stream=True
+        ) as req:
+            # Handle Server-Sent Events (SSE) format
+            for line in req.iter_lines():
+                if line:
+                    # Skip the "data: " prefix
+                    if line.startswith(b'data: '):
+                        line = line[6:]
+
+                    # Skip "[DONE]" message
+                    if line == b'[DONE]':
+                        continue
+
+                    try:
+                        # Parse the JSON chunk
+                        chunk = json.loads(line)
+                        token_text = chunk.get("choices", [{}])[0].get("delta", {}).get("content")
+
+                        if token_text:
+                            print(f"Received Cerebras token: {token_text}")
+                            response += token_text
+                            yield response
+                    except json.JSONDecodeError as e:
+                        print(f"Error decoding JSON: {e}, Line: {line}")
+                        continue
+
     print("Completed response generation.")
 
+# GRADIO UI
 
+chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
 print("Chatbot interface created.")
 
+system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
+
+max_tokens_slider = gr.Slider(
+    minimum=1,
+    maximum=4096,
+    value=512,
+    step=1,
+    label="Max new tokens"
+)
+temperature_slider = gr.Slider(
+    minimum=0.1,
+    maximum=4.0,
+    value=0.7,
+    step=0.1,
+    label="Temperature"
+)
+top_p_slider = gr.Slider(
+    minimum=0.1,
+    maximum=1.0,
+    value=0.95,
+    step=0.05,
+    label="Top-P"
+)
+frequency_penalty_slider = gr.Slider(
+    minimum=-2.0,
+    maximum=2.0,
+    value=0.0,
+    step=0.1,
+    label="Frequency Penalty"
+)
+seed_slider = gr.Slider(
+    minimum=-1,
+    maximum=65535,
+    value=-1,
+    step=1,
+    label="Seed (-1 for random)"
+)
+
+# The custom_model_box is what the respond function sees as "custom_model"
 custom_model_box = gr.Textbox(
     value="",
+    label="Custom Model",
+    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
     placeholder="meta-llama/Llama-3.3-70B-Instruct"
 )
 
+# New provider selection radio
+provider_radio = gr.Radio(
     choices=["hf-inference", "cerebras"],
+    value="hf-inference",
     label="Inference Provider",
+    info="Select which inference provider to use"
 )
 
+def set_custom_model_from_radio(selected):
+    """
+    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
+    We will update the Custom Model text box with that selection automatically.
+    """
+    print(f"Featured model selected: {selected}")
+    return selected
 
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
         system_message_box,
         max_tokens_slider,
         temperature_slider,
         frequency_penalty_slider,
         seed_slider,
         custom_model_box,
+        provider_radio,  # Add provider selection to inputs
     ],
     fill_height=True,
     chatbot=chatbot,
     theme="Nymbo/Nymbo_Theme",
 )
 print("ChatInterface object created.")
 
 with demo:
     with gr.Accordion("Model Selection", open=False):
+        model_search_box = gr.Textbox(
+            label="Filter Models",
+            placeholder="Search for a featured model...",
+            lines=1
+        )
         print("Model search box created.")
 
         models_list = [
+            "meta-llama/Llama-3.3-70B-Instruct",
+            "meta-llama/Llama-3.1-70B-Instruct",
+            "meta-llama/Llama-3.0-70B-Instruct",
+            "meta-llama/Llama-3.2-3B-Instruct",
+            "meta-llama/Llama-3.2-1B-Instruct",
+            "meta-llama/Llama-3.1-8B-Instruct",
+            "NousResearch/Hermes-3-Llama-3.1-8B",
+            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+            "mistralai/Mistral-Nemo-Instruct-2407",
+            "mistralai/Mixtral-8x7B-Instruct-v0.1",
+            "mistralai/Mistral-7B-Instruct-v0.3",
+            "mistralai/Mistral-7B-Instruct-v0.2",
+            "Qwen/Qwen3-235B-A22B",
+            "Qwen/Qwen3-32B",
+            "Qwen/Qwen2.5-72B-Instruct",
+            "Qwen/Qwen2.5-3B-Instruct",
+            "Qwen/Qwen2.5-0.5B-Instruct",
+            "Qwen/QwQ-32B",
+            "Qwen/Qwen2.5-Coder-32B-Instruct",
+            "microsoft/Phi-3.5-mini-instruct",
+            "microsoft/Phi-3-mini-128k-instruct",
+            "microsoft/Phi-3-mini-4k-instruct",
+            "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+            "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+            "HuggingFaceH4/zephyr-7b-beta",
+            "HuggingFaceTB/SmolLM2-360M-Instruct",
+            "tiiuae/falcon-7b-instruct",
+            "01-ai/Yi-1.5-34B-Chat",
         ]
         print("Models list initialized.")
 
         featured_model_radio = gr.Radio(
+            label="Select a model below",
             choices=models_list,
+            value="meta-llama/Llama-3.3-70B-Instruct",
             interactive=True
         )
         print("Featured models radio button created.")
 
         def filter_models(search_term):
             print(f"Filtering models with search term: {search_term}")
             filtered = [m for m in models_list if search_term.lower() in m.lower()]
             print(f"Filtered models: {filtered}")
+            return gr.update(choices=filtered)
 
+        model_search_box.change(
+            fn=filter_models,
+            inputs=model_search_box,
+            outputs=featured_model_radio
+        )
+        print("Model search box change event linked.")
 
+        featured_model_radio.change(
+            fn=set_custom_model_from_radio,
+            inputs=featured_model_radio,
+            outputs=custom_model_box
+        )
+        print("Featured model radio button change event linked.")
+
+    # Add new accordion for advanced settings including provider selection
     with gr.Accordion("Advanced Settings", open=False):
+        # The provider_radio is already defined above, we're just adding it to the UI here
+        gr.Markdown("### Inference Provider")
+        gr.Markdown("Select which provider to use for inference. Default is Hugging Face Inference API.")
+        # Provider radio is already included in the additional_inputs
+        gr.Markdown("Note: Different providers may support different models and parameters.")
 
+print("Gradio interface initialized.")
 
 if __name__ == "__main__":
     print("Launching the demo application.")
+    demo.launch(show_api=True)
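
For reference, a minimal sketch of exercising the updated respond generator outside the Gradio UI. This is not part of the commit; it assumes HF_TOKEN is exported, that the file is saved as app.py on the import path, and that the four parameter names not shown in the diff (system_message, max_tokens, temperature, top_p) match how the function body uses them.

# sketch: stream one reply through respond() without launching the UI
from app import respond  # assumption: the diffed file is importable as app.py

last = ""
for partial in respond(
    message="Hello there!",
    history=[],                # no prior turns
    system_message="You are a helpful assistant.",
    max_tokens=64,
    temperature=0.7,
    top_p=0.95,
    frequency_penalty=0.0,
    seed=-1,                   # -1 is converted to None (random) inside respond()
    custom_model="",           # empty -> falls back to meta-llama/Llama-3.3-70B-Instruct
    provider="cerebras",       # or "hf-inference"; "cerebras" exercises the SSE branch
):
    last = partial             # respond() yields the accumulated text on each token

print(last)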