Spestly committed on
Commit
d75f179
·
verified ·
1 Parent(s): 15171d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -2
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import spaces
3
  from transformers import pipeline
4
  import torch
 
5
 
6
  # Global variable to store pipelines
7
  model_cache = {}
@@ -120,6 +121,69 @@ def generate_response(message, history, model_name, max_length=512, temperature=
120
  except Exception as e:
121
  return f"Error generating response: {str(e)}"
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  # Create the Gradio interface
124
  def create_interface():
125
  with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
@@ -222,7 +286,6 @@ def create_interface():
222
  ---
223
 
224
  ### About the Nous-1 Models
225
-
226
  **Nous-1-2B**: 2 billion parameter model by Apexion AI, designed for fast inference
227
 
228
  **Nous-1-4B**: 4 billion parameter model by Apexion AI, optimised for efficient conversation and text generation
@@ -239,4 +302,5 @@ def create_interface():
239
  # Launch the app
240
  if __name__ == "__main__":
241
  demo = create_interface()
242
- demo.launch()
 
 
2
  import spaces
3
  from transformers import pipeline
4
  import torch
5
+ from typing import List, Dict, Optional
6
 
7
  # Global variable to store pipelines
8
  model_cache = {}
 
121
  except Exception as e:
122
  return f"Error generating response: {str(e)}"
123
 
124
def _history_to_pairs(history):
    """Convert a JSON chat-history string into ``[user, assistant]`` pairs.

    Args:
        history: JSON text encoding a list of ``{"role": ..., "content": ...}``
            dicts, or an empty/blank/``None`` value for "no history".

    Returns:
        A list of two-element lists ``[user_content, assistant_content]``.
        ``assistant_content`` is ``None`` when a user turn has no reply yet.
        An empty list is returned when the history is missing or cannot be
        parsed; parsing is deliberately best-effort.
    """
    # Local import keeps this block self-contained; the module header is not
    # guaranteed to import json.
    import json

    if not history or not history.strip():
        return []

    try:
        messages = json.loads(history)
    except (ValueError, TypeError, RecursionError):
        # Malformed (or pathologically nested) JSON: continue without history
        # instead of failing the whole request.
        return []

    if not isinstance(messages, list):
        # Valid JSON, but not a list of messages; nothing usable.
        return []

    pairs = []
    pending_user = None
    pending_reply = None
    for msg in messages:
        # Silently skip entries that are not {"role", "content"} dicts.
        if not (isinstance(msg, dict) and "role" in msg and "content" in msg):
            continue
        role = msg["role"]
        if role == "user":
            # A new user turn closes the previous pair, if one was open.
            if pending_user is not None:
                pairs.append([pending_user, pending_reply])
            pending_user, pending_reply = msg["content"], None
        elif role == "assistant":
            # A later assistant message overwrites an earlier one in the same
            # turn; an assistant message before any user turn is discarded
            # when the next user turn starts.
            pending_reply = msg["content"]

    # Flush the last open turn.
    if pending_user is not None:
        pairs.append([pending_user, pending_reply])
    return pairs


@spaces.GPU
def generate(
    model: str,
    user_input: str,
    history: Optional[str] = "",
    temperature: float = 0.7,
    system_prompt: Optional[str] = "",
    max_tokens: int = 512
):
    """
    API endpoint for LLM generation.

    Args:
        model: Model name to use; must be a key of AVAILABLE_MODELS.
        user_input: Current user message/input.
        history: JSON string of conversation history in the format
            [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}].
            Unparseable history is ignored rather than reported.
        temperature: Temperature for generation (intended range 0.1-2.0;
            not validated here, passed through unchanged).
        system_prompt: Optional system prompt; when non-blank it is prepended
            to the user input as plain text.
        max_tokens: Maximum tokens to generate (intended range 1-8192;
            not validated here, passed through unchanged).

    Returns:
        Whatever generate_response returns for the request, or an
        "Error: ..." string when the model name is unknown or loading fails.
    """
    # Validate model
    if model not in AVAILABLE_MODELS:
        return f"Error: Model {model} not available. Available models: {list(AVAILABLE_MODELS.keys())}"

    # Load the model up front so a loading failure yields a dedicated error
    # message. The returned pipeline is not needed here: generate_response is
    # given the model name, not the pipeline object.
    try:
        initialize_model(model)
    except Exception as e:
        return f"Error loading model {model}: {str(e)}"

    # Parse history if provided and convert to gradio pair format
    gradio_history = _history_to_pairs(history)

    # Add system prompt to user input if provided
    final_user_input = user_input
    if system_prompt and system_prompt.strip():
        final_user_input = f"System: {system_prompt}\n\nUser: {user_input}"

    # Delegate to the chat handler. The trailing 0.9 is its sixth positional
    # argument; NOTE(review): presumably top_p -- confirm against the
    # generate_response signature.
    return generate_response(final_user_input, gradio_history, model, max_tokens, temperature, 0.9)
186
+
187
  # Create the Gradio interface
188
  def create_interface():
189
  with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
 
286
  ---
287
 
288
  ### About the Nous-1 Models
 
289
  **Nous-1-2B**: 2 billion parameter model by Apexion AI, designed for fast inference
290
 
291
  **Nous-1-4B**: 4 billion parameter model by Apexion AI, optimised for efficient conversation and text generation
 
302
  # Launch the app
303
  if __name__ == "__main__":
304
  demo = create_interface()
305
+ # Launch the app; share=True additionally requests a public Gradio share link
306
+ demo.launch(share=True)