Spestly committed · Commit 949aa02 · verified · 1 Parent(s): e5eb33d

Update app.py

Files changed (1): app.py +119 -46
app.py CHANGED
@@ -3,27 +3,55 @@ import spaces
 from transformers import pipeline
 import torch
 
-# Global variable to store the pipeline
-pipe = None
+# Global variable to store pipelines
+model_cache = {}
+
+# Available models
+AVAILABLE_MODELS = {
+    "Nous-V1-4B": "apexion-ai/Nous-V1-4B",
+    "Nous-V1-8B": "apexion-ai/Nous-V1-8B",
+}
 
 @spaces.GPU
-def initialize_model():
-    global pipe
-    if pipe is None:
-        pipe = pipeline(
-            "text-generation",
-            model="apexion-ai/Nous-V1-4B",
-            torch_dtype=torch.float16,
-            device_map="auto"
-        )
-    return pipe
+def initialize_model(model_name):
+    global model_cache
+
+    if model_name not in AVAILABLE_MODELS:
+        raise ValueError(f"Model {model_name} not found in available models")
+
+    model_id = AVAILABLE_MODELS[model_name]
+
+    # Check if model is already cached
+    if model_id not in model_cache:
+        try:
+            model_cache[model_id] = pipeline(
+                "text-generation",
+                model=model_id,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                trust_remote_code=True
+            )
+        except Exception as e:
+            # Fallback to CPU if GPU fails
+            model_cache[model_id] = pipeline(
+                "text-generation",
+                model=model_id,
+                torch_dtype=torch.float32,
+                device_map="cpu",
+                trust_remote_code=True
+            )
+
+    return model_cache[model_id]
 
 @spaces.GPU
-def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
-    """Generate response using the Orion model"""
+def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
+    """Generate response using the selected model"""
 
     # Initialize model inside the GPU-decorated function
-    model_pipe = initialize_model()
+    try:
+        model_pipe = initialize_model(model_name)
+    except Exception as e:
+        return f"Error loading model {model_name}: {str(e)}"
 
     # Format the conversation history
     messages = []
@@ -39,24 +67,52 @@ def generate_response(message, history, max_length=512, temperature=0.7, top_p=0
 
     # Generate response
     try:
-        response = model_pipe(
-            messages,
-            max_length=max_length,
-            temperature=temperature,
-            top_p=top_p,
-            do_sample=True,
-            pad_token_id=model_pipe.tokenizer.eos_token_id
-        )
+        # Some models may not support the messages format, so we'll try different approaches
+        try:
+            # Try with messages format first
+            response = model_pipe(
+                messages,
+                max_length=max_length,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                pad_token_id=model_pipe.tokenizer.eos_token_id,
+                return_full_text=False
+            )
+        except:
+            # Fallback to simple text format
+            conversation_text = ""
+            for msg in messages:
+                if msg["role"] == "user":
+                    conversation_text += f"User: {msg['content']}\n"
+                else:
+                    conversation_text += f"Assistant: {msg['content']}\n"
+            conversation_text += "Assistant:"
+
+            response = model_pipe(
+                conversation_text,
+                max_length=max_length,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                pad_token_id=model_pipe.tokenizer.eos_token_id,
+                return_full_text=False
+            )
 
         # Extract the generated text
-        generated_text = response[0]['generated_text']
+        if isinstance(response, list) and len(response) > 0:
+            generated_text = response[0]['generated_text']
+        else:
+            generated_text = str(response)
 
-        # Get the last assistant message
+        # Clean up the response
         if isinstance(generated_text, list):
             assistant_response = generated_text[-1]['content']
         else:
-            # Fallback parsing if needed
-            assistant_response = str(generated_text).split("assistant")[-1].strip()
+            # Remove the prompt and extract assistant response
+            assistant_response = str(generated_text).strip()
+            if "Assistant:" in assistant_response:
+                assistant_response = assistant_response.split("Assistant:")[-1].strip()
 
         return assistant_response
 
@@ -65,18 +121,28 @@ def generate_response(message, history, max_length=512, temperature=0.7, top_p=0
 
 # Create the Gradio interface
 def create_interface():
-    with gr.Blocks(title="Nous-V1-4B Chat", theme=gr.themes.Soft()) as demo:
+    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
-        # 🚀 Nous-V1-4B Chat
+        # 🚀 Nous-V1 Model Chat Interface
 
-        Chat with the Nous-V1-4B model by Apexion AI. This is a 4B parameter language model optimized for conversation.
+        Chat with the Nous-V1 models by Apexion AI. Choose between the 4B and 8B parameter versions.
 
-        **Model:** `apexion-ai/Nous-V1-4B`
+        **Available Models:**
+        - Nous-V1-4B (4 billion parameters)
+        - Nous-V1-8B (8 billion parameters)
         """)
 
+        with gr.Row():
+            model_selector = gr.Dropdown(
+                choices=list(AVAILABLE_MODELS.keys()),
+                value="Nous-V1-4B",
+                label="Select Model",
+                info="Choose which model to use for generation"
+            )
+
         chatbot = gr.Chatbot(
             height=400,
-            placeholder="Start chatting with Nous-V1-4B...",
+            placeholder="Select a model and start chatting...",
             label="Chat"
         )
 
@@ -96,33 +162,37 @@ def create_interface():
                     maximum=8192,
                     value=2048,
                     step=50,
-                    label="Max Length"
+                    label="Max Length",
+                    info="Maximum length of generated response"
                 )
                 temperature = gr.Slider(
                     minimum=0.1,
                     maximum=2.0,
                     value=0.7,
                     step=0.1,
-                    label="Temperature"
+                    label="Temperature",
+                    info="Controls randomness in generation"
                 )
                 top_p = gr.Slider(
                     minimum=0.1,
                     maximum=1.0,
                     value=0.9,
                     step=0.1,
-                    label="Top P"
+                    label="Top P",
+                    info="Controls diversity via nucleus sampling"
                 )
 
         # Event handlers
         def user_message(message, history):
            return "", history + [[message, None]]
 
-        def bot_response(history, max_len, temp, top_p):
+        def bot_response(history, model_name, max_len, temp, top_p):
            if history:
                user_message = history[-1][0]
                bot_message = generate_response(
                    user_message,
                    history[:-1],
+                    model_name,
                    max_len,
                    temp,
                    top_p
@@ -130,31 +200,34 @@ def create_interface():
                history[-1][1] = bot_message
            return history
 
+        def model_changed(model_name):
+            return gr.update(placeholder=f"Chat with {model_name}...")
+
        # Wire up the events
        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )
 
        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
+            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        )
 
+            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        clear_btn.click(lambda: None, None, chatbot, queue=False)
 
+        model_selector.change(model_changed, model_selector, chatbot)
+
        gr.Markdown("""
        ---
 
-        ### About Nous-V1-4B
+        ### About the Nous-V1 Models
 
-        Nous-V1-4B is a 4 billion parameter language model developed by Apexion AI.
-        It's designed for efficient text generation and conversation.
+        **Nous-V1-4B**: 4 billion parameter model by Apexion AI, optimized for efficient conversation and text generation
 
+        **Nous-V1-8B**: 8 billion parameter model by Apexion AI, offering enhanced capabilities and better performance for complex tasks
 
-        **Features:**
-        - 4B parameters for efficient inference
-        - Optimizsd for conversational AI
-        - Supports various text generation tasks
+        Both models are designed for conversational AI and support various text generation tasks. The 8B model provides more sophisticated responses but requires more computational resources.
 
-        This Space uses ZeroGPU for efficient GPU allocation.
+        This Space uses ZeroGPU for efficient GPU allocation across both model sizes.
        """)
 
    return demo
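
As background for the messages-format call this commit leans on: below is a minimal standalone sketch (not part of the commit) of invoking a transformers text-generation pipeline with a chat-style message list. It assumes a transformers release whose pipeline applies the model's chat template to list-of-dict inputs; the prompt text is illustrative.

# Minimal sketch, assuming transformers' chat-aware text-generation pipeline;
# not part of the commit, prompt is illustrative.
from transformers import pipeline
import torch

pipe = pipeline(
    "text-generation",
    model="apexion-ai/Nous-V1-4B",
    torch_dtype=torch.float16,
    device_map="auto",
)

messages = [{"role": "user", "content": "Hello!"}]
out = pipe(messages, max_new_tokens=64, do_sample=True, temperature=0.7, top_p=0.9)

# With chat input and the default return_full_text=True, 'generated_text' is the
# message list with the new assistant turn appended, which is why the app reads
# generated_text[-1]['content']; the app's string-based fallback covers models
# whose tokenizer lacks a chat template.
print(out[0]["generated_text"][-1]["content"])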