Update app.py
app.py CHANGED
```diff
@@ -3,27 +3,55 @@ import spaces
 from transformers import pipeline
 import torch
 
-# Global variable to store
-
+# Global variable to store pipelines
+model_cache = {}
+
+# Available models
+AVAILABLE_MODELS = {
+    "Nous-V1-4B": "apexion-ai/Nous-V1-4B",
+    "Nous-V1-8B": "apexion-ai/Nous-V1-8B",
+}
 
 @spaces.GPU
-def initialize_model():
-    global
-
-
-
-
-
-
-
-
+def initialize_model(model_name):
+    global model_cache
+
+    if model_name not in AVAILABLE_MODELS:
+        raise ValueError(f"Model {model_name} not found in available models")
+
+    model_id = AVAILABLE_MODELS[model_name]
+
+    # Check if model is already cached
+    if model_id not in model_cache:
+        try:
+            model_cache[model_id] = pipeline(
+                "text-generation",
+                model=model_id,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                trust_remote_code=True
+            )
+        except Exception as e:
+            # Fallback to CPU if GPU fails
+            model_cache[model_id] = pipeline(
+                "text-generation",
+                model=model_id,
+                torch_dtype=torch.float32,
+                device_map="cpu",
+                trust_remote_code=True
+            )
+
+    return model_cache[model_id]
 
 @spaces.GPU
-def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
-    """Generate response using the
+def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
+    """Generate response using the selected model"""
 
     # Initialize model inside the GPU-decorated function
-
+    try:
+        model_pipe = initialize_model(model_name)
+    except Exception as e:
+        return f"Error loading model {model_name}: {str(e)}"
 
     # Format the conversation history
     messages = []
```
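Two things in this hunk are worth calling out. Under ZeroGPU, the GPU is attached only while a `@spaces.GPU`-decorated function runs, which is why the pipeline is built lazily inside `initialize_model` rather than at import time, and why instances are memoized in `model_cache` so later calls skip the reload. A minimal sketch of that keyed-cache pattern, with a hypothetical `fake_loader` standing in for `transformers.pipeline` so it runs without a GPU or model download:

```python
# Sketch of the lazy, keyed model cache used above. `fake_loader` is a
# hypothetical stand-in for transformers.pipeline, so this runs anywhere.
from typing import Callable, Dict

model_cache: Dict[str, object] = {}

def get_or_load(model_id: str, loader: Callable[[str], object]) -> object:
    """Build the pipeline on first use, then reuse the cached instance."""
    if model_id not in model_cache:
        model_cache[model_id] = loader(model_id)
    return model_cache[model_id]

if __name__ == "__main__":
    loads = []
    def fake_loader(mid: str) -> str:
        loads.append(mid)                  # record each real load
        return f"<pipeline for {mid}>"

    get_or_load("apexion-ai/Nous-V1-4B", fake_loader)
    get_or_load("apexion-ai/Nous-V1-4B", fake_loader)  # cache hit: no reload
    assert loads == ["apexion-ai/Nous-V1-4B"]
```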
```diff
@@ -39,24 +67,52 @@ def generate_response(message, history, max_length=512, temperature=0.7, top_p=0
 
     # Generate response
     try:
-
-
-
-
-
-
-
-
+        # Some models may not support the messages format, so we'll try different approaches
+        try:
+            # Try with messages format first
+            response = model_pipe(
+                messages,
+                max_length=max_length,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                pad_token_id=model_pipe.tokenizer.eos_token_id,
+                return_full_text=False
+            )
+        except:
+            # Fallback to simple text format
+            conversation_text = ""
+            for msg in messages:
+                if msg["role"] == "user":
+                    conversation_text += f"User: {msg['content']}\n"
+                else:
+                    conversation_text += f"Assistant: {msg['content']}\n"
+            conversation_text += "Assistant:"
+
+            response = model_pipe(
+                conversation_text,
+                max_length=max_length,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                pad_token_id=model_pipe.tokenizer.eos_token_id,
+                return_full_text=False
+            )
 
         # Extract the generated text
-
+        if isinstance(response, list) and len(response) > 0:
+            generated_text = response[0]['generated_text']
+        else:
+            generated_text = str(response)
 
-        #
+        # Clean up the response
         if isinstance(generated_text, list):
             assistant_response = generated_text[-1]['content']
         else:
-            #
-            assistant_response = str(generated_text).
+            # Remove the prompt and extract assistant response
+            assistant_response = str(generated_text).strip()
+            if "Assistant:" in assistant_response:
+                assistant_response = assistant_response.split("Assistant:")[-1].strip()
 
         return assistant_response
 
```
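One caveat on the generation kwargs here: in transformers, `max_length` caps prompt plus completion together, so a long chat history can leave little or no room for new tokens; `max_new_tokens` bounds only the completion and is usually the safer knob. The plain-text fallback prompt builder is also easy to pull out and exercise on its own; a standalone sketch of the same logic:

```python
# Standalone sketch of the plain-text fallback used in generate_response:
# flatten chat messages into a "User:/Assistant:" transcript and end with
# an "Assistant:" cue so the model continues as the assistant.
def build_prompt(messages: list) -> str:
    conversation_text = ""
    for msg in messages:
        if msg["role"] == "user":
            conversation_text += f"User: {msg['content']}\n"
        else:
            conversation_text += f"Assistant: {msg['content']}\n"
    return conversation_text + "Assistant:"

print(build_prompt([
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
    {"role": "user", "content": "Summarize ZeroGPU in one line."},
]))
```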
```diff
@@ -65,18 +121,28 @@ def generate_response(message, history, max_length=512, temperature=0.7, top_p=0
 
 # Create the Gradio interface
 def create_interface():
-    with gr.Blocks(title="
+    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
-        # 🚀 Nous-V1
+        # 🚀 Nous-V1 Model Chat Interface
 
-        Chat with the Nous-V1
+        Chat with the Nous-V1 models by Apexion AI. Choose between the 4B and 8B parameter versions.
 
-        **
+        **Available Models:**
+        - Nous-V1-4B (4 billion parameters)
+        - Nous-V1-8B (8 billion parameters)
         """)
 
+        with gr.Row():
+            model_selector = gr.Dropdown(
+                choices=list(AVAILABLE_MODELS.keys()),
+                value="Nous-V1-4B",
+                label="Select Model",
+                info="Choose which model to use for generation"
+            )
+
         chatbot = gr.Chatbot(
             height=400,
-            placeholder="
+            placeholder="Select a model and start chatting...",
             label="Chat"
         )
 
```
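The dropdown added here follows the standard Gradio pattern: a component's current value is passed to any handler that lists it as an input. A minimal self-contained sketch of that wiring (assumes only that `gradio` is installed):

```python
# Minimal sketch of the dropdown -> handler wiring used above.
import gradio as gr

CHOICES = ["Nous-V1-4B", "Nous-V1-8B"]

def describe(choice: str) -> str:
    return f"Selected model: {choice}"

with gr.Blocks() as demo:
    selector = gr.Dropdown(choices=CHOICES, value=CHOICES[0], label="Select Model")
    out = gr.Textbox(label="Current selection")
    selector.change(describe, inputs=selector, outputs=out)

if __name__ == "__main__":
    demo.launch()
```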
```diff
@@ -96,33 +162,37 @@ def create_interface():
                 maximum=8192,
                 value=2048,
                 step=50,
-                label="Max Length"
+                label="Max Length",
+                info="Maximum length of generated response"
             )
             temperature = gr.Slider(
                 minimum=0.1,
                 maximum=2.0,
                 value=0.7,
                 step=0.1,
-                label="Temperature"
+                label="Temperature",
+                info="Controls randomness in generation"
             )
             top_p = gr.Slider(
                 minimum=0.1,
                 maximum=1.0,
                 value=0.9,
                 step=0.1,
-                label="Top P"
+                label="Top P",
+                info="Controls diversity via nucleus sampling"
             )
 
         # Event handlers
         def user_message(message, history):
             return "", history + [[message, None]]
 
-        def bot_response(history, max_len, temp, top_p):
+        def bot_response(history, model_name, max_len, temp, top_p):
             if history:
                 user_message = history[-1][0]
                 bot_message = generate_response(
                     user_message,
                     history[:-1],
+                    model_name,
                     max_len,
                     temp,
                     top_p
```
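Because `user_message` and `bot_response` are plain functions over the `[[user, bot], ...]` history structure, they can be unit-tested without launching the UI. A small sketch, with a hypothetical `fake_generate` standing in for `generate_response` so no model is needed:

```python
# Unit-test sketch for the two chat handlers above; `fake_generate` is a
# hypothetical stand-in for generate_response.
def user_message(message, history):
    return "", history + [[message, None]]

def bot_response(history, fake_generate=lambda m, h: f"echo: {m}"):
    if history:
        user_msg = history[-1][0]
        history[-1][1] = fake_generate(user_msg, history[:-1])
    return history

cleared, hist = user_message("Hello", [])
assert cleared == "" and hist == [["Hello", None]]
assert bot_response(hist) == [["Hello", "echo: Hello"]]
print("handler sketch OK")
```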
```diff
@@ -130,31 +200,34 @@ def create_interface():
                 history[-1][1] = bot_message
             return history
 
+        def model_changed(model_name):
+            return gr.update(placeholder=f"Chat with {model_name}...")
+
         # Wire up the events
         msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
+            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
         )
 
         submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
+            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        )
 
         clear_btn.click(lambda: None, None, chatbot, queue=False)
 
+        model_selector.change(model_changed, model_selector, chatbot)
+
         gr.Markdown("""
         ---
 
-        ### About Nous-V1
-
-        Nous-V1-
-        It's designed for efficient text generation and conversation.
-
-
-        - 4B parameters for efficient inference
-        - Optimizsd for conversational AI
-        - Supports various text generation tasks
-
-        This Space uses ZeroGPU for efficient GPU allocation.
+        ### About the Nous-V1 Models
+
+        **Nous-V1-4B**: 4 billion parameter model by Apexion AI, optimized for efficient conversation and text generation
+
+        **Nous-V1-8B**: 8 billion parameter model by Apexion AI, offering enhanced capabilities and better performance for complex tasks
+
+        Both models are designed for conversational AI and support various text generation tasks. The 8B model provides more sophisticated responses but requires more computational resources.
+
+        This Space uses ZeroGPU for efficient GPU allocation across both model sizes.
         """)
 
         return demo
```
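The diff ends inside `create_interface()`, so the file's entry point is not shown; for a Space like this it would typically be something along these lines (an assumption, not part of the commit):

```python
# Typical entry point for a Gradio Space (assumed; not shown in this diff).
if __name__ == "__main__":
    demo = create_interface()
    demo.queue().launch()   # queue() is common on ZeroGPU Spaces
```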