Update app.py
Browse files
app.py
CHANGED
@@ -20,6 +20,7 @@ MODELS = {
|
|
20 |
@spaces.GPU
|
21 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
22 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
|
|
23 |
print(f"🚀 Loading {model_id}...")
|
24 |
start_time = time.time()
|
25 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
@@ -33,16 +34,25 @@ def generate_response(model_id, conversation, user_message, max_length=512, temp
|
|
33 |
)
|
34 |
load_time = time.time() - start_time
|
35 |
print(f"✅ Model loaded in {load_time:.2f}s")
|
36 |
-
|
|
|
37 |
messages = []
|
38 |
-
system_prompt =
|
|
|
|
|
|
|
|
|
|
|
39 |
messages.append({"role": "system", "content": system_prompt})
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
if
|
44 |
-
messages.append({"role": "
|
|
|
|
|
45 |
messages.append({"role": "user", "content": user_message})
|
|
|
46 |
prompt = tokenizer.apply_chat_template(
|
47 |
messages,
|
48 |
tokenize=False,
|
@@ -96,28 +106,10 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
96 |
gr.Markdown("# 🚀 Athena Playground Chat")
|
97 |
gr.Markdown("*Powered by HuggingFace ZeroGPU*")
|
98 |
|
99 |
-
#
|
100 |
-
model_choice = gr.Dropdown(
|
101 |
-
label="📱 Model",
|
102 |
-
choices=list(MODELS.keys()),
|
103 |
-
value="Athena-R3X 8B",
|
104 |
-
info="Select which Athena model to use"
|
105 |
-
)
|
106 |
-
max_length = gr.Slider(
|
107 |
-
32, 2048, value=512,
|
108 |
-
label="📏 Max Tokens",
|
109 |
-
info="Maximum number of tokens to generate"
|
110 |
-
)
|
111 |
-
temperature = gr.Slider(
|
112 |
-
0.1, 2.0, value=0.7,
|
113 |
-
label="🎨 Creativity",
|
114 |
-
info="Higher values = more creative responses"
|
115 |
-
)
|
116 |
-
|
117 |
-
# 2. Create the chat interface, passing the controls as additional_inputs
|
118 |
chat_interface = gr.ChatInterface(
|
119 |
fn=respond,
|
120 |
-
additional_inputs=[
|
121 |
title="Chat with Athena",
|
122 |
description="Ask Athena anything!",
|
123 |
theme="soft",
|
@@ -137,10 +129,28 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
137 |
type="messages"
|
138 |
)
|
139 |
|
140 |
-
#
|
141 |
-
|
142 |
-
|
143 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
|
145 |
if __name__ == "__main__":
|
146 |
demo.launch()
|
|
|
20 |
@spaces.GPU
|
21 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
22 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
23 |
+
|
24 |
print(f"🚀 Loading {model_id}...")
|
25 |
start_time = time.time()
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
34 |
)
|
35 |
load_time = time.time() - start_time
|
36 |
print(f"✅ Model loaded in {load_time:.2f}s")
|
37 |
+
|
38 |
+
# Build messages in proper chat format (OpenAI-style messages)
|
39 |
messages = []
|
40 |
+
system_prompt = (
|
41 |
+
"You are Athena, a helpful, harmless, and honest AI assistant. "
|
42 |
+
"You provide clear, accurate, and concise responses to user questions. "
|
43 |
+
"You are knowledgeable across many domains and always aim to be respectful and helpful. "
|
44 |
+
"You are finetuned by Aayan Mishra"
|
45 |
+
)
|
46 |
messages.append({"role": "system", "content": system_prompt})
|
47 |
+
|
48 |
+
# Add conversation history (OpenAI-style)
|
49 |
+
for msg in conversation:
|
50 |
+
if msg["role"] in ("user", "assistant"):
|
51 |
+
messages.append({"role": msg["role"], "content": msg["content"]})
|
52 |
+
|
53 |
+
# Add current user message
|
54 |
messages.append({"role": "user", "content": user_message})
|
55 |
+
|
56 |
prompt = tokenizer.apply_chat_template(
|
57 |
messages,
|
58 |
tokenize=False,
|
|
|
106 |
gr.Markdown("# 🚀 Athena Playground Chat")
|
107 |
gr.Markdown("*Powered by HuggingFace ZeroGPU*")
|
108 |
|
109 |
+
# --- Main chat interface ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
chat_interface = gr.ChatInterface(
|
111 |
fn=respond,
|
112 |
+
additional_inputs=[],
|
113 |
title="Chat with Athena",
|
114 |
description="Ask Athena anything!",
|
115 |
theme="soft",
|
|
|
129 |
type="messages"
|
130 |
)
|
131 |
|
132 |
+
# --- Configuration controls at the bottom ---
|
133 |
+
gr.Markdown("### ⚙️ Model & Generation Settings")
|
134 |
+
with gr.Row():
|
135 |
+
model_choice = gr.Dropdown(
|
136 |
+
label="📱 Model",
|
137 |
+
choices=list(MODELS.keys()),
|
138 |
+
value="Athena-R3X 8B",
|
139 |
+
info="Select which Athena model to use"
|
140 |
+
)
|
141 |
+
max_length = gr.Slider(
|
142 |
+
32, 2048, value=512,
|
143 |
+
label="📏 Max Tokens",
|
144 |
+
info="Maximum number of tokens to generate"
|
145 |
+
)
|
146 |
+
temperature = gr.Slider(
|
147 |
+
0.1, 2.0, value=0.7,
|
148 |
+
label="🎨 Creativity",
|
149 |
+
info="Higher values = more creative responses"
|
150 |
+
)
|
151 |
+
|
152 |
+
# --- Link the config controls to the chat interface ---
|
153 |
+
chat_interface.additional_inputs = [model_choice, max_length, temperature]
|
154 |
|
155 |
if __name__ == "__main__":
|
156 |
demo.launch()
|