Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,6 @@ MODELS = {
|
|
20 |
@spaces.GPU
|
21 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
22 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
23 |
-
|
24 |
print(f"🚀 Loading {model_id}...")
|
25 |
start_time = time.time()
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
@@ -106,10 +105,28 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
106 |
gr.Markdown("# 🚀 Athena Playground Chat")
|
107 |
gr.Markdown("*Powered by HuggingFace ZeroGPU*")
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
# --- Main chat interface ---
|
110 |
chat_interface = gr.ChatInterface(
|
111 |
fn=respond,
|
112 |
-
additional_inputs=[],
|
113 |
title="Chat with Athena",
|
114 |
description="Ask Athena anything!",
|
115 |
theme="soft",
|
@@ -132,25 +149,9 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
132 |
# --- Configuration controls at the bottom ---
|
133 |
gr.Markdown("### ⚙️ Model & Generation Settings")
|
134 |
with gr.Row():
|
135 |
-
model_choice
|
136 |
-
|
137 |
-
|
138 |
-
value="Athena-R3X 4B",
|
139 |
-
info="Select which Athena model to use"
|
140 |
-
)
|
141 |
-
max_length = gr.Slider(
|
142 |
-
32, 2048, value=512,
|
143 |
-
label="📏 Max Tokens",
|
144 |
-
info="Maximum number of tokens to generate"
|
145 |
-
)
|
146 |
-
temperature = gr.Slider(
|
147 |
-
0.1, 2.0, value=0.7,
|
148 |
-
label="🎨 Creativity",
|
149 |
-
info="Higher values = more creative responses"
|
150 |
-
)
|
151 |
-
|
152 |
-
# --- Link the config controls to the chat interface ---
|
153 |
-
chat_interface.additional_inputs = [model_choice, max_length, temperature]
|
154 |
|
155 |
if __name__ == "__main__":
|
156 |
-
demo.launch()
|
|
|
20 |
@spaces.GPU
|
21 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
22 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
|
|
23 |
print(f"🚀 Loading {model_id}...")
|
24 |
start_time = time.time()
|
25 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
105 |
gr.Markdown("# 🚀 Athena Playground Chat")
|
106 |
gr.Markdown("*Powered by HuggingFace ZeroGPU*")
|
107 |
|
108 |
+
# --- Create config controls first ---
|
109 |
+
model_choice = gr.Dropdown(
|
110 |
+
label="📱 Model",
|
111 |
+
choices=list(MODELS.keys()),
|
112 |
+
value="Athena-R3X 4B",
|
113 |
+
info="Select which Athena model to use"
|
114 |
+
)
|
115 |
+
max_length = gr.Slider(
|
116 |
+
32, 2048, value=512,
|
117 |
+
label="📏 Max Tokens",
|
118 |
+
info="Maximum number of tokens to generate"
|
119 |
+
)
|
120 |
+
temperature = gr.Slider(
|
121 |
+
0.1, 2.0, value=0.7,
|
122 |
+
label="🎨 Creativity",
|
123 |
+
info="Higher values = more creative responses"
|
124 |
+
)
|
125 |
+
|
126 |
# --- Main chat interface ---
|
127 |
chat_interface = gr.ChatInterface(
|
128 |
fn=respond,
|
129 |
+
additional_inputs=[model_choice, max_length, temperature],
|
130 |
title="Chat with Athena",
|
131 |
description="Ask Athena anything!",
|
132 |
theme="soft",
|
|
|
149 |
# --- Configuration controls at the bottom ---
|
150 |
gr.Markdown("### ⚙️ Model & Generation Settings")
|
151 |
with gr.Row():
|
152 |
+
model_choice.render()
|
153 |
+
max_length.render()
|
154 |
+
temperature.render()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
if __name__ == "__main__":
|
157 |
+
demo.launch()
|