# app.py — Hugging Face ZeroGPU Space: Gradio chat UI for apexion-ai/Orion-V1-4B
import gradio as gr
import spaces
from transformers import pipeline
import torch
# Module-level cache for the text-generation pipeline. It is populated lazily
# by initialize_model() so the model is only loaded inside a GPU-allocated
# (@spaces.GPU) call, as required on ZeroGPU Spaces.
pipe = None
@spaces.GPU
def initialize_model():
    """Return the shared text-generation pipeline, building it on first use.

    The pipeline is cached in the module-level ``pipe`` so the (expensive)
    model load happens at most once per process. Runs under ``@spaces.GPU``
    so ZeroGPU allocates a device for the load.
    """
    global pipe
    if pipe is not None:
        return pipe
    pipe = pipeline(
        "text-generation",
        model="apexion-ai/Orion-V1-4B",
        torch_dtype=torch.float16,  # half precision to fit the 4B model
        device_map="auto",
    )
    return pipe
@spaces.GPU
def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
    """Generate a chat reply from the Orion-V1-4B model.

    Args:
        message: The latest user message.
        history: Prior turns as (user, assistant) pairs; assistant may be None
            for an in-flight turn.
        max_length: Maximum number of NEW tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant's reply text, or an error string if generation fails.
    """
    # Initialize model inside the GPU-decorated function (ZeroGPU requirement).
    model_pipe = initialize_model()

    # Rebuild the conversation in the chat-template message format.
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        response = model_pipe(
            messages,
            # FIX: the original passed max_length=, which caps prompt + output
            # combined — once the conversation grew past ~max_length tokens
            # there was no room left to generate anything. max_new_tokens
            # caps only the generated continuation.
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=model_pipe.tokenizer.eos_token_id,
        )
        generated_text = response[0]['generated_text']
        # Chat-format pipelines return the full message list; the new
        # assistant turn is the last entry.
        if isinstance(generated_text, list):
            return generated_text[-1]['content']
        # Fallback parsing if the pipeline returned a plain string.
        return str(generated_text).split("assistant")[-1].strip()
    except Exception as e:
        # Surface the failure in the chat UI instead of crashing the app.
        return f"Error generating response: {str(e)}"
# Create the Gradio interface
def create_interface():
    """Build and return the Gradio Blocks chat UI for Orion-V1-4B."""
    with gr.Blocks(title="Orion-V1-4B Chat", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🚀 Orion-V1-4B Chat
        Chat with the Orion-V1-4B model by Apexion AI. This is a 4B parameter language model optimized for conversation.
        **Model:** `apexion-ai/Orion-V1-4B`
        """)
        # Conversation display; history is a list of [user, assistant] pairs.
        chatbot = gr.Chatbot(
            height=400,
            placeholder="Start chatting with Orion-V1-4B...",
            label="Chat"
        )
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Message",
            lines=2
        )
        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")
        # Sampling controls forwarded to generate_response() on each turn.
        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=50,
                maximum=2048,
                value=512,
                step=50,
                label="Max Length"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P"
            )
        # Event handlers
        def user_message(message, history):
            # Append the user's turn with an empty assistant slot and clear
            # the textbox (first return value becomes the new msg content).
            return "", history + [[message, None]]
        def bot_response(history, max_len, temp, top_p):
            # Fill in the assistant slot of the most recent turn.
            if history:
                user_message = history[-1][0]  # local; shadows the handler above
                bot_message = generate_response(
                    user_message,
                    history[:-1],  # pass only completed turns as context
                    max_len,
                    temp,
                    top_p
                )
                history[-1][1] = bot_message
            return history
        # Wire up the events: record the user turn, then generate the reply.
        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )
        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )
        # Returning None resets the Chatbot component to an empty history.
        clear_btn.click(lambda: None, None, chatbot, queue=False)
        gr.Markdown("""
        ---
        ### About Orion-V1-4B
        Orion-V1-4B is a 4 billion parameter language model developed by Apexion AI.
        It's designed for efficient text generation and conversation.
        **Features:**
        - 4B parameters for efficient inference
        - Optimized for conversational AI
        - Supports various text generation tasks
        This Space uses ZeroGPU for efficient GPU allocation.
        """)
    return demo
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    create_interface().launch()