import gradio as gr
import spaces
from transformers import pipeline
import torch
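
# ZeroGPU note: on Hugging Face Spaces the GPU is attached only while a
# function decorated with @spaces.GPU is running, so the model is kept in a
# module-level variable and loaded lazily on the first GPU call rather than
# at import time.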
# Global variable to store the pipeline
pipe = None


@spaces.GPU
def initialize_model():
    global pipe
    if pipe is None:
        pipe = pipeline(
            "text-generation",
            model="apexion-ai/Orion-V1-4B",
            torch_dtype=torch.float16,
            device_map="auto",
        )
    return pipe
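
# For reference, the text-generation pipeline accepts chat input as a list of
# role/content dicts, e.g.:
#     [{"role": "user", "content": "Hi"},
#      {"role": "assistant", "content": "Hello! How can I help?"},
#      {"role": "user", "content": "Tell me a joke."}]
# Recent transformers versions return that same list with the new assistant
# turn appended under response[0]["generated_text"].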
@spaces.GPU
def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
    """Generate a response from the Orion model for the latest user message."""
    # Initialize the model inside the GPU-decorated function so the GPU is
    # only held while a request is being served
    model_pipe = initialize_model()

    # Rebuild the conversation history as a chat-format message list
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": message})

    # Generate the response
    try:
        response = model_pipe(
            messages,
            # max_new_tokens bounds only the generated tokens; max_length
            # would also count the prompt and can truncate long conversations
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=model_pipe.tokenizer.eos_token_id,
        )

        # Extract the generated text
        generated_text = response[0]["generated_text"]

        # Chat pipelines return the full message list; the last entry is the
        # new assistant turn
        if isinstance(generated_text, list):
            assistant_response = generated_text[-1]["content"]
        else:
            # Fallback parsing if the pipeline returned a plain string
            assistant_response = str(generated_text).split("assistant")[-1].strip()

        return assistant_response
    except Exception as e:
        return f"Error generating response: {str(e)}"
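
# Illustrative (hypothetical) direct call, outside the Gradio UI:
#     reply = generate_response("Hello!", history=[], max_length=256)
#     print(reply)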
# Create the Gradio interface
def create_interface():
    with gr.Blocks(title="Orion-V1-4B Chat", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # Orion-V1-4B Chat

        Chat with the Orion-V1-4B model by Apexion AI. This is a 4B parameter language model optimized for conversation.

        **Model:** `apexion-ai/Orion-V1-4B`
        """)
        chatbot = gr.Chatbot(
            height=400,
            placeholder="Start chatting with Orion-V1-4B...",
            label="Chat"
        )

        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Message",
            lines=2
        )

        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=50,
                maximum=2048,
                value=512,
                step=50,
                label="Max New Tokens"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P"
            )
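
        # Sampling controls: temperature rescales token probabilities (higher
        # means more random output) and top_p (nucleus sampling) keeps only
        # the smallest set of tokens whose cumulative probability exceeds p.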
        # Event handlers
        def user_message(message, history):
            # Append the user's turn immediately; the reply slot stays empty
            return "", history + [[message, None]]

        def bot_response(history, max_len, temp, top_p):
            if history:
                # Renamed from user_message to avoid shadowing the handler above
                last_user_msg = history[-1][0]
                bot_message = generate_response(
                    last_user_msg,
                    history[:-1],
                    max_len,
                    temp,
                    top_p
                )
                history[-1][1] = bot_message
            return history
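
        # The wiring below runs in two steps: user_message posts the user's
        # turn to the chat right away, then .then() fills in the model's
        # reply once generate_response returns.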
        # Wire up the events
        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )
        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )
        clear_btn.click(lambda: None, None, chatbot, queue=False)
        gr.Markdown("""
        ---
        ### About Orion-V1-4B

        Orion-V1-4B is a 4 billion parameter language model developed by Apexion AI.
        It's designed for efficient text generation and conversation.

        **Features:**
        - 4B parameters for efficient inference
        - Optimized for conversational AI
        - Supports various text generation tasks

        This Space uses ZeroGPU for efficient GPU allocation.
        """)

    return demo
# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
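
# Note: outside a ZeroGPU Space the @spaces.GPU decorator should behave as a
# no-op, so this app can also be tested locally; device_map="auto" then
# places the model on whatever hardware is available.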