|
import gradio as gr |
|
import spaces |
|
from transformers import pipeline |
|
import torch |
|
|
|
|
|
# Module-level cache for the text-generation pipeline. Left as None at import
# time so the (large) model is only loaded lazily inside a @spaces.GPU call.
pipe = None
|
|
|
@spaces.GPU
def initialize_model():
    """Return the shared Orion-V1-4B text-generation pipeline.

    Loads the model on the first call and caches it in the module-level
    ``pipe`` global; subsequent calls reuse the cached pipeline.
    """
    global pipe
    # Fast path: pipeline already built on a previous call.
    if pipe is not None:
        return pipe
    pipe = pipeline(
        "text-generation",
        model="apexion-ai/Orion-V1-4B",
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return pipe
|
|
|
@spaces.GPU
def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
    """Generate a chat response from the Orion model.

    Args:
        message: The latest user message (plain text).
        history: Prior conversation turns — either ``(user, assistant)``
            pairs (Gradio tuples format) or ``{"role": ..., "content": ...}``
            dicts (Gradio messages format); both are accepted.
        max_length: Maximum number of NEW tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability cutoff.

    Returns:
        The assistant's reply text, or an error message string on failure.
    """
    model_pipe = initialize_model()

    # Rebuild the conversation in chat-template message format.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Already a messages-format entry; pass role/content through.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        response = model_pipe(
            messages,
            # Use max_new_tokens, not max_length: max_length bounds
            # prompt + generation combined, so the reply budget would
            # silently shrink to nothing as the conversation grows.
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=model_pipe.tokenizer.eos_token_id,
        )

        generated_text = response[0]['generated_text']

        # Chat-format input makes the pipeline return the full message list;
        # the last entry is the assistant's new reply.
        if isinstance(generated_text, list):
            assistant_response = generated_text[-1]['content']
        else:
            # Plain-string fallback: keep the text after the final
            # "assistant" marker emitted by the chat template.
            assistant_response = str(generated_text).split("assistant")[-1].strip()

        return assistant_response

    except Exception as e:
        # Surface the failure in the chat window rather than crashing the UI.
        return f"Error generating response: {str(e)}"
|
|
|
|
|
def create_interface():
    """Build and return the Gradio Blocks UI for chatting with Orion-V1-4B."""
    with gr.Blocks(title="Orion-V1-4B Chat", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # π Orion-V1-4B Chat

        Chat with the Orion-V1-4B model by Apexion AI. This is a 4B parameter language model optimized for conversation.

        **Model:** `apexion-ai/Orion-V1-4B`
        """)

        chatbot = gr.Chatbot(
            height=400,
            placeholder="Start chatting with Orion-V1-4B...",
            label="Chat",
        )
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Message",
            lines=2,
        )

        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        # Generation knobs, hidden behind a collapsed accordion.
        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=50, maximum=2048, value=512, step=50,
                label="Max Length",
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                label="Temperature",
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.9, step=0.1,
                label="Top P",
            )

        def queue_user_turn(user_text, chat_log):
            # Clear the textbox and append the user's turn with a pending reply.
            return "", chat_log + [[user_text, None]]

        def fill_bot_turn(chat_log, max_len, temp, nucleus_p):
            # Fill in the pending assistant slot of the most recent turn.
            if chat_log:
                latest = chat_log[-1][0]
                chat_log[-1][1] = generate_response(
                    latest, chat_log[:-1], max_len, temp, nucleus_p
                )
            return chat_log

        # Enter key and Send button share the same two-step flow:
        # echo the user turn first, then stream in the model's reply.
        generation_inputs = [chatbot, max_length, temperature, top_p]
        msg.submit(queue_user_turn, [msg, chatbot], [msg, chatbot]).then(
            fill_bot_turn, generation_inputs, chatbot
        )
        submit_btn.click(queue_user_turn, [msg, chatbot], [msg, chatbot]).then(
            fill_bot_turn, generation_inputs, chatbot
        )

        # Reset the conversation; skip the queue so it feels instant.
        clear_btn.click(lambda: None, None, chatbot, queue=False)

        gr.Markdown("""
        ---

        ### About Orion-V1-4B

        Orion-V1-4B is a 4 billion parameter language model developed by Apexion AI.
        It's designed for efficient text generation and conversation.

        **Features:**
        - 4B parameters for efficient inference
        - Optimized for conversational AI
        - Supports various text generation tasks

        This Space uses ZeroGPU for efficient GPU allocation.
        """)

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Build the UI and serve it when run as a script.
    create_interface().launch()