|
import gradio as gr |
|
import spaces |
|
from transformers import pipeline |
|
import torch |
|
|
|
|
|
# Module-level cache for the text-generation pipeline. Left as None at import
# time so the (large) model is only loaded lazily inside a @spaces.GPU call.
pipe = None
|
|
|
@spaces.GPU
def initialize_model():
    """Return the shared Orion-V1-4B text-generation pipeline.

    Loads the model on the first call and caches it in the module-level
    ``pipe`` global; subsequent calls reuse the cached pipeline.
    """
    global pipe
    # Fast path: pipeline already built on a previous call.
    if pipe is not None:
        return pipe
    pipe = pipeline(
        "text-generation",
        model="apexion-ai/Orion-V1-4B",
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return pipe
|
|
|
@spaces.GPU
def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
    """Generate a chat response from the Orion model.

    Args:
        message: The latest user message (plain text).
        history: Prior conversation turns — either ``(user, assistant)``
            pairs (Gradio tuples format) or ``{"role": ..., "content": ...}``
            dicts (Gradio messages format); both are accepted.
        max_length: Maximum number of NEW tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability cutoff.

    Returns:
        The assistant's reply text, or an error message string on failure.
    """
    model_pipe = initialize_model()

    # Rebuild the conversation in chat-template message format.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Already a messages-format entry; pass role/content through.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        response = model_pipe(
            messages,
            # Use max_new_tokens, not max_length: max_length bounds
            # prompt + generation combined, so the reply budget would
            # silently shrink to nothing as the conversation grows.
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=model_pipe.tokenizer.eos_token_id,
        )

        generated_text = response[0]['generated_text']

        # Chat-format input makes the pipeline return the full message list;
        # the last entry is the assistant's new reply.
        if isinstance(generated_text, list):
            assistant_response = generated_text[-1]['content']
        else:
            # Plain-string fallback: keep the text after the final
            # "assistant" marker emitted by the chat template.
            assistant_response = str(generated_text).split("assistant")[-1].strip()

        return assistant_response

    except Exception as e:
        # Surface the failure in the chat window rather than crashing the UI.
        return f"Error generating response: {str(e)}"
|
|
|
|
|
def create_interface():
    """Build and return the Gradio Blocks UI for chatting with Orion-V1-4B."""
    with gr.Blocks(title="Orion-V1-4B Chat", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # π Orion-V1-4B Chat

        Chat with the Orion-V1-4B model by Apexion AI. This is a 4B parameter language model optimized for conversation.

        **Model:** `apexion-ai/Orion-V1-4B`
        """)

        chatbot = gr.Chatbot(
            height=400,
            placeholder="Start chatting with Orion-V1-4B...",
            label="Chat",
        )
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Message",
            lines=2,
        )

        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        # Generation knobs, hidden behind a collapsed accordion.
        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=50, maximum=2048, value=512, step=50,
                label="Max Length",
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                label="Temperature",
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.9, step=0.1,
                label="Top P",
            )

        def queue_user_turn(user_text, chat_log):
            # Clear the textbox and append the user's turn with a pending reply.
            return "", chat_log + [[user_text, None]]

        def fill_bot_turn(chat_log, max_len, temp, nucleus_p):
            # Fill in the pending assistant slot of the most recent turn.
            if chat_log:
                latest = chat_log[-1][0]
                chat_log[-1][1] = generate_response(
                    latest, chat_log[:-1], max_len, temp, nucleus_p
                )
            return chat_log

        # Enter key and Send button share the same two-step flow:
        # echo the user turn first, then stream in the model's reply.
        generation_inputs = [chatbot, max_length, temperature, top_p]
        msg.submit(queue_user_turn, [msg, chatbot], [msg, chatbot]).then(
            fill_bot_turn, generation_inputs, chatbot
        )
        submit_btn.click(queue_user_turn, [msg, chatbot], [msg, chatbot]).then(
            fill_bot_turn, generation_inputs, chatbot
        )

        # Reset the conversation; skip the queue so it feels instant.
        clear_btn.click(lambda: None, None, chatbot, queue=False)

        gr.Markdown("""
        ---

        ### About Orion-V1-4B

        Orion-V1-4B is a 4 billion parameter language model developed by Apexion AI.
        It's designed for efficient text generation and conversation.

        **Features:**
        - 4B parameters for efficient inference
        - Optimized for conversational AI
        - Supports various text generation tasks

        This Space uses ZeroGPU for efficient GPU allocation.
        """)

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Build the UI and serve it when run as a script.
    create_interface().launch()