Spestly committed
Commit f76f5bc · verified · 1 Parent(s): b8f5109

Create app.py

Files changed (1):
app.py +160 -0
app.py ADDED
@@ -0,0 +1,160 @@

import gradio as gr
import spaces
from transformers import pipeline
import torch

# Initialize the pipeline with the Orion model
@spaces.GPU
def initialize_model():
    return pipeline(
        "text-generation",
        model="apexion-ai/Orion-V1-4B",
        torch_dtype=torch.float16,
        device_map="auto"
    )

# Load the model
pipe = initialize_model()

@spaces.GPU
def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
    """Generate response using the Orion model"""

    # Format the conversation history
    messages = []

    # Add conversation history
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Generate response
    try:
        response = pipe(
            messages,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=pipe.tokenizer.eos_token_id
        )

        # Extract the generated text
        generated_text = response[0]['generated_text']

        # Get the last assistant message
        if isinstance(generated_text, list):
            assistant_response = generated_text[-1]['content']
        else:
            # Fallback parsing if needed
            assistant_response = str(generated_text).split("assistant")[-1].strip()

        return assistant_response

    except Exception as e:
        return f"Error generating response: {str(e)}"

# Create the Gradio interface
def create_interface():
    with gr.Blocks(title="Orion-V1-4B Chat", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🚀 Orion-V1-4B Chat

        Chat with the Orion-V1-4B model by Apexion AI. This is a 4B parameter language model optimized for conversation.

        **Model:** `apexion-ai/Orion-V1-4B`
        """)

        chatbot = gr.Chatbot(
            height=400,
            placeholder="Start chatting with Orion-V1-4B...",
            label="Chat"
        )

        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Message",
            lines=2
        )

        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=50,
                maximum=2048,
                value=512,
                step=50,
                label="Max Length"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
                label="Top P"
            )

        # Event handlers
        def user_message(message, history):
            return "", history + [[message, None]]

        def bot_response(history, max_len, temp, top_p):
            if history:
                user_message = history[-1][0]
                bot_message = generate_response(
                    user_message,
                    history[:-1],
                    max_len,
                    temp,
                    top_p
                )
                history[-1][1] = bot_message
            return history

        # Wire up the events
        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )

        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )

        clear_btn.click(lambda: None, None, chatbot, queue=False)

        gr.Markdown("""
        ---

        ### About Orion-V1-4B

        Orion-V1-4B is a 4 billion parameter language model developed by Apexion AI.
        It's designed for efficient text generation and conversation.

        **Features:**
        - 4B parameters for efficient inference
        - Optimized for conversational AI
        - Supports various text generation tasks

        This Space uses ZeroGPU for efficient GPU allocation.
        """)

    return demo

# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
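
For a quick sanity check outside the Space, the same pipeline call that app.py makes can be exercised directly. The snippet below is a minimal local sketch, not part of this commit: the model id, generation parameters, and the way the assistant reply is extracted mirror app.py, while the prompt text is illustrative. It assumes transformers, torch, and accelerate (needed for device_map="auto") are installed locally; the Space itself would additionally need gradio and spaces among its dependencies.

# Minimal local sketch mirroring app.py's pipeline usage (illustrative prompt).
from transformers import pipeline
import torch

pipe = pipeline(
    "text-generation",
    model="apexion-ai/Orion-V1-4B",
    torch_dtype=torch.float16,
    device_map="auto",
)

messages = [{"role": "user", "content": "Hello, who are you?"}]
out = pipe(messages, max_length=512, temperature=0.7, top_p=0.9, do_sample=True)

# With chat-format input the pipeline returns the full message list;
# the last entry is the assistant reply, extracted the same way as in app.py.
print(out[0]["generated_text"][-1]["content"])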