mgbam committed
Commit e69fb50 · verified · 1 Parent(s): 7bc132c

Create app.py

Files changed (1)
  1. app.py +283 -0
app.py ADDED
@@ -0,0 +1,283 @@
import gradio as gr
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
)
import threading

# -----------------------------------------------------------------------------
# 1. MODEL LOADING
# -----------------------------------------------------------------------------
# In this advanced example, we instantiate the model directly (instead of using
# pipeline) so that we can stream outputs via TextIteratorStreamer.

MODEL_NAME = "microsoft/phi-4"  # Replace with another HF model if phi-4 is unavailable
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
    )
except Exception:
    # Fall back to a smaller model if phi-4 is unavailable or too large for this hardware
    MODEL_NAME = "gpt2"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)

model.eval()

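# For comparison, the one-liner pipeline() equivalent of the manual setup above
# would look roughly like this (kept as a comment: executing it would load a
# second copy of the model, and pipeline() does not expose the token-level
# streaming used below):
#
#     from transformers import pipeline
#     generator = pipeline("text-generation", model=MODEL_NAME, device_map="auto")
#     print(generator("Hello", max_new_tokens=20)[0]["generated_text"])
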

# -----------------------------------------------------------------------------
# 2. CONVERSATION / PROMPTS
# -----------------------------------------------------------------------------
# We keep track of the conversation as a list of role/content dicts:
# [
#     {"role": "system", "content": "..."},
#     {"role": "developer", "content": "..."},
#     {"role": "user", "content": "User message"},
#     {"role": "assistant", "content": "Assistant answer"},
#     ...
# ]
#
# We also build in a mock retrieval system that merges knowledge snippets
# into the final prompt if the user chooses to do so.

DEFAULT_SYSTEM_PROMPT = (
    "You are Philos, an advanced AI system created by ACC (Algorithmic Computer-generated Consciousness). "
    "Answer user queries accurately, thoroughly, and helpfully. Keep your responses relevant and correct."
)

DEFAULT_DEVELOPER_PROMPT = (
    "Ensure that you respond in a style that is professional, clear, and approachable. "
    "Include reasoning steps if needed, but keep them concise."
)

# A small dictionary to emulate knowledge retrieval.
# In a real scenario, you would use an actual vector DB plus a retrieval method.
MOCK_KB = {
    "python": "Python is a high-level, interpreted programming language famous for its readability and flexibility.",
    "accelerate library": "The accelerate library by HF helps in distributed training and inference.",
    "phi-4 architecture": "phi-4 is a 14B-parameter, decoder-only Transformer with a 16K context window.",
}

def retrieve_knowledge(user_query):
    """Naive keyword lookup: return any KB snippets whose key appears in the query."""
    matches = []
    for keyword, snippet in MOCK_KB.items():
        if keyword.lower() in user_query.lower():
            matches.append(snippet)
    return matches

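# The mock lookup above is plain keyword matching. A real deployment would embed
# the query and the KB snippets and rank them by cosine similarity. Below is a
# minimal sketch of that idea; it is not wired into the app, it assumes the
# sentence-transformers package is installed, and the embedding model name is a
# common default rather than part of the original code. The import stays inside
# the function so the demo still runs without the extra dependency.
def retrieve_knowledge_embeddings(user_query, top_k=1):
    from sentence_transformers import SentenceTransformer, util  # assumed dependency

    embedder = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model
    snippets = list(MOCK_KB.values())
    # Embed the query and every snippet, then score snippets by cosine similarity
    query_emb = embedder.encode(user_query, convert_to_tensor=True)
    snippet_embs = embedder.encode(snippets, convert_to_tensor=True)
    scores = util.cos_sim(query_emb, snippet_embs)[0]
    best = scores.topk(min(top_k, len(snippets))).indices.tolist()
    return [snippets[i] for i in best]
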
# -----------------------------------------------------------------------------
# 3. HELPER: Build the prompt from conversation
# -----------------------------------------------------------------------------
def build_prompt(conversation):
    """
    Convert the conversation (list of role/content dicts) into a single text
    prompt the model can process. We adopt a simple tagged format:
    system, developer, user, assistant, ...
    """
    prompt = ""
    for msg in conversation:
        if msg["role"] == "system":
            prompt += f"[System]\n{msg['content']}\n"
        elif msg["role"] == "developer":
            prompt += f"[Developer]\n{msg['content']}\n"
        elif msg["role"] == "user":
            prompt += f"[User]\n{msg['content']}\n"
        else:  # assistant
            prompt += f"[Assistant]\n{msg['content']}\n"
    prompt += "[Assistant]\n"  # End with an assistant tag so the model continues from there
    return prompt

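# For a short conversation, build_prompt() produces text like:
#
#     [System]
#     You are Philos, an advanced AI system created by ACC ...
#     [Developer]
#     Ensure that you respond in a style that is professional ...
#     [User]
#     What is Python?
#     [Assistant]
#
# The model then continues generating after the final [Assistant] tag.
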
# -----------------------------------------------------------------------------
# 4. STREAMING GENERATION
# -----------------------------------------------------------------------------
def generate_tokens_stream(prompt, temperature=0.7, top_p=0.9, max_new_tokens=128):
    """
    Uses TextIteratorStreamer to yield the growing response one token
    (or small chunk) at a time.
    """
    # skip_prompt=True so the streamer yields only newly generated text,
    # not an echo of the input prompt
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)

    generation_kwargs = dict(
        **inputs,  # input_ids and attention_mask
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Run generation in a background thread; the streamer feeds us tokens
    # as they are produced
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Accumulate and yield the partial text as tokens arrive
    partial_text = ""
    for new_token in streamer:
        partial_text += new_token
        yield partial_text

    thread.join()

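# Quick standalone check of the streamer (assumes the model loaded above):
#
#     for partial in generate_tokens_stream("[User]\nHi!\n[Assistant]\n", max_new_tokens=32):
#         print(partial)
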
# -----------------------------------------------------------------------------
# 5. MAIN CHAT FUNCTION
# -----------------------------------------------------------------------------
def advanced_chat(user_msg, conversation, system_prompt, dev_prompt, retrieve_flg, temperature, top_p):
    """
    - Update the conversation with the user's message
    - Optionally retrieve knowledge and fold it into the developer prompt
    - Build the final prompt
    - Stream the assistant's reply
    """
    # Guard against empty input
    if not user_msg.strip():
        yield "Please enter a message."
        return

    # 1) Construct the current system/developer messages
    system_message = {"role": "system", "content": system_prompt}
    developer_message = {"role": "developer", "content": dev_prompt}

    # 2) Replace any existing system/developer messages at the start of the
    #    conversation (or add them if not present). Slice assignment mutates
    #    the list in place so the Gradio state object sees the update.
    filtered = [msg for msg in conversation if msg["role"] not in ("system", "developer")]
    conversation[:] = [system_message, developer_message] + filtered

    # 3) Append the user's message
    conversation.append({"role": "user", "content": user_msg})

    # 4) Retrieve knowledge if the user toggled "Include Knowledge Retrieval"
    if retrieve_flg:
        knowledge_snippets = retrieve_knowledge(user_msg)
        if knowledge_snippets:
            # For simplicity, append the snippets to the developer message
            knowledge_text = "\n".join("[Knowledge] " + s for s in knowledge_snippets)
            conversation[1]["content"] += f"\n\n[Additional Knowledge]\n{knowledge_text}"

    # 5) Build the final prompt
    prompt = build_prompt(conversation)

    # 6) Stream the assistant's response
    partial_response = ""
    for partial_text in generate_tokens_stream(prompt, temperature, top_p):
        partial_response = partial_text
        yield partial_text  # Send partial tokens to Gradio for display

    # 7) Generation is complete; record the final assistant message
    conversation.append({"role": "assistant", "content": partial_response})

# -----------------------------------------------------------------------------
# 6. BUILD GRADIO INTERFACE
# -----------------------------------------------------------------------------
def build_ui():
    with gr.Blocks(title="PhilosBeta-Advanced", css="#chatbot{height:550px} .overflow-y-auto{max-height:550px}") as demo:

        gr.Markdown("# **PhilosBeta: Advanced Demo**")
        gr.Markdown(
            "An example of multi-turn conversation with streaming responses, "
            "optional retrieval, and custom system/developer prompts."
        )

        # State to store the conversation as a list of role/content dicts
        conversation_state = gr.State([])

        # TEXT ELEMENTS
        with gr.Row():
            with gr.Column():
                system_prompt_box = gr.Textbox(
                    label="System Prompt",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=3,
                )
                developer_prompt_box = gr.Textbox(
                    label="Developer Prompt",
                    value=DEFAULT_DEVELOPER_PROMPT,
                    lines=3,
                )
            with gr.Column():
                retrieve_flag = gr.Checkbox(label="Include Knowledge Retrieval", value=False)
                # Minimum of 0.1: temperature 0 is invalid when do_sample=True
                temperature_slider = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
                top_p_slider = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-p")
                max_tokens_info = gr.Markdown("Max new tokens = 128 (fixed in code).")

        # MAIN CHAT UI
        # height is set directly; the old .style() helper was removed in recent Gradio versions
        chatbox = gr.Chatbot(label="Philos Conversation", elem_id="chatbot", height=500)
        user_input = gr.Textbox(
            label="Your Message",
            placeholder="Type here...",
            lines=3,
        )
        send_btn = gr.Button("Send", variant="primary")

        # ---------------------------------------------------------------------
        # ACTION: Handle user input
        # ---------------------------------------------------------------------
        def user_send(
            user_text, conversation, sys_prompt, dev_prompt, retrieve_flg, temperature, top_p
        ):
            """
            Generator that calls advanced_chat() and streams partial replies
            into the Chatbot as (user, assistant) message pairs.
            """
            # Rebuild the displayed history from previous user/assistant turns
            turns = [m for m in conversation if m["role"] in ("user", "assistant")]
            history = [
                (turns[i]["content"], turns[i + 1]["content"])
                for i in range(0, len(turns) - 1, 2)
            ]
            # Stream the new turn; each yield updates the Chatbot in real time
            for partial_text in advanced_chat(
                user_msg=user_text,
                conversation=conversation,
                system_prompt=sys_prompt,
                dev_prompt=dev_prompt,
                retrieve_flg=retrieve_flg,
                temperature=temperature,
                top_p=top_p,
            ):
                yield history + [(user_text, partial_text)], conversation

        # Gradio streams generator outputs straight into the Chatbot component.
        send_btn.click(
            fn=user_send,
            inputs=[
                user_input,
                conversation_state,
                system_prompt_box,
                developer_prompt_box,
                retrieve_flag,
                temperature_slider,
                top_p_slider,
            ],
            outputs=[chatbox, conversation_state],
        )

        # Let the user press Enter to send messages as well
        user_input.submit(
            fn=user_send,
            inputs=[
                user_input,
                conversation_state,
                system_prompt_box,
                developer_prompt_box,
                retrieve_flag,
                temperature_slider,
                top_p_slider,
            ],
            outputs=[chatbox, conversation_state],
        )

    return demo

# -----------------------------------------------------------------------------
# 7. LAUNCH
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    ui = build_ui()
    ui.launch()
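
To try this locally, the usual Gradio workflow should apply (package names assumed): install `gradio`, `torch`, `transformers`, and `accelerate` (needed for `device_map="auto"`), then run `python app.py` and open the local URL Gradio prints. Note that loading microsoft/phi-4 in float16 needs roughly 28 GB of GPU memory (14B parameters at 2 bytes each); on smaller hardware the script falls back to gpt2.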