bluenevus committed on
Commit
9a3a044
·
1 Parent(s): 871d9e7

Update app.py via AI Editor

Browse files
Files changed (1) hide show
  1. app.py +136 -67
app.py CHANGED
@@ -14,6 +14,7 @@ import openai
14
  import base64
15
  import datetime
16
  from werkzeug.utils import secure_filename
 
17
 
18
  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
19
  logger = logging.getLogger("AskTricare")
@@ -112,6 +113,61 @@ def embed_docs_folder():
112
 
113
  embed_docs_folder()
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  app = dash.Dash(
116
  __name__,
117
  server=app_flask,
@@ -200,14 +256,20 @@ def user_input_card():
200
  placeholder="Type your question...",
201
  style={"width": "100%", "height": "60px", "resize": "vertical", "wordWrap": "break-word"},
202
  wrap="soft",
203
- maxLength=1000
 
 
204
  ),
 
205
  html.Div([
206
  dbc.Button("Send", id="send-btn", color="primary", className="mt-2 me-2", style={"minWidth": "100px"}),
207
  dbc.Button("New Chat", id="new-chat-btn", color="secondary", className="mt-2", style={"minWidth": "110px"}),
208
  ], style={"float": "right", "display": "flex", "gap": "0.5rem"}),
 
 
209
  ], style={"marginTop": "1rem"}),
210
  html.Div(id="error-message", style={"color": "#bb2124", "marginTop": "0.5rem"}),
 
211
  ])
212
  )
213
 
@@ -216,7 +278,8 @@ def right_main_static():
216
  chat_box_card(),
217
  user_input_card(),
218
  dcc.Loading(id="loading", type="default", fullscreen=False, style={"position": "absolute", "top": "5%", "left": "50%"}),
219
- dcc.Interval(id="stream-interval", interval=400, n_intervals=0, disabled=True, max_intervals=1000)
 
220
  ], style={"padding": "1rem", "backgroundColor": "#fff", "height": "100vh", "overflowY": "auto"})
221
 
222
  app.layout = html.Div([
@@ -228,9 +291,35 @@ app.layout = html.Div([
228
  html.Div(right_main_static(), id='right-main', style={"marginLeft": "30vw", "width": "70vw", "overflowY": "auto"})
229
  ], style={"display": "flex"}),
230
  dcc.Store(id="clear-input", data=False),
231
- dcc.Store(id="scroll-bottom", data=0)
 
 
232
  ])
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  def _is_supported_doc(filename):
235
  ext = os.path.splitext(filename)[1].lower()
236
  return ext in [".txt", ".pdf", ".md", ".docx"]
@@ -245,7 +334,6 @@ def _extract_text_from_upload(filepath, ext):
245
  except Exception as e:
246
  logger.error(f"Error reading {filepath}: {e}")
247
  return ""
248
- # For .pdf/.docx, could add extraction with extra dependencies
249
  else:
250
  return ""
251
 
@@ -276,13 +364,14 @@ def assign_session_id(_):
276
  Input("new-chat-btn", "n_clicks"),
277
  Input("stream-interval", "n_intervals"),
278
  Input({"type": "chat-history-item", "index": dash.ALL}, "n_clicks"),
 
279
  State("file-upload", "filename"),
280
  State("user-input", "value"),
281
  State("selected-history", "data"),
282
  State("chat-history-list", "children"),
283
  prevent_initial_call=False
284
  )
285
- def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, stream_n, chat_history_clicks, file_names, user_input, selected_history, chat_history_list_children):
286
  trigger = callback_context.triggered[0]['prop_id'].split('.')[0] if callback_context.triggered else ""
287
  session_id = session_id or get_session_id()
288
  session_lock = get_session_lock(session_id)
@@ -341,7 +430,7 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
341
  history_index_clicked
342
  )
343
 
344
- # Handle File Upload -- now, if supported, send to OpenAI as system message
345
  file_was_uploaded_and_sent = False
346
  if trigger == "file-upload" and file_contents and file_names:
347
  uploads = []
@@ -358,80 +447,60 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
358
  with open(fp, "wb") as f:
359
  f.write(base64.b64decode(data))
360
  uploads.append({"name": fname, "is_img": is_img, "path": fp})
361
- # If document, extract text and send to OpenAI as a message (system or user)
362
  if _is_supported_doc(n) and not is_img:
363
  text = _extract_text_from_upload(fp, ext)
364
  if text.strip():
365
- doc_intro = f"(User uploaded document '{n}'; content below):\n\n{text[:3800]}"
366
- # Add as user message and trigger streaming
367
- state["messages"].append({"role": "user", "content": doc_intro})
368
- state["streaming"] = True
369
- state["stream_buffer"] = ""
370
- file_was_uploaded_and_sent = True
371
- logger.info(f"Session {session_id}: Uploaded doc '{n}' sent to OpenAI")
372
  state["uploads"].extend(uploads)
373
  save_session_state(session_id)
374
  logger.info(f"Session {session_id}: Uploaded files {[u['name'] for u in uploads]}")
375
 
376
- # If a supported doc was uploaded, start streaming OpenAI response
377
- if file_was_uploaded_and_sent:
378
- def run_stream(session_id, messages):
379
- try:
380
- system_prompt = load_system_prompt()
381
- msg_list = [{"role": "system", "content": system_prompt}]
382
- for m in messages:
383
- msg_list.append({"role": m["role"], "content": m["content"]})
384
- response = openai.ChatCompletion.create(
385
- model="gpt-3.5-turbo",
386
- messages=msg_list,
387
- max_tokens=700,
388
- temperature=0.2,
389
- stream=True,
390
- )
391
- reply = ""
392
- for chunk in response:
393
- delta = chunk["choices"][0]["delta"]
394
- content = delta.get("content", "")
395
- if content:
396
- reply += content
397
- session_lock = get_session_lock(session_id)
398
- with session_lock:
399
- load_session_state(session_id)
400
- state = get_session_state(session_id)
401
- state["stream_buffer"] = reply
402
- save_session_state(session_id)
403
- session_lock = get_session_lock(session_id)
404
- with session_lock:
405
- load_session_state(session_id)
406
- state = get_session_state(session_id)
407
- state["messages"].append({"role": "assistant", "content": reply})
408
- state["stream_buffer"] = ""
409
- state["streaming"] = False
410
- save_session_state(session_id)
411
- logger.info(f"Session {session_id}: Doc Q&A: Assistant: {reply}")
412
- except Exception as e:
413
- session_lock = get_session_lock(session_id)
414
- with session_lock:
415
- load_session_state(session_id)
416
- state = get_session_state(session_id)
417
- state["streaming"] = False
418
- state["stream_buffer"] = ""
419
- save_session_state(session_id)
420
- logger.error(f"Session {session_id}: Streaming error (doc upload): {e}")
421
- threading.Thread(target=run_stream, args=(session_id, list(state["messages"])), daemon=True).start()
422
- start_streaming = True
423
 
424
  # Handle Send
425
- if trigger == "send-btn" and user_input and user_input.strip():
426
- state["messages"].append({"role": "user", "content": user_input})
 
427
  state["streaming"] = True
428
  state["stream_buffer"] = ""
429
  save_session_state(session_id)
430
 
431
- def run_stream(session_id, messages):
432
  try:
433
  system_prompt = load_system_prompt()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  msg_list = [{"role": "system", "content": system_prompt}]
 
 
435
  for m in messages:
436
  msg_list.append({"role": m["role"], "content": m["content"]})
437
  response = openai.ChatCompletion.create(
@@ -461,7 +530,7 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
461
  state["stream_buffer"] = ""
462
  state["streaming"] = False
463
  save_session_state(session_id)
464
- logger.info(f"Session {session_id}: User: {user_input} | Assistant: {reply}")
465
  except Exception as e:
466
  session_lock = get_session_lock(session_id)
467
  with session_lock:
@@ -472,7 +541,7 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
472
  save_session_state(session_id)
473
  logger.error(f"Session {session_id}: Streaming error: {e}")
474
 
475
- threading.Thread(target=run_stream, args=(session_id, list(state["messages"])), daemon=True).start()
476
  start_streaming = True
477
 
478
  # Handle New Chat button logic: auto-name and reset
@@ -577,7 +646,7 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
577
  chat_cards.append(chat_message_card(state["stream_buffer"], is_user=False))
578
  return upload_cards, chat_history_items, chat_cards, error, False, 0, "", selected_history
579
  # Always clear input after send
580
- if trigger == "send-btn":
581
  return upload_cards, chat_history_items, chat_cards, error, (not state.get("streaming", False)), 0, "", selected_history
582
  return upload_cards, chat_history_items, chat_cards, error, (not state.get("streaming", False)), 0, user_input or "", selected_history
583
 
 
14
  import base64
15
  import datetime
16
  from werkzeug.utils import secure_filename
17
+ import numpy as np
18
 
19
  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
20
  logger = logging.getLogger("AskTricare")
 
113
 
114
  embed_docs_folder()
115
 
116
def embed_user_doc(session_id, filename, text):
    """Embed an uploaded document and append it to the session's vector store.

    Only the first 4000 characters of ``text`` are embedded (one vector per
    document). The vector, the embedded chunk and the filename are appended to
    ``user_embeds.json`` inside the session directory.

    Args:
        session_id: Identifier used to resolve the session directory.
        filename: Name recorded alongside the embedding.
        text: Extracted document text; ``None``, empty or whitespace-only
            input is ignored.

    Returns:
        None. Embedding and storage failures are logged, not raised, so a bad
        upload never breaks the calling callback.
    """
    # Guard first: nothing to embed means no directory lookup and no API call.
    if not text or not text.strip():
        return
    session_dir = get_session_dir(session_id)
    try:
        chunk = text[:4000]
        response = openai.Embedding.create(
            input=[chunk],
            model=EMBEDDING_MODEL
        )
        embedding = response['data'][0]['embedding']
        user_embeds_path = os.path.join(session_dir, "user_embeds.json")

        # EAFP load: a missing file is the normal first-upload case; an
        # unreadable/corrupt file is reset instead of blocking every later
        # upload for this session.
        try:
            with open(user_embeds_path, "r", encoding="utf-8") as f:
                user_embeds = json.load(f)
        except FileNotFoundError:
            user_embeds = {}
        except (OSError, ValueError) as e:
            logger.warning(f"Session {session_id}: Resetting unreadable user embeddings store: {e}")
            user_embeds = {}
        if not isinstance(user_embeds, dict):
            user_embeds = {}
        for key in ("embeddings", "texts", "filenames"):
            if not isinstance(user_embeds.get(key), list):
                user_embeds[key] = []

        user_embeds["embeddings"].append(embedding)
        user_embeds["texts"].append(chunk)
        user_embeds["filenames"].append(filename)

        # Write to a sibling temp file and swap it in so a concurrent reader
        # never sees a half-written JSON document.
        tmp_path = user_embeds_path + ".tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(user_embeds, f)
        os.replace(tmp_path, user_embeds_path)
        logger.info(f"Session {session_id}: Embedded user doc {filename}")
    except Exception as e:
        logger.error(f"Session {session_id}: Failed to embed user doc {filename}: {e}")
141
+
142
def get_user_embeddings(session_id):
    """Load the per-session embeddings written by ``embed_user_doc``.

    Args:
        session_id: Identifier used to resolve the session directory.

    Returns:
        A ``(embeds, texts, filenames)`` tuple where ``embeds`` is a NumPy
        array built from the stored vectors and the other two are the stored
        lists. When the store is missing, unreadable, or not a JSON object,
        ``(np.array([]), [], [])`` is returned.
    """
    session_dir = get_session_dir(session_id)
    user_embeds_path = os.path.join(session_dir, "user_embeds.json")
    # EAFP instead of exists()+open(): avoids the check/open race and lets a
    # corrupt store degrade to "no user documents" rather than raising.
    try:
        with open(user_embeds_path, "r", encoding="utf-8") as f:
            stored = json.load(f)
    except FileNotFoundError:
        return np.array([]), [], []
    except (OSError, ValueError) as e:
        logger.error(f"Session {session_id}: Could not read user embeddings: {e}")
        return np.array([]), [], []
    if not isinstance(stored, dict):
        return np.array([]), [], []
    embeds = np.array(stored.get("embeddings", []))
    texts = stored.get("texts", [])
    filenames = stored.get("filenames", [])
    return embeds, texts, filenames
153
+
154
def semantic_search(query, embed_matrix, texts, filenames, top_k=2, query_embedding=None):
    """Rank stored chunks by cosine similarity to a query.

    Args:
        query: Question text; embedded with ``EMBEDDING_MODEL`` unless
            ``query_embedding`` is supplied.
        embed_matrix: Sequence or array of embedding vectors, one per chunk.
        texts: Chunk texts, parallel to ``embed_matrix``.
        filenames: Source filenames, parallel to ``embed_matrix``.
        top_k: Maximum number of hits to return; ``None`` returns every row,
            values <= 0 return nothing.
        query_embedding: Optional precomputed embedding for ``query``. Lets a
            caller that searches several stores embed the question once
            instead of paying for one API call per store.

    Returns:
        A list of ``{"filename", "text", "score"}`` dicts ordered from most to
        least similar. An empty list is returned when there is nothing to
        search or when embedding/scoring fails (the error is logged).
    """
    if embed_matrix is None or len(embed_matrix) == 0:
        return []
    if top_k is not None and top_k <= 0:
        return []
    try:
        if query_embedding is None:
            response = openai.Embedding.create(input=[query], model=EMBEDDING_MODEL)
            query_embedding = response["data"][0]["embedding"]
        q_vec = np.asarray(query_embedding, dtype=float)
        matrix = np.asarray(embed_matrix, dtype=float)

        # Only rows that have both a text and a filename can be reported; a
        # ragged store yields the aligned prefix instead of no results at all.
        usable = min(len(matrix), len(texts), len(filenames))
        if usable == 0:
            return []
        matrix = matrix[:usable]

        # Cosine similarity; the epsilon keeps zero vectors from dividing by 0.
        denom = np.linalg.norm(matrix, axis=1) * np.linalg.norm(q_vec) + 1e-8
        scores = np.dot(matrix, q_vec) / denom
        ranked = np.argsort(scores)[::-1][:top_k]
        return [
            {"filename": filenames[i], "text": texts[i], "score": float(scores[i])}
            for i in map(int, ranked)
        ]
    except Exception as e:
        logger.error(f"Semantic search error: {e}")
        return []
170
+
171
  app = dash.Dash(
172
  __name__,
173
  server=app_flask,
 
256
  placeholder="Type your question...",
257
  style={"width": "100%", "height": "60px", "resize": "vertical", "wordWrap": "break-word"},
258
  wrap="soft",
259
+ maxLength=1000,
260
+ n_submit=0,
261
+ n_blur=0,
262
  ),
263
+ dcc.Store(id="enter-triggered", data=False),
264
  html.Div([
265
  dbc.Button("Send", id="send-btn", color="primary", className="mt-2 me-2", style={"minWidth": "100px"}),
266
  dbc.Button("New Chat", id="new-chat-btn", color="secondary", className="mt-2", style={"minWidth": "110px"}),
267
  ], style={"float": "right", "display": "flex", "gap": "0.5rem"}),
268
+ dcc.Store(id="user-input-store", data="", storage_type="session"),
269
+ html.Button(id='hidden-send', style={'display': 'none'})
270
  ], style={"marginTop": "1rem"}),
271
  html.Div(id="error-message", style={"color": "#bb2124", "marginTop": "0.5rem"}),
272
+ dcc.Store(id="should-clear-input", data=False)
273
  ])
274
  )
275
 
 
278
  chat_box_card(),
279
  user_input_card(),
280
  dcc.Loading(id="loading", type="default", fullscreen=False, style={"position": "absolute", "top": "5%", "left": "50%"}),
281
+ dcc.Interval(id="stream-interval", interval=400, n_intervals=0, disabled=True, max_intervals=1000),
282
+ dcc.Store(id="client-question", data="")
283
  ], style={"padding": "1rem", "backgroundColor": "#fff", "height": "100vh", "overflowY": "auto"})
284
 
285
  app.layout = html.Div([
 
291
  html.Div(right_main_static(), id='right-main', style={"marginLeft": "30vw", "width": "70vw", "overflowY": "auto"})
292
  ], style={"display": "flex"}),
293
  dcc.Store(id="clear-input", data=False),
294
+ dcc.Store(id="scroll-bottom", data=0),
295
+ # clientside callback for textarea enter/shift-enter
296
+ dcc.Store(id="enter-pressed", data=False)
297
  ])
298
 
299
+ # JS callback to intercept Enter/Shift+Enter for dcc.Textarea
300
# Installs (once per page) a keydown handler that turns a plain Enter in the
# 'user-input' textarea into a click on the hidden send button, while
# Shift+Enter keeps inserting a newline.
#
# The listener is delegated from `document` rather than bound to the textarea
# node, so it keeps working if the textarea is re-rendered and does not depend
# on the element existing when this callback first runs. Enter presses that
# belong to an IME composition (e.isComposing) are ignored so confirming a
# candidate does not submit the message. The callback never writes to its
# Output; it exists only to run the installer.
app.clientside_callback(
    """
    function(n, value) {
        if (!window._asktricare_enter_handler) {
            document.addEventListener('keydown', function(e) {
                if (!e.target || e.target.id !== 'user-input') return;
                if (e.key !== 'Enter' || e.shiftKey || e.isComposing) return;
                e.preventDefault();
                var btn = document.getElementById('hidden-send');
                if (btn) btn.click();
            });
            window._asktricare_enter_handler = true;
        }
        return window.dash_clientside.no_update;
    }
    """,
    Output('enter-pressed', 'data'),
    Input('user-input', 'n_blur'),
    State('user-input', 'value')
)
322
+
323
  def _is_supported_doc(filename):
324
  ext = os.path.splitext(filename)[1].lower()
325
  return ext in [".txt", ".pdf", ".md", ".docx"]
 
334
  except Exception as e:
335
  logger.error(f"Error reading {filepath}: {e}")
336
  return ""
 
337
  else:
338
  return ""
339
 
 
364
  Input("new-chat-btn", "n_clicks"),
365
  Input("stream-interval", "n_intervals"),
366
  Input({"type": "chat-history-item", "index": dash.ALL}, "n_clicks"),
367
+ Input('hidden-send', 'n_clicks'),
368
  State("file-upload", "filename"),
369
  State("user-input", "value"),
370
  State("selected-history", "data"),
371
  State("chat-history-list", "children"),
372
  prevent_initial_call=False
373
  )
374
+ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, stream_n, chat_history_clicks, hidden_send_clicks, file_names, user_input, selected_history, chat_history_list_children):
375
  trigger = callback_context.triggered[0]['prop_id'].split('.')[0] if callback_context.triggered else ""
376
  session_id = session_id or get_session_id()
377
  session_lock = get_session_lock(session_id)
 
430
  history_index_clicked
431
  )
432
 
433
+ # Handle File Upload
434
  file_was_uploaded_and_sent = False
435
  if trigger == "file-upload" and file_contents and file_names:
436
  uploads = []
 
447
  with open(fp, "wb") as f:
448
  f.write(base64.b64decode(data))
449
  uploads.append({"name": fname, "is_img": is_img, "path": fp})
 
450
  if _is_supported_doc(n) and not is_img:
451
  text = _extract_text_from_upload(fp, ext)
452
  if text.strip():
453
+ embed_user_doc(session_id, fname, text)
454
+ logger.info(f"Session {session_id}: Uploaded doc '{n}' embedded for user vector store")
 
 
 
 
 
455
  state["uploads"].extend(uploads)
456
  save_session_state(session_id)
457
  logger.info(f"Session {session_id}: Uploaded files {[u['name'] for u in uploads]}")
458
 
459
+ # Determine if send was triggered (via send-btn, hidden-send, or enter)
460
+ send_triggered = False
461
+ if trigger == "send-btn" or trigger == "hidden-send":
462
+ send_triggered = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
  # Handle Send
465
+ if send_triggered and user_input and user_input.strip():
466
+ question = user_input.strip()
467
+ state["messages"].append({"role": "user", "content": question})
468
  state["streaming"] = True
469
  state["stream_buffer"] = ""
470
  save_session_state(session_id)
471
 
472
+ def run_stream(session_id, messages, question):
473
  try:
474
  system_prompt = load_system_prompt()
475
+ # Retrieve relevant context from global RAG
476
+ rag_chunks = []
477
+ try:
478
+ # Search global docs
479
+ global_embeds = []
480
+ global_texts = []
481
+ global_fnames = []
482
+ for fname, emb in EMBEDDING_INDEX.items():
483
+ global_embeds.append(emb)
484
+ global_texts.append(EMBEDDING_TEXTS[fname])
485
+ global_fnames.append(fname)
486
+ global_rag = semantic_search(question, global_embeds, global_texts, global_fnames, top_k=2)
487
+ if global_rag:
488
+ for r in global_rag:
489
+ rag_chunks.append(f"Global doc [{r['filename']}]:\n{r['text'][:1000]}")
490
+ # Search user docs
491
+ user_embeds, user_texts, user_fnames = get_user_embeddings(session_id)
492
+ user_rag = semantic_search(question, user_embeds, user_texts, user_fnames, top_k=2)
493
+ if user_rag:
494
+ for r in user_rag:
495
+ rag_chunks.append(f"User upload [{r['filename']}]:\n{r['text'][:1000]}")
496
+ except Exception as e:
497
+ logger.error(f"Session {session_id}: RAG error: {e}")
498
+ context_block = ""
499
+ if rag_chunks:
500
+ context_block = "The following sources may help answer the question:\n\n" + "\n\n".join(rag_chunks) + "\n\n"
501
  msg_list = [{"role": "system", "content": system_prompt}]
502
+ if context_block:
503
+ msg_list.append({"role": "system", "content": context_block})
504
  for m in messages:
505
  msg_list.append({"role": m["role"], "content": m["content"]})
506
  response = openai.ChatCompletion.create(
 
530
  state["stream_buffer"] = ""
531
  state["streaming"] = False
532
  save_session_state(session_id)
533
+ logger.info(f"Session {session_id}: User: {question} | Assistant: {reply}")
534
  except Exception as e:
535
  session_lock = get_session_lock(session_id)
536
  with session_lock:
 
541
  save_session_state(session_id)
542
  logger.error(f"Session {session_id}: Streaming error: {e}")
543
 
544
+ threading.Thread(target=run_stream, args=(session_id, list(state["messages"]), question), daemon=True).start()
545
  start_streaming = True
546
 
547
  # Handle New Chat button logic: auto-name and reset
 
646
  chat_cards.append(chat_message_card(state["stream_buffer"], is_user=False))
647
  return upload_cards, chat_history_items, chat_cards, error, False, 0, "", selected_history
648
  # Always clear input after send
649
+ if send_triggered:
650
  return upload_cards, chat_history_items, chat_cards, error, (not state.get("streaming", False)), 0, "", selected_history
651
  return upload_cards, chat_history_items, chat_cards, error, (not state.get("streaming", False)), 0, user_input or "", selected_history
652