Spaces:

MicroHealth
/

ask-tricare

Paused

App Files Files Community

bluenevus committed on May 1

Commit

9a3a044

1 Parent(s): 871d9e7

Update app.py via AI Editor

Browse files

Files changed (1) hide show

app.py +136 -67

app.py CHANGED Viewed

@@ -14,6 +14,7 @@ import openai
 import base64
 import datetime
 from werkzeug.utils import secure_filename
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
 logger = logging.getLogger("AskTricare")
@@ -112,6 +113,61 @@ def embed_docs_folder():
 embed_docs_folder()
 app = dash.Dash(
     __name__,
     server=app_flask,
@@ -200,14 +256,20 @@ def user_input_card():
                     placeholder="Type your question...",
                     style={"width": "100%", "height": "60px", "resize": "vertical", "wordWrap": "break-word"},
                     wrap="soft",
-                    maxLength=1000
                 ),
                 html.Div([
                     dbc.Button("Send", id="send-btn", color="primary", className="mt-2 me-2", style={"minWidth": "100px"}),
                     dbc.Button("New Chat", id="new-chat-btn", color="secondary", className="mt-2", style={"minWidth": "110px"}),
                 ], style={"float": "right", "display": "flex", "gap": "0.5rem"}),
             ], style={"marginTop": "1rem"}),
             html.Div(id="error-message", style={"color": "#bb2124", "marginTop": "0.5rem"}),
         ])
     )
@@ -216,7 +278,8 @@ def right_main_static():
         chat_box_card(),
         user_input_card(),
         dcc.Loading(id="loading", type="default", fullscreen=False, style={"position": "absolute", "top": "5%", "left": "50%"}),
-        dcc.Interval(id="stream-interval", interval=400, n_intervals=0, disabled=True, max_intervals=1000)
     ], style={"padding": "1rem", "backgroundColor": "#fff", "height": "100vh", "overflowY": "auto"})
 app.layout = html.Div([
@@ -228,9 +291,35 @@ app.layout = html.Div([
         html.Div(right_main_static(), id='right-main', style={"marginLeft": "30vw", "width": "70vw", "overflowY": "auto"})
     ], style={"display": "flex"}),
     dcc.Store(id="clear-input", data=False),
-    dcc.Store(id="scroll-bottom", data=0)
 ])
 def _is_supported_doc(filename):
     """Return True when the file name ends in a supported document extension.

     The extension comparison is case-insensitive.
     """
     _, suffix = os.path.splitext(filename)
     return suffix.lower() in (".txt", ".pdf", ".md", ".docx")
@@ -245,7 +334,6 @@ def _extract_text_from_upload(filepath, ext):
         except Exception as e:
             logger.error(f"Error reading {filepath}: {e}")
             return ""
-    # For .pdf/.docx, could add extraction with extra dependencies
     else:
         return ""
@@ -276,13 +364,14 @@ def assign_session_id(_):
     Input("new-chat-btn", "n_clicks"),
     Input("stream-interval", "n_intervals"),
     Input({"type": "chat-history-item", "index": dash.ALL}, "n_clicks"),
     State("file-upload", "filename"),
     State("user-input", "value"),
     State("selected-history", "data"),
     State("chat-history-list", "children"),
     prevent_initial_call=False
 )
-def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, stream_n, chat_history_clicks, file_names, user_input, selected_history, chat_history_list_children):
     trigger = callback_context.triggered[0]['prop_id'].split('.')[0] if callback_context.triggered else ""
     session_id = session_id or get_session_id()
     session_lock = get_session_lock(session_id)
@@ -341,7 +430,7 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
                 history_index_clicked
             )
-        # Handle File Upload -- now, if supported, send to OpenAI as system message
         file_was_uploaded_and_sent = False
         if trigger == "file-upload" and file_contents and file_names:
             uploads = []
@@ -358,80 +447,60 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
                 with open(fp, "wb") as f:
                     f.write(base64.b64decode(data))
                 uploads.append({"name": fname, "is_img": is_img, "path": fp})
-                # If document, extract text and send to OpenAI as a message (system or user)
                 if _is_supported_doc(n) and not is_img:
                     text = _extract_text_from_upload(fp, ext)
                     if text.strip():
-                        doc_intro = f"(User uploaded document '{n}'; content below):\n\n{text[:3800]}"
-                        # Add as user message and trigger streaming
-                        state["messages"].append({"role": "user", "content": doc_intro})
-                        state["streaming"] = True
-                        state["stream_buffer"] = ""
-                        file_was_uploaded_and_sent = True
-                        logger.info(f"Session {session_id}: Uploaded doc '{n}' sent to OpenAI")
             state["uploads"].extend(uploads)
             save_session_state(session_id)
             logger.info(f"Session {session_id}: Uploaded files {[u['name'] for u in uploads]}")
-        # If a supported doc was uploaded, start streaming OpenAI response
-        if file_was_uploaded_and_sent:
-            def run_stream(session_id, messages):
-                try:
-                    system_prompt = load_system_prompt()
-                    msg_list = [{"role": "system", "content": system_prompt}]
-                    for m in messages:
-                        msg_list.append({"role": m["role"], "content": m["content"]})
-                    response = openai.ChatCompletion.create(
-                        model="gpt-3.5-turbo",
-                        messages=msg_list,
-                        max_tokens=700,
-                        temperature=0.2,
-                        stream=True,
-                    )
-                    reply = ""
-                    for chunk in response:
-                        delta = chunk["choices"][0]["delta"]
-                        content = delta.get("content", "")
-                        if content:
-                            reply += content
-                            session_lock = get_session_lock(session_id)
-                            with session_lock:
-                                load_session_state(session_id)
-                                state = get_session_state(session_id)
-                                state["stream_buffer"] = reply
-                                save_session_state(session_id)
-                    session_lock = get_session_lock(session_id)
-                    with session_lock:
-                        load_session_state(session_id)
-                        state = get_session_state(session_id)
-                        state["messages"].append({"role": "assistant", "content": reply})
-                        state["stream_buffer"] = ""
-                        state["streaming"] = False
-                        save_session_state(session_id)
-                    logger.info(f"Session {session_id}: Doc Q&A: Assistant: {reply}")
-                except Exception as e:
-                    session_lock = get_session_lock(session_id)
-                    with session_lock:
-                        load_session_state(session_id)
-                        state = get_session_state(session_id)
-                        state["streaming"] = False
-                        state["stream_buffer"] = ""
-                        save_session_state(session_id)
-                    logger.error(f"Session {session_id}: Streaming error (doc upload): {e}")
-            threading.Thread(target=run_stream, args=(session_id, list(state["messages"])), daemon=True).start()
-            start_streaming = True
         # Handle Send
-        if trigger == "send-btn" and user_input and user_input.strip():
-            state["messages"].append({"role": "user", "content": user_input})
             state["streaming"] = True
             state["stream_buffer"] = ""
             save_session_state(session_id)
-            def run_stream(session_id, messages):
                 try:
                     system_prompt = load_system_prompt()
                     msg_list = [{"role": "system", "content": system_prompt}]
                     for m in messages:
                         msg_list.append({"role": m["role"], "content": m["content"]})
                     response = openai.ChatCompletion.create(
@@ -461,7 +530,7 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
                         state["stream_buffer"] = ""
                         state["streaming"] = False
                         save_session_state(session_id)
-                    logger.info(f"Session {session_id}: User: {user_input} | Assistant: {reply}")
                 except Exception as e:
                     session_lock = get_session_lock(session_id)
                     with session_lock:
@@ -472,7 +541,7 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
                         save_session_state(session_id)
                     logger.error(f"Session {session_id}: Streaming error: {e}")
-            threading.Thread(target=run_stream, args=(session_id, list(state["messages"])), daemon=True).start()
             start_streaming = True
         # Handle New Chat button logic: auto-name and reset
@@ -577,7 +646,7 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
                 chat_cards.append(chat_message_card(state["stream_buffer"], is_user=False))
             return upload_cards, chat_history_items, chat_cards, error, False, 0, "", selected_history
         # Always clear input after send
-        if trigger == "send-btn":
             return upload_cards, chat_history_items, chat_cards, error, (not state.get("streaming", False)), 0, "", selected_history
         return upload_cards, chat_history_items, chat_cards, error, (not state.get("streaming", False)), 0, user_input or "", selected_history

 import base64
 import datetime
 from werkzeug.utils import secure_filename
+import numpy as np
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
 logger = logging.getLogger("AskTricare")
 embed_docs_folder()
+def embed_user_doc(session_id, filename, text):
+    """Embed an uploaded document and append it to the session's vector store.
+
+    Only the first 4000 characters of ``text`` are embedded, as one chunk.
+    The embedding, the chunk and ``filename`` are appended to the parallel
+    lists stored in ``user_embeds.json`` inside the session directory.
+    Exceptions raised while embedding or persisting are logged, not raised.
+
+    Args:
+        session_id: Session whose directory holds the store.
+        filename: Name recorded next to the chunk.
+        text: Extracted document text; blank text is ignored.
+    """
+    session_dir = get_session_dir(session_id)
+    if not text.strip():
+        return
+    try:
+        chunk = text[:4000]
+        response = openai.Embedding.create(
+            input=[chunk],
+            model=EMBEDDING_MODEL
+        )
+        embedding = response['data'][0]['embedding']
+        user_embeds_path = os.path.join(session_dir, "user_embeds.json")
+        user_embeds = None
+        if os.path.exists(user_embeds_path):
+            try:
+                with open(user_embeds_path, "r") as f:
+                    user_embeds = json.load(f)
+            except ValueError as e:
+                # json.JSONDecodeError is a ValueError. An unreadable store
+                # would otherwise make every later upload fail, so start over.
+                logger.error(f"Session {session_id}: Discarding unreadable user_embeds.json: {e}")
+        if not isinstance(user_embeds, dict):
+            user_embeds = {}
+        # Tolerate a store that is missing one of the parallel lists.
+        for key in ("embeddings", "texts", "filenames"):
+            user_embeds.setdefault(key, [])
+        user_embeds["embeddings"].append(embedding)
+        user_embeds["texts"].append(chunk)
+        user_embeds["filenames"].append(filename)
+        # Write to a temp file and swap it in: get_user_embeddings reads this
+        # file from the streaming thread and must never see half-written JSON.
+        tmp_path = user_embeds_path + ".tmp"
+        with open(tmp_path, "w") as f:
+            json.dump(user_embeds, f)
+        os.replace(tmp_path, user_embeds_path)
+        logger.info(f"Session {session_id}: Embedded user doc {filename}")
+    except Exception as e:
+        logger.error(f"Session {session_id}: Failed to embed user doc {filename}: {e}")
+def get_user_embeddings(session_id):
+    """Load the session's uploaded-document embeddings.
+
+    Returns:
+        A ``(embeddings, texts, filenames)`` tuple read from
+        ``user_embeds.json`` in the session directory: a NumPy array plus two
+        lists. When the file does not exist, an empty array and two empty
+        lists are returned.
+    """
+    store_path = os.path.join(get_session_dir(session_id), "user_embeds.json")
+    if not os.path.exists(store_path):
+        return np.array([]), [], []
+    with open(store_path, "r") as fh:
+        stored = json.load(fh)
+    vectors = np.array(stored.get("embeddings", []))
+    chunk_texts = stored.get("texts", [])
+    source_names = stored.get("filenames", [])
+    return vectors, chunk_texts, source_names
+def semantic_search(query, embed_matrix, texts, filenames, top_k=2):
+    """Rank stored chunks by cosine similarity to ``query``.
+
+    Args:
+        query: Text to embed with ``EMBEDDING_MODEL``.
+        embed_matrix: Sequence of embedding vectors, one per stored chunk.
+        texts: Chunk texts, parallel to ``embed_matrix``.
+        filenames: Source names, parallel to ``embed_matrix``.
+        top_k: Maximum number of results to return.
+
+    Returns:
+        Up to ``top_k`` dicts with ``filename``, ``text`` and ``score`` keys,
+        best match first. An empty list when there is nothing to search,
+        when ``top_k`` is not positive, or when any error occurs (the error
+        is logged).
+    """
+    # A negative top_k would slice off the worst matches instead of returning
+    # none, and zero would still pay for an embedding request.
+    if len(embed_matrix) == 0 or top_k <= 0:
+        return []
+    try:
+        q_embed = openai.Embedding.create(input=[query], model=EMBEDDING_MODEL)["data"][0]["embedding"]
+        q_embed = np.array(q_embed)
+        embed_matrix = np.array(embed_matrix)
+        # Cosine similarity; the epsilon keeps an all-zero vector from dividing by zero.
+        scores = np.dot(embed_matrix, q_embed) / (np.linalg.norm(embed_matrix, axis=1) * np.linalg.norm(q_embed) + 1e-8)
+        idx = np.argsort(scores)[::-1][:top_k]
+        return [
+            {"filename": filenames[i], "text": texts[i], "score": float(scores[i])}
+            for i in idx
+        ]
+    except Exception as e:
+        logger.error(f"Semantic search error: {e}")
+        return []
 app = dash.Dash(
     __name__,
     server=app_flask,
                     placeholder="Type your question...",
                     style={"width": "100%", "height": "60px", "resize": "vertical", "wordWrap": "break-word"},
                     wrap="soft",
+                    maxLength=1000,
+                    n_submit=0,
+                    n_blur=0,
                 ),
+                dcc.Store(id="enter-triggered", data=False),
                 html.Div([
                     dbc.Button("Send", id="send-btn", color="primary", className="mt-2 me-2", style={"minWidth": "100px"}),
                     dbc.Button("New Chat", id="new-chat-btn", color="secondary", className="mt-2", style={"minWidth": "110px"}),
                 ], style={"float": "right", "display": "flex", "gap": "0.5rem"}),
+                dcc.Store(id="user-input-store", data="", storage_type="session"),
+                html.Button(id='hidden-send', style={'display': 'none'})
             ], style={"marginTop": "1rem"}),
             html.Div(id="error-message", style={"color": "#bb2124", "marginTop": "0.5rem"}),
+            dcc.Store(id="should-clear-input", data=False)
         ])
     )
         chat_box_card(),
         user_input_card(),
         dcc.Loading(id="loading", type="default", fullscreen=False, style={"position": "absolute", "top": "5%", "left": "50%"}),
+        dcc.Interval(id="stream-interval", interval=400, n_intervals=0, disabled=True, max_intervals=1000),
+        dcc.Store(id="client-question", data="")
     ], style={"padding": "1rem", "backgroundColor": "#fff", "height": "100vh", "overflowY": "auto"})
 app.layout = html.Div([
         html.Div(right_main_static(), id='right-main', style={"marginLeft": "30vw", "width": "70vw", "overflowY": "auto"})
     ], style={"display": "flex"}),
     dcc.Store(id="clear-input", data=False),
+    dcc.Store(id="scroll-bottom", data=0),
+    # clientside callback for textarea enter/shift-enter
+    dcc.Store(id="enter-pressed", data=False)
 ])
+# JS callback to intercept Enter/Shift+Enter for dcc.Textarea.
+# It installs a keydown listener on the 'user-input' element: plain Enter
+# suppresses the default action and clicks the hidden 'hidden-send' button,
+# while Shift+Enter is left untouched. The function always returns no_update,
+# so the 'enter-pressed' store is never written by this callback, and its
+# `n` and `value` arguments are unused.
+# NOTE(review): the install-once flag is kept on `window`, not on the element;
+# if the textarea DOM node is ever replaced, the listener would not be
+# re-attached. Confirm whether the layout re-renders it.
+app.clientside_callback(
+    """
+    function(n, value) {
+        var ta = document.getElementById('user-input');
+        if (!ta) return window.dash_clientside.no_update;
+        if (!window._asktricare_enter_handler) {
+            ta.addEventListener('keydown', function(e) {
+                if (e.key === 'Enter' && !e.shiftKey) {
+                    e.preventDefault();
+                    var btn = document.getElementById('hidden-send');
+                    if (btn) btn.click();
+                }
+            });
+            window._asktricare_enter_handler = true;
+        }
+        return window.dash_clientside.no_update;
+    }
+    """,
+    Output('enter-pressed', 'data'),
+    Input('user-input', 'n_blur'),
+    State('user-input', 'value')
+)
 def _is_supported_doc(filename):
     """Return True when the file name ends in a supported document extension.

     The extension comparison is case-insensitive.
     """
     _, suffix = os.path.splitext(filename)
     return suffix.lower() in (".txt", ".pdf", ".md", ".docx")
         except Exception as e:
             logger.error(f"Error reading {filepath}: {e}")
             return ""
     else:
         return ""
     Input("new-chat-btn", "n_clicks"),
     Input("stream-interval", "n_intervals"),
     Input({"type": "chat-history-item", "index": dash.ALL}, "n_clicks"),
+    Input('hidden-send', 'n_clicks'),
     State("file-upload", "filename"),
     State("user-input", "value"),
     State("selected-history", "data"),
     State("chat-history-list", "children"),
     prevent_initial_call=False
 )
+def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, stream_n, chat_history_clicks, hidden_send_clicks, file_names, user_input, selected_history, chat_history_list_children):
     trigger = callback_context.triggered[0]['prop_id'].split('.')[0] if callback_context.triggered else ""
     session_id = session_id or get_session_id()
     session_lock = get_session_lock(session_id)
                 history_index_clicked
             )
+        # Handle File Upload
         file_was_uploaded_and_sent = False
         if trigger == "file-upload" and file_contents and file_names:
             uploads = []
                 with open(fp, "wb") as f:
                     f.write(base64.b64decode(data))
                 uploads.append({"name": fname, "is_img": is_img, "path": fp})
                 if _is_supported_doc(n) and not is_img:
                     text = _extract_text_from_upload(fp, ext)
                     if text.strip():
+                        embed_user_doc(session_id, fname, text)
+                        logger.info(f"Session {session_id}: Uploaded doc '{n}' embedded for user vector store")
             state["uploads"].extend(uploads)
             save_session_state(session_id)
             logger.info(f"Session {session_id}: Uploaded files {[u['name'] for u in uploads]}")
+        # Determine if send was triggered (via send-btn, hidden-send, or enter)
+        send_triggered = False
+        if trigger == "send-btn" or trigger == "hidden-send":
+            send_triggered = True
         # Handle Send
+        if send_triggered and user_input and user_input.strip():
+            question = user_input.strip()
+            state["messages"].append({"role": "user", "content": question})
             state["streaming"] = True
             state["stream_buffer"] = ""
             save_session_state(session_id)
+            def run_stream(session_id, messages, question):
                 try:
                     system_prompt = load_system_prompt()
+                    # Retrieve relevant context from global RAG
+                    rag_chunks = []
+                    try:
+                        # Search global docs
+                        global_embeds = []
+                        global_texts = []
+                        global_fnames = []
+                        for fname, emb in EMBEDDING_INDEX.items():
+                            global_embeds.append(emb)
+                            global_texts.append(EMBEDDING_TEXTS[fname])
+                            global_fnames.append(fname)
+                        global_rag = semantic_search(question, global_embeds, global_texts, global_fnames, top_k=2)
+                        if global_rag:
+                            for r in global_rag:
+                                rag_chunks.append(f"Global doc [{r['filename']}]:\n{r['text'][:1000]}")
+                        # Search user docs
+                        user_embeds, user_texts, user_fnames = get_user_embeddings(session_id)
+                        user_rag = semantic_search(question, user_embeds, user_texts, user_fnames, top_k=2)
+                        if user_rag:
+                            for r in user_rag:
+                                rag_chunks.append(f"User upload [{r['filename']}]:\n{r['text'][:1000]}")
+                    except Exception as e:
+                        logger.error(f"Session {session_id}: RAG error: {e}")
+                    context_block = ""
+                    if rag_chunks:
+                        context_block = "The following sources may help answer the question:\n\n" + "\n\n".join(rag_chunks) + "\n\n"
                     msg_list = [{"role": "system", "content": system_prompt}]
+                    if context_block:
+                        msg_list.append({"role": "system", "content": context_block})
                     for m in messages:
                         msg_list.append({"role": m["role"], "content": m["content"]})
                     response = openai.ChatCompletion.create(
                         state["stream_buffer"] = ""
                         state["streaming"] = False
                         save_session_state(session_id)
+                    logger.info(f"Session {session_id}: User: {question} | Assistant: {reply}")
                 except Exception as e:
                     session_lock = get_session_lock(session_id)
                     with session_lock:
                         save_session_state(session_id)
                     logger.error(f"Session {session_id}: Streaming error: {e}")
+            threading.Thread(target=run_stream, args=(session_id, list(state["messages"]), question), daemon=True).start()
             start_streaming = True
         # Handle New Chat button logic: auto-name and reset
                 chat_cards.append(chat_message_card(state["stream_buffer"], is_user=False))
             return upload_cards, chat_history_items, chat_cards, error, False, 0, "", selected_history
         # Always clear input after send
+        if send_triggered:
             return upload_cards, chat_history_items, chat_cards, error, (not state.get("streaming", False)), 0, "", selected_history
         return upload_cards, chat_history_items, chat_cards, error, (not state.get("streaming", False)), 0, user_input or "", selected_history