witspathology

Sleeping

App Files Files Community

IAMTFRMZA committed on Apr 16

Commit

90e2f9d

verified ·

1 Parent(s): 39ae89b

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -18

app.py CHANGED Viewed

@@ -2,18 +2,23 @@ import streamlit as st
 import os
 import time
 import re
 from openai import OpenAI
 # ------------------ App Configuration ------------------
 st.set_page_config(page_title="Document AI Assistant", layout="wide")
 st.title("📄 Document AI Assistant")
-st.caption("Chat with an AI Assistant on your medical/pathology documents")
-# ------------------ Load API Key and Assistant ID from Hugging Face Secrets ------------------
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 ASSISTANT_ID = os.environ.get("ASSISTANT_ID")
-# ------------------ Error Handling for Missing Secrets ------------------
 if not OPENAI_API_KEY or not ASSISTANT_ID:
     st.error("Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets.")
     st.stop()
@@ -30,7 +35,7 @@ if "image_url" not in st.session_state:
 if "image_updated" not in st.session_state:
     st.session_state.image_updated = False
-# ------------------ Sidebar Controls ------------------
 st.sidebar.header("🔧 Settings")
 if st.sidebar.button("🔄 Clear Chat"):
     st.session_state.messages = []
@@ -41,21 +46,20 @@ if st.sidebar.button("🔄 Clear Chat"):
 show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
-# ------------------ Layout: Image (Left) and Chat (Right) ------------------
 col1, col2 = st.columns([1, 2])
-# ------------------ Left Panel: Document Image ------------------
 with col1:
     if show_image and st.session_state.image_url:
         st.image(st.session_state.image_url, caption="📑 Extracted Page", use_container_width=True)
         st.session_state.image_updated = False
-# ------------------ Right Panel: Chat Interface ------------------
 with col2:
-    # -- Chat Input (Always at the Top)
     prompt = st.chat_input("Type your question about the document...")
-    # -- Pair messages (user + assistant) for display
     paired_messages = []
     buffer = []
     for msg in st.session_state.messages:
@@ -66,37 +70,33 @@ with col2:
     if buffer:
         paired_messages.append(buffer.copy())
-    # -- Display chat history (latest first, older scroll down)
     with st.container():
         for pair in reversed(paired_messages):
             for msg in pair:
                 with st.chat_message(msg["role"]):
                     st.write(msg["content"])
-    # -- Process new prompt if entered
     if prompt:
         st.session_state.messages.append({"role": "user", "content": prompt})
         try:
-            # Initialize thread if not already done
             if st.session_state.thread_id is None:
                 thread = client.beta.threads.create()
                 st.session_state.thread_id = thread.id
-            # Send user message to assistant
             client.beta.threads.messages.create(
                 thread_id=st.session_state.thread_id,
                 role="user",
                 content=prompt
             )
-            # Trigger assistant run
             run = client.beta.threads.runs.create(
                 thread_id=st.session_state.thread_id,
                 assistant_id=ASSISTANT_ID
             )
-            # Wait for assistant to respond
             with st.spinner("Assistant is thinking..."):
                 while True:
                     run_status = client.beta.threads.runs.retrieve(
@@ -107,7 +107,6 @@ with col2:
                         break
                     time.sleep(1)
-            # Retrieve latest assistant message
             messages = client.beta.threads.messages.list(thread_id=st.session_state.thread_id)
             assistant_message = None
             for message in reversed(messages.data):
@@ -117,7 +116,6 @@ with col2:
             st.session_state.messages.append({"role": "assistant", "content": assistant_message})
-            # -- Extract GitHub-hosted image URL if present
             image_match = re.search(
                 r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                 assistant_message
@@ -126,7 +124,48 @@ with col2:
                 st.session_state.image_url = image_match.group(0)
                 st.session_state.image_updated = True
-            st.rerun()  # Rerun to show updated image + message at top
         except Exception as e:
             st.error(f"❌ Error: {str(e)}")

 import os
 import time
 import re
+import uuid
+import threading
+import sounddevice as sd
+import numpy as np
 from openai import OpenAI
+from realtime_transcriber import WebSocketClient, connections, WEBSOCKET_URI, WEBSOCKET_HEADERS
 # ------------------ App Configuration ------------------
 st.set_page_config(page_title="Document AI Assistant", layout="wide")
 st.title("📄 Document AI Assistant")
+st.caption("Chat with an AI Assistant on your medical/pathology documents and voice")
+# ------------------ Load API Key and Assistant ID ------------------
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 ASSISTANT_ID = os.environ.get("ASSISTANT_ID")
 if not OPENAI_API_KEY or not ASSISTANT_ID:
     st.error("Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets.")
     st.stop()
 if "image_updated" not in st.session_state:
     st.session_state.image_updated = False
+# ------------------ Sidebar ------------------
 st.sidebar.header("🔧 Settings")
 if st.sidebar.button("🔄 Clear Chat"):
     st.session_state.messages = []
 show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
+# ------------------ Section 1: Layout (Image + Chat) ------------------
 col1, col2 = st.columns([1, 2])
+# ----- Left Panel: Document Image -----
 with col1:
     if show_image and st.session_state.image_url:
         st.image(st.session_state.image_url, caption="📑 Extracted Page", use_container_width=True)
         st.session_state.image_updated = False
+# ----- Right Panel: Chat Assistant -----
 with col2:
     prompt = st.chat_input("Type your question about the document...")
+    # Pair user + assistant messages
     paired_messages = []
     buffer = []
     for msg in st.session_state.messages:
     if buffer:
         paired_messages.append(buffer.copy())
+    # Render messages from newest to oldest
     with st.container():
         for pair in reversed(paired_messages):
             for msg in pair:
                 with st.chat_message(msg["role"]):
                     st.write(msg["content"])
+    # Handle new prompt
     if prompt:
         st.session_state.messages.append({"role": "user", "content": prompt})
         try:
             if st.session_state.thread_id is None:
                 thread = client.beta.threads.create()
                 st.session_state.thread_id = thread.id
             client.beta.threads.messages.create(
                 thread_id=st.session_state.thread_id,
                 role="user",
                 content=prompt
             )
             run = client.beta.threads.runs.create(
                 thread_id=st.session_state.thread_id,
                 assistant_id=ASSISTANT_ID
             )
             with st.spinner("Assistant is thinking..."):
                 while True:
                     run_status = client.beta.threads.runs.retrieve(
                         break
                     time.sleep(1)
             messages = client.beta.threads.messages.list(thread_id=st.session_state.thread_id)
             assistant_message = None
             for message in reversed(messages.data):
             st.session_state.messages.append({"role": "assistant", "content": assistant_message})
             image_match = re.search(
                 r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                 assistant_message
                 st.session_state.image_url = image_match.group(0)
                 st.session_state.image_updated = True
+            st.rerun()
         except Exception as e:
             st.error(f"❌ Error: {str(e)}")
+# ------------------ Section 3: Voice Transcription ------------------
+st.markdown("---")
+st.markdown("## 🎙️ Real-Time Voice Transcription")
+# Init client
+if "voice_client_id" not in st.session_state:
+    client_id = str(uuid.uuid4())
+    st.session_state.voice_client_id = client_id
+    connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
+    threading.Thread(target=connections[client_id].run, daemon=True).start()
+client_id = st.session_state.voice_client_id
+transcript_placeholder = st.empty()
+recording = st.checkbox("🎤 Start Recording")
+if recording:
+    st.warning("Recording is active. Speak clearly...")
+    def audio_stream_callback(indata, frames, time_info, status):
+        if status:
+            print(f"⚠️ Audio status: {status}")
+        if client_id in connections:
+            connections[client_id].enqueue_audio_chunk(16000, indata.copy())
+            transcript_placeholder.markdown(f"**Live Transcript:**\n\n{connections[client_id].transcript}")
+    stream = sd.InputStream(callback=audio_stream_callback, channels=1, samplerate=16000)
+    stream.start()
+    st.session_state["stream"] = stream
+else:
+    if "stream" in st.session_state:
+        st.session_state["stream"].stop()
+        del st.session_state["stream"]
+        st.success("Recording stopped.")
+# Final transcript
+if client_id in connections:
+    st.markdown("**Final Transcript Output:**")
+    st.markdown(connections[client_id].transcript)