IAMTFRMZA committed
Commit 90e2f9d · verified · Parent: 39ae89b

Update app.py

Files changed (1): app.py (+57 -18)
app.py CHANGED
@@ -2,18 +2,23 @@ import streamlit as st
 import os
 import time
 import re
+import uuid
+import threading
+import sounddevice as sd
+import numpy as np
+
 from openai import OpenAI
+from realtime_transcriber import WebSocketClient, connections, WEBSOCKET_URI, WEBSOCKET_HEADERS
 
 # ------------------ App Configuration ------------------
 st.set_page_config(page_title="Document AI Assistant", layout="wide")
 st.title("📄 Document AI Assistant")
-st.caption("Chat with an AI Assistant on your medical/pathology documents")
+st.caption("Chat with an AI Assistant on your medical/pathology documents and voice")
 
-# ------------------ Load API Key and Assistant ID from Hugging Face Secrets ------------------
+# ------------------ Load API Key and Assistant ID ------------------
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 ASSISTANT_ID = os.environ.get("ASSISTANT_ID")
 
-# ------------------ Error Handling for Missing Secrets ------------------
 if not OPENAI_API_KEY or not ASSISTANT_ID:
     st.error("Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets.")
     st.stop()
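The new `realtime_transcriber` module is imported here but is not included in this commit. From the call sites later in the file (a `run()` method handed to `threading.Thread`, `enqueue_audio_chunk(sample_rate, chunk)` invoked from the audio callback, and a `transcript` attribute read by the UI), a minimal sketch of the interface the app assumes might look like the following; every name and signature below is inferred from app.py, not taken from the actual module:

# realtime_transcriber.py: hypothetical skeleton inferred from app.py's call sites
import threading

WEBSOCKET_URI = "wss://example.invalid/realtime"         # placeholder, not the real endpoint
WEBSOCKET_HEADERS = {"Authorization": "Bearer <token>"}  # placeholder headers

# Shared registry: app.py stores one client per browser session in here.
connections = {}


class WebSocketClient:
    """Minimal surface app.py relies on: run(), enqueue_audio_chunk(), .transcript."""

    def __init__(self, uri, headers, client_id):
        self.uri = uri
        self.headers = headers
        self.client_id = client_id
        self.transcript = ""           # grows as transcription results arrive
        self._chunks = []              # queued (sample_rate, ndarray) pairs
        self._lock = threading.Lock()  # enqueue_audio_chunk runs on the audio thread

    def enqueue_audio_chunk(self, sample_rate, chunk):
        with self._lock:
            self._chunks.append((sample_rate, chunk))

    def run(self):
        # The real module presumably opens the WebSocket connection here,
        # streams queued chunks up, and appends results to self.transcript.
        pass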
@@ -30,7 +35,7 @@ if "image_url" not in st.session_state:
 if "image_updated" not in st.session_state:
     st.session_state.image_updated = False
 
-# ------------------ Sidebar Controls ------------------
+# ------------------ Sidebar ------------------
 st.sidebar.header("🔧 Settings")
 if st.sidebar.button("🔄 Clear Chat"):
     st.session_state.messages = []
@@ -41,21 +46,20 @@ if st.sidebar.button("🔄 Clear Chat"):
 
 show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
 
-# ------------------ Layout: Image (Left) and Chat (Right) ------------------
+# ------------------ Section 1: Layout (Image + Chat) ------------------
 col1, col2 = st.columns([1, 2])
 
-# ------------------ Left Panel: Document Image ------------------
+# ----- Left Panel: Document Image -----
 with col1:
     if show_image and st.session_state.image_url:
         st.image(st.session_state.image_url, caption="📑 Extracted Page", use_container_width=True)
         st.session_state.image_updated = False
 
-# ------------------ Right Panel: Chat Interface ------------------
+# ----- Right Panel: Chat Assistant -----
 with col2:
-    # -- Chat Input (Always at the Top)
     prompt = st.chat_input("Type your question about the document...")
 
-    # -- Pair messages (user + assistant) for display
+    # Pair user + assistant messages
     paired_messages = []
     buffer = []
     for msg in st.session_state.messages:
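The body of the pairing loop sits between hunks and is not shown in this diff. Assuming the conventional grouping (flush the buffer after each assistant reply), a quick standalone run shows why the render loop below uses `reversed()`, with the newest exchange printed first:

# Toy illustration of the pairing and display order; the flush condition is an
# assumption, since the loop body is outside the changed hunks.
messages = [
    {"role": "user", "content": "Q1"}, {"role": "assistant", "content": "A1"},
    {"role": "user", "content": "Q2"}, {"role": "assistant", "content": "A2"},
]

paired_messages, buffer = [], []
for msg in messages:
    buffer.append(msg)
    if msg["role"] == "assistant":
        paired_messages.append(buffer.copy())
        buffer.clear()
if buffer:
    paired_messages.append(buffer.copy())

for pair in reversed(paired_messages):
    print([m["content"] for m in pair])
# ['Q2', 'A2']
# ['Q1', 'A1']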
@@ -66,37 +70,33 @@ with col2:
     if buffer:
         paired_messages.append(buffer.copy())
 
-    # -- Display chat history (latest first, older scroll down)
+    # Render messages from newest to oldest
     with st.container():
         for pair in reversed(paired_messages):
             for msg in pair:
                 with st.chat_message(msg["role"]):
                     st.write(msg["content"])
 
-    # -- Process new prompt if entered
+    # Handle new prompt
    if prompt:
         st.session_state.messages.append({"role": "user", "content": prompt})
 
         try:
-            # Initialize thread if not already done
             if st.session_state.thread_id is None:
                 thread = client.beta.threads.create()
                 st.session_state.thread_id = thread.id
 
-            # Send user message to assistant
             client.beta.threads.messages.create(
                 thread_id=st.session_state.thread_id,
                 role="user",
                 content=prompt
             )
 
-            # Trigger assistant run
             run = client.beta.threads.runs.create(
                 thread_id=st.session_state.thread_id,
                 assistant_id=ASSISTANT_ID
             )
 
-            # Wait for assistant to respond
             with st.spinner("Assistant is thinking..."):
                 while True:
                     run_status = client.beta.threads.runs.retrieve(
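The hunk cuts off in the middle of the `retrieve` call, so the exact break condition is not visible here. For reference, a more defensive version of the same poll, with a timeout and all terminal run states of the Assistants API handled, could be factored out like this (`wait_for_run` is a hypothetical helper, not part of the commit, and the 60-second timeout is an assumption):

import time

TERMINAL_STATES = {"completed", "failed", "cancelled", "expired", "requires_action"}

def wait_for_run(client, thread_id, run_id, timeout=60.0):
    # Poll once per second until the run reaches a terminal state or times out.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        run_status = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run_id,
        )
        if run_status.status in TERMINAL_STATES:
            return run_status
        time.sleep(1)
    raise TimeoutError(f"Run {run_id} did not finish within {timeout} seconds")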
@@ -107,7 +107,6 @@ with col2:
                         break
                     time.sleep(1)
 
-            # Retrieve latest assistant message
             messages = client.beta.threads.messages.list(thread_id=st.session_state.thread_id)
             assistant_message = None
             for message in reversed(messages.data):
@@ -117,7 +116,6 @@ with col2:
 
             st.session_state.messages.append({"role": "assistant", "content": assistant_message})
 
-            # -- Extract GitHub-hosted image URL if present
             image_match = re.search(
                 r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                 assistant_message
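The pattern only matches `.png` paths under `main/` built from word characters, hyphens, and slashes; a filename with spaces or a different extension would be skipped. A quick sanity check against a made-up reply (the URL below is illustrative, not a real page from the repository):

import re

pattern = r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png'
reply = "See page 12: https://raw.githubusercontent.com/AndrewLORTech/surgical-pathology-manual/main/page-012.png"

match = re.search(pattern, reply)
if match:
    print(match.group(0))  # the extracted URL that col1 will display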
@@ -126,7 +124,48 @@ with col2:
                 st.session_state.image_url = image_match.group(0)
                 st.session_state.image_updated = True
 
-            st.rerun()  # Rerun to show updated image + message at top
+            st.rerun()
 
         except Exception as e:
             st.error(f"❌ Error: {str(e)}")
+
+# ------------------ Section 3: Voice Transcription ------------------
+st.markdown("---")
+st.markdown("## 🎙️ Real-Time Voice Transcription")
+
+# Init client
+if "voice_client_id" not in st.session_state:
+    client_id = str(uuid.uuid4())
+    st.session_state.voice_client_id = client_id
+    connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
+    threading.Thread(target=connections[client_id].run, daemon=True).start()
+
+client_id = st.session_state.voice_client_id
+transcript_placeholder = st.empty()
+
+recording = st.checkbox("🎤 Start Recording")
+
+if recording:
+    st.warning("Recording is active. Speak clearly...")
+
+    def audio_stream_callback(indata, frames, time_info, status):
+        if status:
+            print(f"⚠️ Audio status: {status}")
+        if client_id in connections:
+            connections[client_id].enqueue_audio_chunk(16000, indata.copy())
+            transcript_placeholder.markdown(f"**Live Transcript:**\n\n{connections[client_id].transcript}")
+
+    stream = sd.InputStream(callback=audio_stream_callback, channels=1, samplerate=16000)
+    stream.start()
+    st.session_state["stream"] = stream
+
+else:
+    if "stream" in st.session_state:
+        st.session_state["stream"].stop()
+        del st.session_state["stream"]
+        st.success("Recording stopped.")
+
+# Final transcript
+if client_id in connections:
+    st.markdown("**Final Transcript Output:**")
+    st.markdown(connections[client_id].transcript)