IAMTFRMZA committed on
Commit
c7d0024
·
verified ·
1 Parent(s): d09f114

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -115
app.py CHANGED
@@ -10,76 +10,48 @@ from urllib.parse import quote
10
  from openai import OpenAI
11
 
12
  # ------------------ App Configuration ------------------
13
- st.set_page_config(page_title="Schlaeger Forrestdale DocAIA", layout="wide", initial_sidebar_state="collapsed")
14
  st.title("📄 Schlaeger Forrestdale Document Assistant")
15
- st.caption("Explore City of Armadale construction documents using AI + OCR 🧠")
16
 
17
  # ------------------ Load API Key ------------------
18
  OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
19
  if not OPENAI_API_KEY:
20
- st.error("❌ Missing OPENAI_API_KEY in Hugging Face Space secrets.")
21
  st.stop()
22
 
23
  client = OpenAI(api_key=OPENAI_API_KEY)
24
 
25
- # ------------------ Tabs ------------------
26
- tabs = st.tabs(["πŸ“œ Contract", "πŸ“ Technical"])
27
 
28
- # ================== CONTRACT TAB ==================
29
  with tabs[0]:
30
- ASSISTANT_ID = "asst_KsQRedoJUnEeStzfox1o06lO"
 
 
 
31
  if "contract_messages" not in st.session_state:
32
  st.session_state.contract_messages = []
33
  if "contract_thread_id" not in st.session_state:
34
  st.session_state.contract_thread_id = None
35
- if "image_url" not in st.session_state:
36
- st.session_state.image_url = None
37
-
38
- st.sidebar.header("📘 Contract Tools")
39
- if st.sidebar.button("🧹 Clear Chat", key="clear_contract"):
40
- st.session_state.contract_messages = []
41
- st.session_state.contract_thread_id = None
42
- st.session_state.image_url = None
43
- st.rerun()
44
-
45
- show_image = st.sidebar.toggle("📑 Show Page Image", value=True, key="show_image_toggle")
46
-
47
- keyword = st.sidebar.text_input("Search by Keyword", key="kw")
48
- if st.sidebar.button("🔎 Search", key="kw_btn") and keyword:
49
- st.session_state.contract_messages.append({"role": "user", "content": f"Find clauses or references related to: {keyword}"})
50
-
51
- section = st.sidebar.selectbox("📄 Jump to Section", [
52
- "Select a section...",
53
- "1. Formal Instrument of Contract",
54
- "2. Offer and Acceptance",
55
- "3. Key Personnel",
56
- "4. Contract Pricing",
57
- "5. Specifications",
58
- "6. WHS Policies",
59
- "7. Penalties and Delays",
60
- "8. Dispute Resolution",
61
- "9. Principal Obligations"
62
- ])
63
- if section != "Select a section...":
64
- st.session_state.contract_messages.append({"role": "user", "content": f"Summarize or list key points from section: {section}"})
65
-
66
- action = st.sidebar.selectbox("⚙️ Common Queries", [
67
- "Select an action...",
68
- "List all contractual obligations",
69
- "Summarize payment terms",
70
- "List WHS responsibilities",
71
- "Find delay-related penalties",
72
- "Extract dispute resolution steps"
73
- ])
74
- if action != "Select an action...":
75
- st.session_state.contract_messages.append({"role": "user", "content": action})
76
-
77
- chat_col, img_col = st.columns([2, 1])
78
- with chat_col:
79
- st.markdown("### 🧠 Ask Contract Document Question")
80
- user_input = st.chat_input("Ask something about the contract")
81
- if user_input:
82
- st.session_state.contract_messages.append({"role": "user", "content": user_input})
83
 
84
  if st.session_state.contract_messages and st.session_state.contract_messages[-1]["role"] == "user":
85
  try:
@@ -92,38 +64,43 @@ with tabs[0]:
92
  role="user",
93
  content=st.session_state.contract_messages[-1]["content"]
94
  )
 
95
  run = client.beta.threads.runs.create(
96
  thread_id=st.session_state.contract_thread_id,
97
- assistant_id=ASSISTANT_ID
98
  )
99
 
100
- with st.spinner("🤖 Analyzing contract..."):
101
  while True:
102
- status = client.beta.threads.runs.retrieve(
103
  thread_id=st.session_state.contract_thread_id,
104
  run_id=run.id
105
  )
106
- if status.status in ("completed", "failed", "cancelled"):
107
  break
108
  time.sleep(1)
109
 
110
- if status.status == "completed":
111
  messages = client.beta.threads.messages.list(thread_id=st.session_state.contract_thread_id)
112
- for m in reversed(messages.data):
113
- if m.role == "assistant":
114
- content = m.content[0].text.value
115
- st.session_state.contract_messages.append({"role": "assistant", "content": content})
116
 
117
- match = re.search(r'Document Reference:\s*(.*?),\s*Page\s*(\d+)', content)
118
  if match:
119
- doc, page = match.group(1).strip(), int(match.group(2))
 
120
  page_str = f"{page:04d}"
121
- folder = quote(doc)
122
- st.session_state.image_url = f"https://raw.githubusercontent.com/AndrewLORTech/c2ozschlaegerforrestdale/main/{folder}/{folder}_page_{page_str}.png"
 
 
123
  break
124
  else:
125
- st.error("❌ Assistant failed.")
126
  st.rerun()
 
127
  except Exception as e:
128
  st.error(f"❌ Error: {e}")
129
 
@@ -131,31 +108,32 @@ with tabs[0]:
131
  with st.chat_message(msg["role"]):
132
  st.markdown(msg["content"], unsafe_allow_html=True)
133
 
134
- with img_col:
135
- if show_image and st.session_state.image_url:
136
- with st.spinner("Loading image..."):
137
- try:
138
- res = requests.get(st.session_state.image_url)
139
- img = Image.open(BytesIO(res.content))
140
- st.image(img, caption="📄 OCR Page Image", use_container_width=True)
141
- except:
142
- st.error("⚠️ Failed to load image.")
143
-
144
- # ================== TECHNICAL TAB ==================
145
  with tabs[1]:
146
- ASSISTANT_ID = "asst_DjvuWBc7tCvMbAhY7n1em4BZ"
 
147
  if "tech_messages" not in st.session_state:
148
  st.session_state.tech_messages = []
149
  if "tech_thread_id" not in st.session_state:
150
  st.session_state.tech_thread_id = None
151
- if "results" not in st.session_state:
152
- st.session_state.results = []
153
- if "lightbox_url" not in st.session_state:
154
- st.session_state.lightbox_url = None
155
 
156
- prompt = st.chat_input("Ask about plans, drawings or components")
157
- if prompt:
158
- st.session_state.tech_messages.append({"role": "user", "content": prompt})
159
 
160
  if st.session_state.tech_messages and st.session_state.tech_messages[-1]["role"] == "user":
161
  try:
@@ -168,51 +146,54 @@ with tabs[1]:
168
  role="user",
169
  content=st.session_state.tech_messages[-1]["content"]
170
  )
 
171
  run = client.beta.threads.runs.create(
172
  thread_id=st.session_state.tech_thread_id,
173
- assistant_id=ASSISTANT_ID
174
  )
175
 
176
- with st.spinner("🤖 Querying technical documents..."):
177
  while True:
178
- status = client.beta.threads.runs.retrieve(
179
  thread_id=st.session_state.tech_thread_id,
180
  run_id=run.id
181
  )
182
- if status.status in ("completed", "failed", "cancelled"):
183
  break
184
  time.sleep(1)
185
 
186
- if status.status == "completed":
187
  messages = client.beta.threads.messages.list(thread_id=st.session_state.tech_thread_id)
188
- for m in reversed(messages.data):
189
- if m.role == "assistant":
190
- content = m.content[0].text.value
191
- st.session_state.tech_messages.append({"role": "assistant", "content": content})
192
  try:
193
- json_data = json.loads(content.strip("`json "))
194
- st.session_state.results = json_data
195
  except:
196
- st.session_state.results = []
197
  break
198
  else:
199
- st.error("⚠️ Assistant failed to complete.")
200
  st.rerun()
 
201
  except Exception as e:
202
  st.error(f"❌ Error: {e}")
203
 
204
- if st.session_state.results:
205
- disciplines = sorted(set(d.get("discipline", "") for d in st.session_state.results))
206
- selected = st.selectbox("🌍 Filter by discipline", ["All"] + disciplines)
207
  page_size = 8
208
  page_num = st.number_input("Page", min_value=1, step=1, value=1)
209
 
210
- filtered = [r for r in st.session_state.results if selected == "All" or r.get("discipline") == selected]
211
- paged = filtered[(page_num - 1) * page_size : page_num * page_size]
212
 
213
  st.markdown("---")
214
  st.subheader("📂 Drawing Results")
215
  cols = st.columns(4)
 
216
  for i, item in enumerate(paged):
217
  with cols[i % 4]:
218
  st.markdown(f"**{item['drawing_number']}**")
@@ -220,15 +201,16 @@ with tabs[1]:
220
  st.caption(item.get("summary", ""))
221
  for url in item.get("images", [])[:1]:
222
  if st.button("🖼️ View Image", key=f"view_{i}"):
223
- st.session_state.lightbox_url = url
224
-
225
- if st.session_state.lightbox_url:
226
- st.markdown("---")
227
- st.image(st.session_state.lightbox_url, use_column_width=True, caption="πŸ” Enlarged Preview")
228
- if st.button("❌ Close Viewer"):
229
- st.session_state.lightbox_url = None
230
- st.rerun()
 
231
  else:
232
  for msg in st.session_state.tech_messages:
233
  with st.chat_message(msg["role"]):
234
- st.markdown(msg["content"], unsafe_allow_html=True)
 
10
  from openai import OpenAI
11
 
12
  # ------------------ App Configuration ------------------
13
+ st.set_page_config(page_title="Schlaeger Forrestdale TechDocAIA", layout="wide", initial_sidebar_state="collapsed")
14
  st.title("📄 Schlaeger Forrestdale Document Assistant")
15
+ st.caption("Explore City of Armadale construction documents using AI + OCR 🧐")
16
 
17
  # ------------------ Load API Key ------------------
18
  OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
19
  if not OPENAI_API_KEY:
20
+ st.error("❌ Missing OPENAI_API_KEY. Please set it in Hugging Face Space secrets.")
21
  st.stop()
22
 
23
  client = OpenAI(api_key=OPENAI_API_KEY)
24
 
25
+ # ------------------ Tabs Setup ------------------
26
+ tabs = st.tabs(["πŸ“‘ Contract", "πŸ“ Technical"])
27
 
28
+ # ------------------ Contract Tab ------------------
29
  with tabs[0]:
30
+ from urllib.parse import quote
31
+
32
+ ASSISTANT_ID_CONTRACT = "asst_KsQRedoJUnEeStzfox1o06lO"
33
+
34
  if "contract_messages" not in st.session_state:
35
  st.session_state.contract_messages = []
36
  if "contract_thread_id" not in st.session_state:
37
  st.session_state.contract_thread_id = None
38
+ if "contract_image_url" not in st.session_state:
39
+ st.session_state.contract_image_url = None
40
+ if "contract_image_updated" not in st.session_state:
41
+ st.session_state.contract_image_updated = False
42
+ if "contract_pending_prompt" not in st.session_state:
43
+ st.session_state.contract_pending_prompt = None
44
+
45
+ col1, col2 = st.columns([2, 1])
46
+ with col1:
47
+ st.markdown("### 🧠 Ask a Document-Specific Question")
48
+ user_prompt = st.chat_input("Example: What is the defects liability period?")
49
+
50
+ if user_prompt:
51
+ st.session_state.contract_messages.append({"role": "user", "content": user_prompt})
52
+ elif st.session_state.contract_pending_prompt:
53
+ st.session_state.contract_messages.append({"role": "user", "content": st.session_state.contract_pending_prompt})
54
+ st.session_state.contract_pending_prompt = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  if st.session_state.contract_messages and st.session_state.contract_messages[-1]["role"] == "user":
57
  try:
 
64
  role="user",
65
  content=st.session_state.contract_messages[-1]["content"]
66
  )
67
+
68
  run = client.beta.threads.runs.create(
69
  thread_id=st.session_state.contract_thread_id,
70
+ assistant_id=ASSISTANT_ID_CONTRACT
71
  )
72
 
73
+ with st.spinner("🤖 Parsing and responding with referenced content..."):
74
  while True:
75
+ run_status = client.beta.threads.runs.retrieve(
76
  thread_id=st.session_state.contract_thread_id,
77
  run_id=run.id
78
  )
79
+ if run_status.status in ("completed", "failed", "cancelled"):
80
  break
81
  time.sleep(1)
82
 
83
+ if run_status.status == "completed":
84
  messages = client.beta.threads.messages.list(thread_id=st.session_state.contract_thread_id)
85
+ for message in reversed(messages.data):
86
+ if message.role == "assistant":
87
+ assistant_reply = message.content[0].text.value
88
+ st.session_state.contract_messages.append({"role": "assistant", "content": assistant_reply})
89
 
90
+ match = re.search(r'Document Reference:\s*(.*?),\s*Page\s*(\d+)', assistant_reply)
91
  if match:
92
+ doc_name = match.group(1).strip()
93
+ page = int(match.group(2))
94
  page_str = f"{page:04d}"
95
+ folder = quote(doc_name)
96
+ image_url = f"https://raw.githubusercontent.com/AndrewLORTech/c2ozschlaegerforrestdale/main/{folder}/{folder}_page_{page_str}.png"
97
+ st.session_state.contract_image_url = image_url
98
+ st.session_state.contract_image_updated = True
99
  break
100
  else:
101
+ st.error(f"⚠️ Assistant failed: {run_status.status}")
102
  st.rerun()
103
+
104
  except Exception as e:
105
  st.error(f"❌ Error: {e}")
106
 
 
108
  with st.chat_message(msg["role"]):
109
  st.markdown(msg["content"], unsafe_allow_html=True)
110
 
111
+ with col2:
112
+ if st.session_state.contract_image_url:
113
+ try:
114
+ response = requests.get(st.session_state.contract_image_url)
115
+ response.raise_for_status()
116
+ img = Image.open(BytesIO(response.content))
117
+ st.image(img, caption="📄 OCR Page Image", use_container_width=True)
118
+ except Exception as e:
119
+ st.error(f"❗ Failed to load image: {e}")
120
+
121
+ # ------------------ Technical Tab ------------------
122
  with tabs[1]:
123
+ ASSISTANT_ID_TECHNICAL = "asst_DjvuWBc7tCvMbAhY7n1em4BZ"
124
+
125
  if "tech_messages" not in st.session_state:
126
  st.session_state.tech_messages = []
127
  if "tech_thread_id" not in st.session_state:
128
  st.session_state.tech_thread_id = None
129
+ if "tech_results" not in st.session_state:
130
+ st.session_state.tech_results = []
131
+ if "tech_lightbox_url" not in st.session_state:
132
+ st.session_state.tech_lightbox_url = None
133
 
134
+ user_prompt = st.chat_input("Ask about plans, drawings or components")
135
+ if user_prompt:
136
+ st.session_state.tech_messages.append({"role": "user", "content": user_prompt})
137
 
138
  if st.session_state.tech_messages and st.session_state.tech_messages[-1]["role"] == "user":
139
  try:
 
146
  role="user",
147
  content=st.session_state.tech_messages[-1]["content"]
148
  )
149
+
150
  run = client.beta.threads.runs.create(
151
  thread_id=st.session_state.tech_thread_id,
152
+ assistant_id=ASSISTANT_ID_TECHNICAL
153
  )
154
 
155
+ with st.spinner("🤖 Parsing and responding..."):
156
  while True:
157
+ run_status = client.beta.threads.runs.retrieve(
158
  thread_id=st.session_state.tech_thread_id,
159
  run_id=run.id
160
  )
161
+ if run_status.status in ("completed", "failed", "cancelled"):
162
  break
163
  time.sleep(1)
164
 
165
+ if run_status.status == "completed":
166
  messages = client.beta.threads.messages.list(thread_id=st.session_state.tech_thread_id)
167
+ for message in reversed(messages.data):
168
+ if message.role == "assistant":
169
+ assistant_reply = message.content[0].text.value
170
+ st.session_state.tech_messages.append({"role": "assistant", "content": assistant_reply})
171
  try:
172
+ json_data = json.loads(assistant_reply.strip("`json "))
173
+ st.session_state.tech_results = json_data
174
  except:
175
+ st.session_state.tech_results = []
176
  break
177
  else:
178
+ st.error(f"⚠️ Assistant failed: {run_status.status}")
179
  st.rerun()
180
+
181
  except Exception as e:
182
  st.error(f"❌ Error: {e}")
183
 
184
+ if st.session_state.tech_results:
185
+ disciplines = sorted(set(d.get("discipline", "") for d in st.session_state.tech_results))
186
+ selected_discipline = st.selectbox("🌍 Filter by discipline", ["All"] + disciplines)
187
  page_size = 8
188
  page_num = st.number_input("Page", min_value=1, step=1, value=1)
189
 
190
+ filtered_results = [r for r in st.session_state.tech_results if selected_discipline == "All" or r.get("discipline") == selected_discipline]
191
+ paged = filtered_results[(page_num - 1) * page_size : page_num * page_size]
192
 
193
  st.markdown("---")
194
  st.subheader("📂 Drawing Results")
195
  cols = st.columns(4)
196
+
197
  for i, item in enumerate(paged):
198
  with cols[i % 4]:
199
  st.markdown(f"**{item['drawing_number']}**")
 
201
  st.caption(item.get("summary", ""))
202
  for url in item.get("images", [])[:1]:
203
  if st.button("🖼️ View Image", key=f"view_{i}"):
204
+ st.session_state.tech_lightbox_url = url
205
+
206
+ if st.session_state.tech_lightbox_url:
207
+ col_a, col_b = st.columns([1, 2])
208
+ with col_b:
209
+ st.image(st.session_state.tech_lightbox_url, caption="πŸ” Enlarged Preview", use_container_width=True)
210
+ if st.button("❌ Close Viewer"):
211
+ st.session_state.tech_lightbox_url = None
212
+ st.rerun()
213
  else:
214
  for msg in st.session_state.tech_messages:
215
  with st.chat_message(msg["role"]):
216
+ st.markdown(msg["content"], unsafe_allow_html=True)