File size: 9,855 Bytes
3d9fd27
4a71975
3d9fd27
 
d09f114
 
 
 
 
9910527
4a71975
d09f114
c7d0024
d09f114
c7d0024
3d9fd27
d09f114
9910527
3d9fd27
c7d0024
3d9fd27
209a87c
3d9fd27
209a87c
c7d0024
 
d09f114
c7d0024
d09f114
c7d0024
 
 
 
d09f114
 
 
 
c7d0024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d09f114
 
 
 
 
 
 
 
 
 
 
 
c7d0024
d09f114
 
c7d0024
d09f114
 
c7d0024
d09f114
c7d0024
d09f114
 
 
c7d0024
d09f114
 
209a87c
c7d0024
d09f114
c7d0024
 
 
 
209a87c
c7d0024
d09f114
c7d0024
 
d09f114
c7d0024
 
 
 
d09f114
 
c7d0024
d09f114
c7d0024
d09f114
 
209a87c
d09f114
 
 
3d9fd27
c7d0024
 
 
 
 
 
 
 
 
 
 
d09f114
c7d0024
 
d09f114
 
 
 
c7d0024
 
 
 
d09f114
c7d0024
 
 
d09f114
 
3d9fd27
d09f114
 
 
 
3d9fd27
 
 
 
f1f3914
c7d0024
3d9fd27
 
c7d0024
3d9fd27
717ae43
c7d0024
d09f114
c7d0024
d09f114
 
 
c7d0024
d09f114
 
f1f3914
c7d0024
3d9fd27
c7d0024
 
 
 
3d9fd27
c7d0024
 
d09f114
c7d0024
3d9fd27
d09f114
c7d0024
d09f114
c7d0024
3d9fd27
d09f114
 
c7d0024
 
 
d09f114
 
 
c7d0024
 
d09f114
 
 
 
c7d0024
d09f114
 
 
 
 
 
 
c7d0024
 
 
 
 
 
 
 
 
d09f114
 
 
c7d0024
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import streamlit as st
import os
import time
import re
import requests
import json
from PIL import Image
from io import BytesIO
from urllib.parse import quote
from openai import OpenAI

# ------------------ App Configuration ------------------
# Page chrome: browser-tab title, wide layout, sidebar collapsed on load.
st.set_page_config(
    page_title="Schlaeger Forrestdale TechDocAIA",
    layout="wide",
    initial_sidebar_state="collapsed",
)
st.title("πŸ“„ Schlaeger Forrestdale Document Assistant")
st.caption("Explore City of Armadale construction documents using AI + OCR 🧐")

# ------------------ Load API Key ------------------
# The key comes from the process environment; an unset or empty value halts
# the script before any OpenAI client is created.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if OPENAI_API_KEY:
    client = OpenAI(api_key=OPENAI_API_KEY)
else:
    st.error("❌ Missing OPENAI_API_KEY. Please set it in Hugging Face Space secrets.")
    st.stop()

# ------------------ Tabs Setup ------------------
# Index 0 is the contract tab, index 1 the technical tab.
tab_labels = ["πŸ“‘ Contract", "πŸ“ Technical"]
tabs = st.tabs(tab_labels)

# ------------------ Contract Tab ------------------
# Left column: chat with the contract assistant. Right column: preview of the
# page image referenced by the most recent answer.
with tabs[0]:
    ASSISTANT_ID_CONTRACT = "asst_KsQRedoJUnEeStzfox1o06lO"

    # Run statuses that can never change again. Polling must stop on every one
    # of them, otherwise an "expired" or "incomplete" run spins forever.
    CONTRACT_RUN_TERMINAL = ("completed", "failed", "cancelled", "expired", "incomplete")
    # Upper bound on how long one question may block the script.
    CONTRACT_RUN_TIMEOUT_SECONDS = 120
    # Upper bound on the page-image download so a stalled host cannot hang the UI.
    CONTRACT_IMAGE_TIMEOUT_SECONDS = 10

    # Seed per-session state on first load only.
    if "contract_messages" not in st.session_state:
        st.session_state.contract_messages = []
    if "contract_thread_id" not in st.session_state:
        st.session_state.contract_thread_id = None
    if "contract_image_url" not in st.session_state:
        st.session_state.contract_image_url = None
    if "contract_image_updated" not in st.session_state:
        st.session_state.contract_image_updated = False
    if "contract_pending_prompt" not in st.session_state:
        st.session_state.contract_pending_prompt = None

    col1, col2 = st.columns([2, 1])
    with col1:
        st.markdown("### 🧠 Ask a Document-Specific Question")
        user_prompt = st.chat_input("Example: What is the defects liability period?")

        # A freshly typed prompt wins; otherwise consume a queued one, if any.
        if user_prompt:
            st.session_state.contract_messages.append({"role": "user", "content": user_prompt})
        elif st.session_state.contract_pending_prompt:
            st.session_state.contract_messages.append({"role": "user", "content": st.session_state.contract_pending_prompt})
            st.session_state.contract_pending_prompt = None

        contract_history = st.session_state.contract_messages

        # A trailing user message means there is a question still to answer.
        if contract_history and contract_history[-1]["role"] == "user":
            failure = None
            try:
                if st.session_state.contract_thread_id is None:
                    thread = client.beta.threads.create()
                    st.session_state.contract_thread_id = thread.id
                thread_id = st.session_state.contract_thread_id

                client.beta.threads.messages.create(
                    thread_id=thread_id,
                    role="user",
                    content=contract_history[-1]["content"],
                )

                run = client.beta.threads.runs.create(
                    thread_id=thread_id,
                    assistant_id=ASSISTANT_ID_CONTRACT,
                )

                deadline = time.monotonic() + CONTRACT_RUN_TIMEOUT_SECONDS
                with st.spinner("πŸ€– Parsing and responding with referenced content..."):
                    while True:
                        run_status = client.beta.threads.runs.retrieve(
                            thread_id=thread_id,
                            run_id=run.id,
                        )
                        # "requires_action" waits for tool output this app never
                        # submits, so it is treated as a stop condition as well.
                        if (
                            run_status.status in CONTRACT_RUN_TERMINAL
                            or run_status.status == "requires_action"
                            or time.monotonic() >= deadline
                        ):
                            break
                        time.sleep(1)

                if run_status.status == "completed":
                    # Request newest-first explicitly so the first assistant
                    # message found is the answer to THIS question rather than
                    # the oldest answer in the thread.
                    messages = client.beta.threads.messages.list(
                        thread_id=thread_id,
                        order="desc",
                    )
                    assistant_reply = None
                    for message in messages.data:
                        if message.role != "assistant":
                            continue
                        # Keep only text parts; other part types carry no .text.
                        text_parts = [
                            part.text.value
                            for part in message.content
                            if getattr(part, "type", None) == "text"
                        ]
                        assistant_reply = "\n\n".join(text_parts)
                        break

                    if assistant_reply:
                        contract_history.append({"role": "assistant", "content": assistant_reply})

                        # Replies are expected to cite their source as
                        # "Document Reference: <name>, Page <n>".
                        match = re.search(r'Document Reference:\s*(.*?),\s*Page\s*(\d+)', assistant_reply)
                        if match:
                            doc_name = match.group(1).strip()
                            page = int(match.group(2))
                            page_str = f"{page:04d}"
                            folder = quote(doc_name)
                            image_url = f"https://raw.githubusercontent.com/AndrewLORTech/c2ozschlaegerforrestdale/main/{folder}/{folder}_page_{page_str}.png"
                            st.session_state.contract_image_url = image_url
                            st.session_state.contract_image_updated = True
                    else:
                        failure = "Assistant finished without returning a text reply."
                elif run_status.status in CONTRACT_RUN_TERMINAL:
                    failure = f"⚠️ Assistant failed: {run_status.status}"
                else:
                    # Timed out or stuck in requires_action: cancel so the
                    # thread is not left with an active run.
                    failure = f"Assistant did not finish (last status: {run_status.status})."
                    client.beta.threads.runs.cancel(thread_id=thread_id, run_id=run.id)

            except Exception as e:
                failure = f"❌ Error: {e}"

            if failure is not None:
                # Recording the failure as the latest chat entry keeps it visible
                # after the rerun and stops the same prompt being resubmitted
                # on every subsequent script run.
                contract_history.append({"role": "assistant", "content": failure})
            st.rerun()

        # NOTE(review): unsafe_allow_html renders user and model text as raw
        # HTML; confirm this is intentional before exposing the app more widely.
        for msg in contract_history:
            with st.chat_message(msg["role"]):
                st.markdown(msg["content"], unsafe_allow_html=True)

    with col2:
        if st.session_state.contract_image_url:
            try:
                response = requests.get(
                    st.session_state.contract_image_url,
                    timeout=CONTRACT_IMAGE_TIMEOUT_SECONDS,
                )
                response.raise_for_status()
                img = Image.open(BytesIO(response.content))
                st.image(img, caption="πŸ“„ OCR Page Image", use_container_width=True)
            except Exception as e:
                st.error(f"❗ Failed to load image: {e}")

# ------------------ Technical Tab ------------------
# Ask the technical assistant for drawings. A reply that parses as a JSON list
# of objects is shown as a filterable, paged grid; otherwise the chat history
# is shown instead.
with tabs[1]:
    ASSISTANT_ID_TECHNICAL = "asst_DjvuWBc7tCvMbAhY7n1em4BZ"

    # Run statuses that can never change again. Polling must stop on every one
    # of them, otherwise an "expired" or "incomplete" run spins forever.
    TECH_RUN_TERMINAL = ("completed", "failed", "cancelled", "expired", "incomplete")
    # Upper bound on how long one question may block the script.
    TECH_RUN_TIMEOUT_SECONDS = 120

    # Seed per-session state on first load only.
    if "tech_messages" not in st.session_state:
        st.session_state.tech_messages = []
    if "tech_thread_id" not in st.session_state:
        st.session_state.tech_thread_id = None
    if "tech_results" not in st.session_state:
        st.session_state.tech_results = []
    if "tech_lightbox_url" not in st.session_state:
        st.session_state.tech_lightbox_url = None

    user_prompt = st.chat_input("Ask about plans, drawings or components")
    if user_prompt:
        st.session_state.tech_messages.append({"role": "user", "content": user_prompt})

    tech_history = st.session_state.tech_messages

    # A trailing user message means there is a question still to answer.
    if tech_history and tech_history[-1]["role"] == "user":
        failure = None
        try:
            if st.session_state.tech_thread_id is None:
                thread = client.beta.threads.create()
                st.session_state.tech_thread_id = thread.id
            thread_id = st.session_state.tech_thread_id

            client.beta.threads.messages.create(
                thread_id=thread_id,
                role="user",
                content=tech_history[-1]["content"],
            )

            run = client.beta.threads.runs.create(
                thread_id=thread_id,
                assistant_id=ASSISTANT_ID_TECHNICAL,
            )

            deadline = time.monotonic() + TECH_RUN_TIMEOUT_SECONDS
            with st.spinner("πŸ€– Parsing and responding..."):
                while True:
                    run_status = client.beta.threads.runs.retrieve(
                        thread_id=thread_id,
                        run_id=run.id,
                    )
                    # "requires_action" waits for tool output this app never
                    # submits, so it is treated as a stop condition as well.
                    if (
                        run_status.status in TECH_RUN_TERMINAL
                        or run_status.status == "requires_action"
                        or time.monotonic() >= deadline
                    ):
                        break
                    time.sleep(1)

            if run_status.status == "completed":
                # Request newest-first explicitly so the first assistant message
                # found is the answer to THIS question rather than the oldest
                # answer in the thread.
                messages = client.beta.threads.messages.list(
                    thread_id=thread_id,
                    order="desc",
                )
                assistant_reply = None
                for message in messages.data:
                    if message.role != "assistant":
                        continue
                    # Keep only text parts; other part types carry no .text.
                    text_parts = [
                        part.text.value
                        for part in message.content
                        if getattr(part, "type", None) == "text"
                    ]
                    assistant_reply = "\n\n".join(text_parts)
                    break

                if assistant_reply:
                    tech_history.append({"role": "assistant", "content": assistant_reply})

                    # Prefer the contents of a ``` / ```json fence; otherwise
                    # fall back to trimming fence characters off the ends.
                    fenced = re.search(r"```(?:json)?\s*(.*?)```", assistant_reply, re.DOTALL)
                    payload = fenced.group(1) if fenced else assistant_reply.strip("`json ")
                    try:
                        parsed = json.loads(payload)
                    except ValueError:
                        # Not JSON: treat as a plain chat answer with no results.
                        parsed = []
                    # The grid below calls .get() on every entry, so only keep
                    # a list of objects; any other JSON shape means no results.
                    if isinstance(parsed, list):
                        st.session_state.tech_results = [
                            entry for entry in parsed if isinstance(entry, dict)
                        ]
                    else:
                        st.session_state.tech_results = []
                else:
                    failure = "Assistant finished without returning a text reply."
            elif run_status.status in TECH_RUN_TERMINAL:
                failure = f"⚠️ Assistant failed: {run_status.status}"
            else:
                # Timed out or stuck in requires_action: cancel so the thread
                # is not left with an active run.
                failure = f"Assistant did not finish (last status: {run_status.status})."
                client.beta.threads.runs.cancel(thread_id=thread_id, run_id=run.id)

        except Exception as e:
            failure = f"❌ Error: {e}"

        if failure is not None:
            # Recording the failure as the latest chat entry keeps it available
            # after the rerun and stops the same prompt being resubmitted on
            # every subsequent script run.
            tech_history.append({"role": "assistant", "content": failure, "is_error": True})
        st.rerun()

    tech_results = st.session_state.tech_results
    if tech_results:
        # The chat history is hidden while results are on screen, so surface a
        # failure of the latest question here.
        if tech_history and tech_history[-1].get("is_error"):
            st.error(tech_history[-1]["content"])

        # Missing or null disciplines are normalised to "" so sorting cannot
        # fail on mixed types.
        disciplines = sorted({str(r.get("discipline") or "") for r in tech_results})
        selected_discipline = st.selectbox("🌍 Filter by discipline", ["All"] + disciplines)
        page_size = 8
        page_num = st.number_input("Page", min_value=1, step=1, value=1)

        filtered_results = [
            r for r in tech_results
            if selected_discipline == "All" or str(r.get("discipline") or "") == selected_discipline
        ]
        # Clamp to the last page so an over-large page number still shows data.
        total_pages = max(1, -(-len(filtered_results) // page_size))
        page_num = min(int(page_num), total_pages)
        paged = filtered_results[(page_num - 1) * page_size : page_num * page_size]

        st.markdown("---")
        st.subheader("πŸ“‚ Drawing Results")
        cols = st.columns(4)

        for i, item in enumerate(paged):
            with cols[i % 4]:
                st.markdown(f"**{item.get('drawing_number', 'N/A')}**")
                st.markdown(f"_Discipline: {str(item.get('discipline') or '')}_")
                st.caption(item.get("summary", ""))
                # Only the first image of each drawing gets a viewer button.
                for url in (item.get("images") or [])[:1]:
                    if st.button("πŸ–ΌοΈ View Image", key=f"view_{i}"):
                        st.session_state.tech_lightbox_url = url

        if st.session_state.tech_lightbox_url:
            col_a, col_b = st.columns([1, 2])
            with col_b:
                st.image(st.session_state.tech_lightbox_url, caption="πŸ” Enlarged Preview", use_container_width=True)
                if st.button("❌ Close Viewer"):
                    st.session_state.tech_lightbox_url = None
                    st.rerun()
    else:
        # NOTE(review): unsafe_allow_html renders user and model text as raw
        # HTML; confirm this is intentional before exposing the app more widely.
        for msg in tech_history:
            with st.chat_message(msg["role"]):
                st.markdown(msg["content"], unsafe_allow_html=True)