Spaces:
Sleeping
Sleeping
File size: 6,453 Bytes
9d10d92 13919c8 2943948 13919c8 b386f62 8c4492e d036356 af951b6 74246a4 c1043ca 3e6c499 8c4492e c1043ca 8c4492e 971b3be 8c4492e c1043ca 8c4492e c1043ca 8c4492e 212c196 e70ac04 c1043ca fb260a4 af951b6 8c4492e 212c196 e70ac04 8c4492e c1043ca af951b6 8c4492e fb260a4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import streamlit as st
import os
import time
import re
import requests
from PIL import Image
from io import BytesIO
from openai import OpenAI
# ------------------ App Configuration ------------------
# Page-level settings, then the heading and tagline shown above the chat.
# NOTE(review): the leading glyph in the title looks like a mis-encoded emoji;
# it is kept exactly as found.
st.set_page_config(
    page_title="Schlaeger Forrestdale DocAIA",
    layout="wide",
)
st.title("π Schlaeger Forrestdale Document Assistant")
st.caption(
    "Explore technical construction documents via AI with OCR references"
)
# ------------------ Load API Key and Assistant ID ------------------
# Both values come from environment variables; the app halts with an error
# banner when either one is missing or empty.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ASSISTANT_ID = os.getenv("ASSISTANT_ID")

if not (OPENAI_API_KEY and ASSISTANT_ID):
    st.error("β Missing secrets. Please set both OPENAI_API_KEY and ASSISTANT_ID in Hugging Face Space secrets.")
    st.stop()

# Client used for every thread / message / run call further down the script.
client = OpenAI(api_key=OPENAI_API_KEY)
# ------------------ Session State Initialization ------------------
# Seed every key the script relies on, without overwriting values that
# already exist in the session. The mapping is rebuilt on each script run,
# so the list defaults are never shared between sessions.
_initial_state = {
    "messages": [],
    "thread_id": None,
    "image_url": None,
    "image_updated": False,
    "search_history": [],
    "run_in_progress": False,
}
for _state_key, _initial_value in _initial_state.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# ------------------ Sidebar (Left Utility Panel) ------------------
st.sidebar.header("βΉοΈ Information")

# "Clear Chat" puts every conversation-related key back to its blank value
# and restarts the script so the emptied state is rendered straight away.
clear_requested = st.sidebar.button("π§Ή Clear Chat")
if clear_requested:
    blank_state = (
        ("messages", []),
        ("thread_id", None),
        ("image_url", None),
        ("image_updated", False),
        ("search_history", []),
        ("run_in_progress", False),
    )
    for state_key, blank_value in blank_state:
        st.session_state[state_key] = blank_value
    st.rerun()
# Toggle read further down to decide whether the page image is shown.
show_image = st.sidebar.toggle("π Show Page Image", value=True)

st.sidebar.markdown("### π Navigate or Query Document")
keyword = st.sidebar.text_input(
    "Jump to clause keyword or term",
    placeholder="e.g. defects, payment, WHS",
)

# A click with a non-empty keyword is always recorded in the search history;
# the matching chat request is queued only while no run is in progress.
search_clicked = st.sidebar.button("π Search Keyword")
if search_clicked and keyword:
    history = st.session_state.search_history
    history.append(keyword)
    if not st.session_state.run_in_progress:
        keyword_request = {
            "role": "user",
            "content": f"Find clauses or references related to: {keyword}",
        }
        st.session_state.messages.append(keyword_request)
# Sections offered in the sidebar; the first entry is a placeholder that
# means "nothing selected".
section_options = [
    "Select a section...",
    "1. Formal Instrument of Contract",
    "2. Offer and Acceptance",
    "3. Key Personnel",
    "4. Contract Pricing",
    "5. Contract Specifications",
    "6. WHS Policies",
    "7. Penalties and Delays",
    "8. Dispute Resolution",
    "9. Principal Obligations",
]
selected_section = st.sidebar.selectbox("π Browse Sections", section_options)

# The script is re-executed on every interaction while the selectbox keeps
# its value, so appending whenever a section is selected would add the same
# message again on each rerun (and immediately after "Clear Chat"). Remember
# the last handled choice and queue the request only when it changes; going
# back to the placeholder resets the memory so the same section can be
# requested again later.
# NOTE(review): the queued message is only added to the displayed history;
# the chat panel submits nothing but chat-input text to the assistant.
_previous_section = st.session_state.get("last_selected_section", section_options[0])
if selected_section != _previous_section:
    st.session_state["last_selected_section"] = selected_section
    if selected_section and selected_section != section_options[0]:
        st.session_state.messages.append(
            {
                "role": "user",
                "content": f"Summarize or list key points from section: {selected_section}",
            }
        )
# Canned requests offered in the sidebar; the first entry is a placeholder
# that means "nothing selected".
actions = [
    "Select an action...",
    "List all contractual obligations",
    "Summarize payment terms",
    "List WHS responsibilities",
    "Find delay-related penalties",
    "Extract dispute resolution steps",
]
selected_action = st.sidebar.selectbox("βοΈ Common Actions", actions)

# The selectbox keeps its value while the script is re-executed on every
# interaction, so an unconditional append would repeat the same message on
# each rerun (and immediately after "Clear Chat"). Queue the action only when
# the choice changes; returning to the placeholder resets the memory so the
# same action can be picked again later.
# NOTE(review): the queued message is only added to the displayed history;
# the chat panel submits nothing but chat-input text to the assistant.
_previous_action = st.session_state.get("last_selected_action", actions[0])
if selected_action != _previous_action:
    st.session_state["last_selected_action"] = selected_action
    if selected_action and selected_action != actions[0]:
        st.session_state.messages.append({"role": "user", "content": selected_action})
# Show up to the five most recent keyword searches, newest first.
past_searches = st.session_state.search_history
if past_searches:
    st.sidebar.markdown("---")
    st.sidebar.markdown("#### π Recent Searches")
    newest_first = past_searches[-5:][::-1]
    for term in newest_first:
        st.sidebar.markdown(f"- {term}")
# Upper bound, in seconds, for connecting to and reading from the image
# host. Without a timeout a stalled download would block the script run
# indefinitely.
IMAGE_FETCH_TIMEOUT_SECONDS = 10

# Render the referenced page image in the sidebar when the toggle is on and
# an image URL has been stored in the session.
if show_image and st.session_state.image_url:
    st.sidebar.markdown("---")
    st.sidebar.markdown("#### πΌοΈ Page Image (Toggle)")
    try:
        response = requests.get(
            st.session_state.image_url,
            timeout=IMAGE_FETCH_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        st.sidebar.image(img, caption="OCR Page", use_container_width=True)
    except Exception as e:
        # UI boundary: any download or decode failure becomes a sidebar
        # message instead of aborting the whole page.
        st.sidebar.error(f"β Failed to load image: {e}")
# ------------------ Center Panel: Chat ------------------
st.markdown("### π§ Ask a Document-Specific Question")
prompt = st.chat_input("Example: What is the defects liability period?")

# Replay the stored conversation, one chat bubble per entry.
# NOTE(review): content is rendered with unsafe_allow_html=True, so any HTML
# in user or assistant text is passed through — confirm this is intended.
for entry in st.session_state.messages:
    role = entry["role"]
    body = entry["content"]
    with st.chat_message(role):
        st.markdown(body, unsafe_allow_html=True)
# ------------------ Assistant Round-Trip ------------------
# Run states in which the assistant is still working. Every other state ends
# polling, so a run that expires, ends incomplete or asks for tool output
# cannot keep the loop spinning forever.
_PENDING_RUN_STATES = ("queued", "in_progress", "cancelling")

# Pattern the reply is searched with to find the cited document and page.
_REFERENCE_PATTERN = re.compile(r'Document Reference:\s+(.+?),\s+Page\s+(\d+)')


def _wait_for_run(thread_id, run_id):
    """Poll the run once per second until it leaves the pending states.

    Returns the last retrieved run object; the caller inspects its `status`.
    """
    while True:
        run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run_status.status not in _PENDING_RUN_STATES:
            return run_status
        time.sleep(1)


def _latest_assistant_text(thread_messages):
    """Return the text of the first assistant message in `thread_messages`.

    `thread_messages` must be ordered newest first. All text blocks of that
    message are joined; non-text blocks are skipped. Returns None when there
    is no assistant message or it carries no text.
    """
    for message in thread_messages:
        if message.role != "assistant":
            continue
        text_parts = [
            block.text.value
            for block in message.content
            if getattr(block, "type", None) == "text"
        ]
        return "\n\n".join(text_parts) or None
    return None


def _page_image_url(answer_text):
    """Build the page-image URL from a "Document Reference" line, or None."""
    match = _REFERENCE_PATTERN.search(answer_text)
    if not match:
        return None
    doc_name = match.group(1).strip()
    page_str = f"{int(match.group(2)):04d}"
    return f"https://raw.githubusercontent.com/AndrewLORTech/c2ozschlaegerforrestdale/main/{doc_name}/{doc_name}_page_{page_str}.png"


if prompt and not st.session_state.run_in_progress:
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.session_state.run_in_progress = True
    # The stored history was rendered before this prompt was appended, so
    # echo it now; it stays visible while waiting and if the request fails.
    with st.chat_message("user"):
        st.markdown(prompt, unsafe_allow_html=True)

    answered = False
    try:
        if st.session_state.thread_id is None:
            st.session_state.thread_id = client.beta.threads.create().id
        thread_id = st.session_state.thread_id

        client.beta.threads.messages.create(thread_id=thread_id, role="user", content=prompt)
        run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)
        with st.spinner("π€ Parsing and responding with referenced content..."):
            run_status = _wait_for_run(thread_id, run.id)

        if run_status.status != "completed":
            st.error(f"β οΈ Assistant failed: {run_status.status}")
        else:
            # Ask for newest-first ordering explicitly and take the first
            # assistant message, i.e. the reply to this prompt rather than
            # an older answer from the same thread.
            listing = client.beta.threads.messages.list(thread_id=thread_id, order="desc")
            assistant_message = _latest_assistant_text(listing.data)
            if assistant_message is None:
                raise RuntimeError("the assistant returned no text response")
            st.session_state.messages.append({"role": "assistant", "content": assistant_message})

            image_url = _page_image_url(assistant_message)
            if image_url:
                st.session_state.image_url = image_url
                st.session_state.image_updated = True
            answered = True
    except Exception as e:
        # UI boundary: report the failure in the page instead of crashing.
        st.error(f"β Error: {e}")
    finally:
        # Always release the guard so later prompts are not blocked.
        st.session_state.run_in_progress = False

    # Rerun only after a successful answer so the new messages and page
    # image are drawn; on failure the error banner above stays on screen.
    if answered:
        st.rerun()