Spaces:
Sleeping
Sleeping
File size: 6,410 Bytes
8c4492e 13919c8 2943948 13919c8 b386f62 8c4492e 3e6c499 c1043ca 3e6c499 8c4492e c1043ca 8c4492e 971b3be 8c4492e c1043ca 8c4492e c1043ca 925961d 971b3be 925961d 971b3be 8c4492e c1043ca 8c01614 8c4492e c1043ca 8c4492e 3e6c499 f534be4 9c9251a 3e6c499 9c9251a 2943948 9c9251a 3e6c499 9c9251a 74c6fff 3e6c499 eeb4027 3e6c499 f534be4 74c6fff 9c9251a 3e6c499 9c9251a 925961d 53fcb59 74c6fff 53fcb59 925961d 53fcb59 74c6fff 53fcb59 6e52561 9c9251a 53fcb59 74c6fff 53fcb59 9c9251a 925961d 74c6fff 9c9251a 3bbf4ab 9c9251a bcaf273 925961d 971b3be 925961d 971b3be 925961d 971b3be 925961d 971b3be 925961d bcaf273 f534be4 3e6c499 57d0c38 8c4492e 9c9251a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import streamlit as st
import os
import time
import re
import requests
from PIL import Image
from io import BytesIO
from openai import OpenAI
# ------------------ App Configuration ------------------
# Page-level setup: browser tab title, wide layout, then the visible
# heading and a one-line description of what the app is for.
PAGE_TITLE = "Document AI Assistant"
PAGE_CAPTION = (
    "Chat with an AI Assistant on your medical/pathology or construction documents"
)

st.set_page_config(page_title=PAGE_TITLE, layout="wide")
st.title(f"π {PAGE_TITLE}")
st.caption(PAGE_CAPTION)
# ------------------ Load API Key and Assistant ID ------------------
# Both values are read from environment variables. If either one is missing
# or empty, an error is shown and the script run is stopped before the
# OpenAI client is created.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ASSISTANT_ID = os.getenv("ASSISTANT_ID")

if not all((OPENAI_API_KEY, ASSISTANT_ID)):
    st.error(
        "β Missing secrets. Please set both OPENAI_API_KEY and ASSISTANT_ID "
        "in Hugging Face Space secrets."
    )
    st.stop()

client = OpenAI(api_key=OPENAI_API_KEY)
# ------------------ Map document titles to GitHub folder names ------------------
# Titles whose folder name differs from the title itself.
_RENAMED_DOCUMENTS = {
    "Tender 5 of 2024 - Forrestdale": (
        "Schlager Group Pty Ltd - Tender 5 of 2024 - Forrestdale ~ "
        "Pavilion and External Works Part 1 of 2"
    ),
    "Tender 5 of 2024 - Forrestdale Part 2": (
        "Schlager Group Pty Ltd - Tender 5 of 2024 - Forrestdale ~ "
        "Pavilion and External Works Part 2 of 2"
    ),
}

# Titles that are stored in a folder carrying exactly the same name.
_SAME_NAME_DOCUMENTS = (
    "Armadale Forrestdale Methodology",
    "2302-FORRESTDALE SPORTING PAVILION-TECHNICAL SPECIFICATION",
    "22.146.DS Electrical Specification IFC Rev_0",
    "Forrrestdale Community Hub Program 23-04-2025",
    "Landscape Specification - Forrestdale Community Hub_02",
    "Mechanical_Spec_-_Forrestdale Sporting Club Room IFC",
    "V-23-021-HY-SPEC-R0",
)

# Lookup table: document title -> folder name. The renamed entries come
# first, followed by the identity entries in the order listed above.
DOCUMENT_FOLDER_MAP = {
    **_RENAMED_DOCUMENTS,
    **{title: title for title in _SAME_NAME_DOCUMENTS},
}
# ------------------ Session State Initialization ------------------
# Seed every key the app reads with its starting value, leaving keys that
# already exist untouched. This dict is rebuilt on each script run, so the
# empty list is never shared between sessions.
_SESSION_DEFAULTS = {
    "messages": [],          # chat history as {"role", "content"} dicts
    "thread_id": None,       # id of the assistant thread, created lazily
    "image_url": None,       # URL of the page image shown in the left panel
    "image_updated": False,  # set when a new image URL has been stored
}

for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# ------------------ Sidebar Controls ------------------
st.sidebar.header("π§ Settings")

# "Clear Chat" resets the conversation, the thread and the displayed image,
# then reruns the script immediately so the cleared state is rendered.
clear_requested = st.sidebar.button("π Clear Chat")
if clear_requested:
    for _key, _value in (
        ("messages", []),
        ("thread_id", None),
        ("image_url", None),
        ("image_updated", False),
    ):
        st.session_state[_key] = _value
    st.rerun()

# Toggle for the page image in the left panel; ticked by default.
show_image = st.sidebar.checkbox("π Show Document Image", value=True)
# ------------------ Layout: Image + Chat ------------------
# Two columns with a 1:2 width ratio: page image on the left, chat on the right.
col1, col2 = st.columns([1, 2])

# ------------------ Left Panel: Image ------------------
# Seconds to wait for the image host before giving up. Without a timeout a
# stalled connection would block the script run indefinitely.
IMAGE_FETCH_TIMEOUT_SECONDS = 10

with col1:
    # Only fetch when the user wants the image and a URL has been stored.
    if show_image and st.session_state.image_url:
        try:
            response = requests.get(
                st.session_state.image_url,
                timeout=IMAGE_FETCH_TIMEOUT_SECONDS,
            )
            # Turn HTTP error statuses (e.g. 404 for a missing page) into exceptions.
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
            st.image(img, caption="π Extracted Page", use_container_width=True)
            st.session_state.image_updated = False
        except Exception as e:
            # Best-effort display: any download or decode problem becomes a
            # warning in the panel instead of aborting the whole page.
            st.warning(f"β οΈ Failed to load image from URL:\n{st.session_state.image_url}\n\nError: {e}")
# ------------------ Right Panel: Chat ------------------
# Upper bound, in seconds, on how long a single assistant run is polled.
RUN_TIMEOUT_SECONDS = 180

# Run states from which polling can never reach "completed" by itself.
# "requires_action" is included because this app submits no tool outputs.
_UNSUCCESSFUL_RUN_STATUSES = frozenset(
    {"failed", "cancelled", "expired", "incomplete", "requires_action"}
)


def _group_into_exchanges(messages):
    """Split the flat chat history into exchanges that end with an assistant reply.

    A group is closed every time an assistant message is seen, so a user
    message that never got a reply (for example after an API error) is kept
    with the next exchange instead of breaking the grouping of everything
    that follows. Trailing messages without a reply form a final group.
    """
    exchanges = []
    current = []
    for msg in messages:
        current.append(msg)
        if msg["role"] == "assistant":
            exchanges.append(current)
            current = []
    if current:
        exchanges.append(current)
    return exchanges


def _wait_for_run(thread_id, run_id, timeout=RUN_TIMEOUT_SECONDS, poll_interval=1.0):
    """Poll a run until it completes.

    Raises:
        RuntimeError: the run stopped in a state other than "completed".
        TimeoutError: the run did not complete within ``timeout`` seconds.
    """
    deadline = time.monotonic() + timeout
    while True:
        run_status = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run_id,
        )
        if run_status.status == "completed":
            return
        if run_status.status in _UNSUCCESSFUL_RUN_STATUSES:
            raise RuntimeError(
                f"Assistant run ended with status '{run_status.status}'."
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Assistant did not respond within {timeout} seconds."
            )
        time.sleep(poll_interval)


def _latest_assistant_text(thread_id):
    """Return the text of the most recent assistant message in the thread.

    Messages are requested newest-first, so the first assistant message
    encountered is the latest reply. All text parts of that message are
    joined; non-text parts are skipped.

    Raises:
        RuntimeError: no assistant message with text content was found.
    """
    messages = client.beta.threads.messages.list(thread_id=thread_id, order="desc")
    for message in messages.data:
        if message.role != "assistant":
            continue
        text_parts = [
            part.text.value
            for part in message.content
            if getattr(part, "type", None) == "text"
        ]
        if text_parts:
            return "\n\n".join(text_parts)
        # The newest assistant message has no text; do not fall back to an
        # older reply, which would answer a previous question.
        break
    raise RuntimeError("The assistant did not return a text reply.")


with col2:
    prompt = st.chat_input("Type your question about the document...")

    # Display chat history, newest exchange first.
    for exchange in reversed(_group_into_exchanges(st.session_state.messages)):
        for msg in exchange:
            with st.chat_message(msg["role"]):
                st.write(msg["content"])

    if prompt:
        st.session_state.messages.append({"role": "user", "content": prompt})
        try:
            # Create the thread on first use and keep it for the session.
            if st.session_state.thread_id is None:
                thread = client.beta.threads.create()
                st.session_state.thread_id = thread.id
            thread_id = st.session_state.thread_id

            # Send user message
            client.beta.threads.messages.create(
                thread_id=thread_id,
                role="user",
                content=prompt,
            )

            # Run assistant
            run = client.beta.threads.runs.create(
                thread_id=thread_id,
                assistant_id=ASSISTANT_ID,
            )
            with st.spinner("Assistant is thinking..."):
                _wait_for_run(thread_id, run.id)

            # Retrieve assistant message
            assistant_message = _latest_assistant_text(thread_id)
            st.session_state.messages.append(
                {"role": "assistant", "content": assistant_message}
            )

            # Extract folder + page number from "Document Reference" line
            match = re.search(r'Document Reference:\s+(.+?),\s+Page\s+(\d+)', assistant_message)
            if match:
                doc_name_raw = match.group(1).strip()
                page_num = int(match.group(2))
                # Page numbers are zero-padded to four digits in the file name.
                page_str = f"{page_num:04d}"

                # Map to actual GitHub folder name; unknown titles are used as-is.
                folder_name = DOCUMENT_FOLDER_MAP.get(doc_name_raw, doc_name_raw)
                corrected_url = (
                    f"https://raw.githubusercontent.com/AndrewLORTech/c2ozschlaegerforrestdale/main/"
                    f"{folder_name}/{folder_name}_page_{page_str}.png"
                )
                st.session_state.image_url = corrected_url
                st.session_state.image_updated = True
        except Exception as e:
            st.error(f"β Error: {e}")
        else:
            # Rerun so the history rendered above includes the new exchange
            # and the left panel picks up the new image URL. Kept outside
            # the try body so the error handler cannot intercept it.
            st.rerun()
|