witspathologyv2 / app.py
IAMTFRMZA's picture
Update app.py
925961d verified
raw
history blame
6.41 kB
import streamlit as st
import os
import time
import re
import requests
from PIL import Image
from io import BytesIO
from openai import OpenAI
# ------------------ App Configuration ------------------
# Set the browser-tab title and wide layout, then render the page heading and
# the one-line description shown beneath it.
st.set_page_config(page_title="Document AI Assistant", layout="wide")
# The emoji is written as real UTF-8; the previous literal was mojibake.
st.title("📄 Document AI Assistant")
st.caption("Chat with an AI Assistant on your medical/pathology or construction documents")
# ------------------ Load API Key and Assistant ID ------------------
# Both values are read from environment variables. When either is missing or
# empty the user is told which secrets to set and the script run is halted, so
# nothing below ever executes without a usable client.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ASSISTANT_ID = os.environ.get("ASSISTANT_ID")

if not OPENAI_API_KEY or not ASSISTANT_ID:
    st.error("❌ Missing secrets. Please set both OPENAI_API_KEY and ASSISTANT_ID in Hugging Face Space secrets.")
    st.stop()

# Shared OpenAI client used by the chat panel further down.
client = OpenAI(api_key=OPENAI_API_KEY)
# ------------------ Map document titles to GitHub folder names ------------------
# Keys are document names as they appear in the assistant's
# "Document Reference" line; values are the folder names used when building
# the page-image URL. Only the two tender documents use a folder name that
# differs from the key; every other document maps to itself.
DOCUMENT_FOLDER_MAP = {
    "Tender 5 of 2024 - Forrestdale": "Schlager Group Pty Ltd - Tender 5 of 2024 - Forrestdale ~ Pavilion and External Works Part 1 of 2",
    "Tender 5 of 2024 - Forrestdale Part 2": "Schlager Group Pty Ltd - Tender 5 of 2024 - Forrestdale ~ Pavilion and External Works Part 2 of 2",
    **{
        same_name: same_name
        for same_name in (
            "Armadale Forrestdale Methodology",
            "2302-FORRESTDALE SPORTING PAVILION-TECHNICAL SPECIFICATION",
            "22.146.DS Electrical Specification IFC Rev_0",
            "Forrrestdale Community Hub Program 23-04-2025",
            "Landscape Specification - Forrestdale Community Hub_02",
            "Mechanical_Spec_-_Forrestdale Sporting Club Room IFC",
            "V-23-021-HY-SPEC-R0",
        )
    },
}
# ------------------ Session State Initialization ------------------
# Seed each session key the first time it is missing; a key that is already
# present keeps its current value.
#   messages      - chat history as {"role", "content"} dicts
#   thread_id     - id of the assistant thread, None until one is created
#   image_url     - URL of the page image to show, None when there is none
#   image_updated - flag set when image_url changes and cleared after display
for _state_key, _initial_value in (
    ("messages", []),
    ("thread_id", None),
    ("image_url", None),
    ("image_updated", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# ------------------ Sidebar Controls ------------------
# Emoji in the labels are written as real UTF-8; the previous literals were
# mojibake.
st.sidebar.header("🔧 Settings")

# "Clear Chat" resets every session key to its initial value and reruns the
# script so the emptied state is rendered immediately.
if st.sidebar.button("🔄 Clear Chat"):
    st.session_state.messages = []
    st.session_state.thread_id = None
    st.session_state.image_url = None
    st.session_state.image_updated = False
    st.rerun()

# Controls whether the left panel renders the referenced page image.
show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
# ------------------ Layout: Image + Chat ------------------
# Two columns in a 1:2 width ratio: page image on the left, chat on the right.
col1, col2 = st.columns([1, 2])

# ------------------ Left Panel: Image ------------------
with col1:
    # Render only when the sidebar toggle is on and a page image URL is stored.
    if show_image and st.session_state.image_url:
        try:
            # A timeout keeps a stalled download from blocking the whole
            # script run; without one requests waits indefinitely.
            response = requests.get(st.session_state.image_url, timeout=10)
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
            st.image(img, caption="📑 Extracted Page", use_container_width=True)
            st.session_state.image_updated = False
        except Exception as e:
            # Best-effort panel: any download or decode failure is reported
            # as a warning and the chat stays usable.
            st.warning(f"⚠️ Failed to load image from URL:\n{st.session_state.image_url}\n\nError: {e}")
# ------------------ Right Panel: Chat ------------------
# Run states from which a run cannot reach "completed" in this app (it never
# submits tool outputs, so "requires_action" is a dead end as well).
_RUN_DEAD_END_STATUSES = frozenset(
    {"failed", "cancelled", "expired", "incomplete", "requires_action"}
)
# Longest time, in seconds, that a single run is polled before giving up.
_RUN_TIMEOUT_SECONDS = 300


def _wait_for_run(thread_id, run_id):
    """Poll a run once per second until its status is "completed".

    Raises:
        RuntimeError: the run entered a state listed in
            _RUN_DEAD_END_STATUSES.
        TimeoutError: the run did not complete within _RUN_TIMEOUT_SECONDS.
    """
    deadline = time.monotonic() + _RUN_TIMEOUT_SECONDS
    while True:
        run_status = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run_id,
        )
        if run_status.status == "completed":
            return
        if run_status.status in _RUN_DEAD_END_STATUSES:
            raise RuntimeError(
                f"Assistant run ended with status '{run_status.status}'."
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Assistant run did not complete within {_RUN_TIMEOUT_SECONDS} seconds."
            )
        time.sleep(1)


def _latest_assistant_text(thread_id):
    """Return the text of the newest assistant message, or None if absent.

    Messages are requested newest-first and scanned in that order, so the
    first assistant message found is the most recent reply. The first content
    part that carries text is used; parts without text are skipped.
    """
    messages = client.beta.threads.messages.list(thread_id=thread_id, order="desc")
    for message in messages.data:
        if message.role != "assistant":
            continue
        for part in message.content:
            text = getattr(part, "text", None)
            if text is not None:
                return text.value
        return None
    return None


def _page_image_url(answer):
    """Build the page-image URL from a "Document Reference" line in *answer*.

    Returns None when *answer* has no "Document Reference: <name>, Page <n>"
    line. The document name is mapped through DOCUMENT_FOLDER_MAP (falling
    back to the name itself) and the page number is zero-padded to 4 digits.
    """
    match = re.search(r'Document Reference:\s+(.+?),\s+Page\s+(\d+)', answer)
    if match is None:
        return None
    doc_name_raw = match.group(1).strip()
    page_str = f"{int(match.group(2)):04d}"
    folder_name = DOCUMENT_FOLDER_MAP.get(doc_name_raw, doc_name_raw)
    return (
        "https://raw.githubusercontent.com/AndrewLORTech/c2ozschlaegerforrestdale/main/"
        f"{folder_name}/{folder_name}_page_{page_str}.png"
    )


with col2:
    prompt = st.chat_input("Type your question about the document...")

    # Display chat history, newest exchange first. An exchange is closed by
    # each assistant message, so a user message that never got a reply (a
    # failed request) is grouped with the next exchange and does not stop
    # later messages from being split into exchanges.
    exchanges = []
    pending = []
    for msg in st.session_state.messages:
        pending.append(msg)
        if msg["role"] == "assistant":
            exchanges.append(pending)
            pending = []
    if pending:
        exchanges.append(pending)

    for exchange in reversed(exchanges):
        for msg in exchange:
            with st.chat_message(msg["role"]):
                st.write(msg["content"])

    if prompt:
        st.session_state.messages.append({"role": "user", "content": prompt})
        try:
            # Create the thread lazily on the first question of a session.
            if st.session_state.thread_id is None:
                thread = client.beta.threads.create()
                st.session_state.thread_id = thread.id

            # Send user message
            client.beta.threads.messages.create(
                thread_id=st.session_state.thread_id,
                role="user",
                content=prompt,
            )

            # Run assistant
            run = client.beta.threads.runs.create(
                thread_id=st.session_state.thread_id,
                assistant_id=ASSISTANT_ID,
            )
            with st.spinner("Assistant is thinking..."):
                _wait_for_run(st.session_state.thread_id, run.id)

            # Retrieve assistant message
            assistant_message = _latest_assistant_text(st.session_state.thread_id)
            if assistant_message is None:
                raise RuntimeError("The assistant returned no text reply.")
            st.session_state.messages.append({"role": "assistant", "content": assistant_message})

            # Point the left panel at the page the assistant cited, if any.
            image_url = _page_image_url(assistant_message)
            if image_url is not None:
                st.session_state.image_url = image_url
                st.session_state.image_updated = True
        except Exception as e:
            st.error(f"❌ Error: {str(e)}")
        else:
            # Rerun only after success so the new reply (and image) are
            # rendered; kept outside the try so the rerun signal never passes
            # through the error handler.
            st.rerun()