"""Streamlit chat front end for an OpenAI Assistant over project documents.

The page has two panels: the left shows the document page image referenced by
the assistant's latest reply, the right holds the chat. Each user prompt is
posted to a per-session Assistants API thread, the run is polled until it
finishes, and the reply is scanned for a "Document Reference: <name>, Page <n>"
line that is used to build the page-image URL.
"""

import os
import re
import time
from io import BytesIO

import requests
import streamlit as st
from openai import OpenAI
from PIL import Image

# ------------------ App Configuration ------------------
st.set_page_config(page_title="Document AI Assistant", layout="wide")
st.title("📄 Document AI Assistant")
st.caption("Chat with an AI Assistant on your medical/pathology or construction documents")

# ------------------ Load API Key and Assistant ID ------------------
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ASSISTANT_ID = os.environ.get("ASSISTANT_ID")

if not OPENAI_API_KEY or not ASSISTANT_ID:
    st.error("❌ Missing secrets. Please set both OPENAI_API_KEY and ASSISTANT_ID in Hugging Face Space secrets.")
    st.stop()

client = OpenAI(api_key=OPENAI_API_KEY)

# ------------------ Map document titles to GitHub folder names ------------------
DOCUMENT_FOLDER_MAP = {
    "Tender 5 of 2024 - Forrestdale": "Schlager Group Pty Ltd - Tender 5 of 2024 - Forrestdale ~ Pavilion and External Works Part 1 of 2",
    "Tender 5 of 2024 - Forrestdale Part 2": "Schlager Group Pty Ltd - Tender 5 of 2024 - Forrestdale ~ Pavilion and External Works Part 2 of 2",
    "Armadale Forrestdale Methodology": "Armadale Forrestdale Methodology",
    "2302-FORRESTDALE SPORTING PAVILION-TECHNICAL SPECIFICATION": "2302-FORRESTDALE SPORTING PAVILION-TECHNICAL SPECIFICATION",
    "22.146.DS Electrical Specification IFC Rev_0": "22.146.DS Electrical Specification IFC Rev_0",
    "Forrrestdale Community Hub Program 23-04-2025": "Forrrestdale Community Hub Program 23-04-2025",
    "Landscape Specification - Forrestdale Community Hub_02": "Landscape Specification - Forrestdale Community Hub_02",
    "Mechanical_Spec_-_Forrestdale Sporting Club Room IFC": "Mechanical_Spec_-_Forrestdale Sporting Club Room IFC",
    "V-23-021-HY-SPEC-R0": "V-23-021-HY-SPEC-R0",
}

# ------------------ Constants ------------------
# Root of the repository that hosts one PNG per document page.
IMAGE_BASE_URL = "https://raw.githubusercontent.com/AndrewLORTech/c2ozschlaegerforrestdale/main/"

# Matches the "Document Reference: <name>, Page <n>" line in an assistant reply.
DOCUMENT_REFERENCE_RE = re.compile(r'Document Reference:\s+(.+?),\s+Page\s+(\d+)')

# Upper bound (seconds) on the page-image download so a stalled host cannot
# block the whole script run.
IMAGE_FETCH_TIMEOUT_S = 15

# Delay (seconds) between run status polls.
POLL_INTERVAL_S = 1

# Run states from which the run will never reach "completed" on its own.
# "requires_action" is included because this app never submits tool outputs.
UNRECOVERABLE_RUN_STATES = frozenset(
    {"failed", "cancelled", "expired", "incomplete", "requires_action"}
)


# ------------------ Helpers ------------------
def _group_into_exchanges(messages):
    """Split the flat chat history into consecutive exchanges.

    An exchange is closed by every assistant message, so a user message that
    never received a reply (e.g. after an API error) is grouped with the next
    exchange instead of preventing all later groups from closing.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts, oldest first.

    Returns:
        A list of lists of message dicts, oldest exchange first.
    """
    exchanges = []
    current = []
    for msg in messages:
        current.append(msg)
        if msg["role"] == "assistant":
            exchanges.append(current)
            current = []
    if current:
        # Trailing user message(s) still waiting for a reply.
        exchanges.append(current)
    return exchanges


def _wait_for_run(thread_id, run_id):
    """Poll the run until it completes.

    Raises:
        RuntimeError: if the run ends in (or stops at) a state from which it
            cannot complete; the message includes the API's error text when
            one is present.
    """
    while True:
        run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run_status.status == "completed":
            return
        if run_status.status in UNRECOVERABLE_RUN_STATES:
            last_error = getattr(run_status, "last_error", None)
            detail = getattr(last_error, "message", None)
            suffix = f": {detail}" if detail else ""
            raise RuntimeError(
                f"Assistant run ended with status '{run_status.status}'{suffix}"
            )
        time.sleep(POLL_INTERVAL_S)


def _latest_reply_text(thread_id, run_id):
    """Return the text of the newest assistant message produced by ``run_id``.

    Messages are requested newest-first and filtered on the run that created
    them, so an earlier turn's answer is never returned for the current
    prompt. Non-text content parts (e.g. image files) are skipped.

    Returns:
        The reply text, or ``None`` when the run produced no text content.
    """
    listing = client.beta.threads.messages.list(thread_id=thread_id, order="desc")
    for message in listing.data:
        if message.role != "assistant" or getattr(message, "run_id", None) != run_id:
            continue
        text_parts = [
            part.text.value
            for part in message.content
            if getattr(part, "type", None) == "text"
        ]
        if text_parts:
            return "\n\n".join(text_parts)
    return None


def _page_image_url(reply_text):
    """Build the page-image URL from the reply's "Document Reference" line.

    Returns:
        The URL of the referenced page image, or ``None`` when the reply has
        no "Document Reference: <name>, Page <n>" line.
    """
    match = DOCUMENT_REFERENCE_RE.search(reply_text)
    if match is None:
        return None
    doc_name_raw = match.group(1).strip()
    page_str = f"{int(match.group(2)):04d}"  # image files use 4-digit page numbers
    # Fall back to the raw title when it has no explicit folder mapping.
    folder_name = DOCUMENT_FOLDER_MAP.get(doc_name_raw, doc_name_raw)
    return f"{IMAGE_BASE_URL}{folder_name}/{folder_name}_page_{page_str}.png"


# ------------------ Session State Initialization ------------------
if "messages" not in st.session_state:
    st.session_state.messages = []
if "thread_id" not in st.session_state:
    st.session_state.thread_id = None
if "image_url" not in st.session_state:
    st.session_state.image_url = None
if "image_updated" not in st.session_state:
    st.session_state.image_updated = False

# ------------------ Sidebar Controls ------------------
st.sidebar.header("🔧 Settings")
if st.sidebar.button("🔄 Clear Chat"):
    st.session_state.messages = []
    st.session_state.thread_id = None
    st.session_state.image_url = None
    st.session_state.image_updated = False
    st.rerun()

show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)

# ------------------ Layout: Image + Chat ------------------
col1, col2 = st.columns([1, 2])

# ------------------ Left Panel: Image ------------------
with col1:
    if show_image and st.session_state.image_url:
        try:
            response = requests.get(st.session_state.image_url, timeout=IMAGE_FETCH_TIMEOUT_S)
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
            st.image(img, caption="📑 Extracted Page", use_container_width=True)
            st.session_state.image_updated = False
        except Exception as e:
            # Best effort: a missing or unreadable page image must not break the chat.
            st.warning(f"⚠️ Failed to load image from URL:\n{st.session_state.image_url}\n\nError: {e}")

# ------------------ Right Panel: Chat ------------------
with col2:
    prompt = st.chat_input("Type your question about the document...")

    # Display chat history, newest exchange first.
    for exchange in reversed(_group_into_exchanges(st.session_state.messages)):
        for msg in exchange:
            with st.chat_message(msg["role"]):
                st.write(msg["content"])

    if prompt:
        st.session_state.messages.append({"role": "user", "content": prompt})

        try:
            # Create the thread lazily on the first prompt of the session.
            if st.session_state.thread_id is None:
                thread = client.beta.threads.create()
                st.session_state.thread_id = thread.id
            thread_id = st.session_state.thread_id

            # Send user message
            client.beta.threads.messages.create(
                thread_id=thread_id,
                role="user",
                content=prompt,
            )

            # Run assistant
            run = client.beta.threads.runs.create(
                thread_id=thread_id,
                assistant_id=ASSISTANT_ID,
            )
            with st.spinner("Assistant is thinking..."):
                _wait_for_run(thread_id, run.id)

            # Retrieve assistant message
            assistant_message = _latest_reply_text(thread_id, run.id)
            if assistant_message is None:
                raise RuntimeError("The assistant finished without returning a text reply.")
            st.session_state.messages.append({"role": "assistant", "content": assistant_message})

            # Extract folder + page number from the "Document Reference" line.
            image_url = _page_image_url(assistant_message)
            if image_url:
                st.session_state.image_url = image_url
                st.session_state.image_updated = True
        except Exception as e:
            # No rerun here: a rerun would immediately clear the error message.
            st.error(f"❌ Error: {str(e)}")
        else:
            # The history above was rendered before this turn was processed,
            # so rerun to show the new exchange and the updated page image.
            st.rerun()