import streamlit as st import requests # ----------------- Streamlit Setup ----------------- # st.set_page_config(page_title="Lease Agreement Extractor (OCR + AI)", layout="centered") st.title("📄 Lease Agreement Extractor (OCR + AI)") # ----------------- Assistant & API Config ----------------- # OPENAI_ASSISTANT_ID = "asst_xBnNfiyWmVa4iF3CgXwJnmBt" # Replace with your assistant ID OPENAI_API_KEY = "YOUR_OPENAI_API_KEY" # Replace with your OpenAI key HEADERS = { "Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json" } # ----------------- Upload Interface ----------------- # uploaded_file = st.file_uploader("Upload a lease agreement (PDF, PNG, JPG)", type=["pdf", "png", "jpg", "jpeg"]) def upload_file_to_openai(file_bytes, filename): files = { "file": (filename, file_bytes, "application/octet-stream") } response = requests.post("https://api.openai.com/v1/files", headers={"Authorization": f"Bearer {OPENAI_API_KEY}"}, files=files) return response.json().get("id") def create_thread_and_run(file_id): # Create thread thread_res = requests.post("https://api.openai.com/v1/threads", headers=HEADERS, json={}) thread_data = thread_res.json() thread_id = thread_data.get("id") # Run assistant with structured lease extraction prompt run_payload = { "assistant_id": OPENAI_ASSISTANT_ID, "instructions": """ You are an AI assistant designed to extract structured lease data from any uploaded agreement, even if it is scanned. Your responsibilities are: - Accept PDF or image-based lease documents. - Use OCR (Optical Character Recognition) if the document is non-selectable or scanned. - Parse and extract key structured data fields from lease agreements. - Output results in JSON format following a fixed schema. OCR & Extraction Rules: 1. If text cannot be extracted normally, perform OCR using image-to-text on each page. 2. Preserve paragraph structure, detect headers, signature blocks, tables, and dates. Expected JSON Output: { "document_title": "Lease Agreement Title or File Name", "parties": { "lessor": "Name of Lessor", "lessee": "Name of Lessee" }, "property_description": "Detailed description of leased property", "term": { "start_date": "YYYY-MM-DD", "end_date": "YYYY-MM-DD", "renewal_options": "Yes/No or Clause Text" }, "financials": { "monthly_rent": "Amount", "deposit": "Amount", "escalation_clause": "Text if present" }, "obligations": { "maintenance": "Lessor/Lessee", "insurance": "Lessor/Lessee", "subletting": "Allowed/Not allowed" }, "signatures": [ { "party": "Party Name", "signed_by": "Full Name", "date": "YYYY-MM-DD", "position": "Title if listed" } ], "raw_text_per_page": { "page_1": "Full OCR text of page 1", "page_2": "Full OCR text of page 2" } } 📌 Behaviors: - Automatically detect if OCR is needed. - Normalize dates to YYYY-MM-DD. - Use "Not Found" or null where info is missing. - Return JSON only. - If OCR text is low-confidence, include: "low_confidence": true """, "file_ids": [file_id] } run_res = requests.post(f"https://api.openai.com/v1/threads/{thread_id}/runs", headers=HEADERS, json=run_payload) run_data = run_res.json() run_id = run_data.get("id") return thread_id, run_id # ----------------- Main UI Flow ----------------- # if uploaded_file is not None: st.success("📄 File uploaded successfully.") if st.button("🚀 Run Lease Extraction"): with st.spinner("Uploading and invoking assistant..."): file_id = upload_file_to_openai(uploaded_file.getvalue(), uploaded_file.name) if file_id: thread_id, run_id = create_thread_and_run(file_id) st.success("✅ Assistant run started!") st.code(f"Thread ID: {thread_id}", language="text") st.code(f"Run ID: {run_id}", language="text") st.markdown("📌 You can retrieve results via OpenAI API using these IDs.") else: st.error("❌ File upload to OpenAI failed.") else: st.info("Please upload a lease agreement file to begin.")