Tesneem committed on
Commit
13def26
·
verified ·
1 Parent(s): bb1806c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -74
app.py CHANGED
@@ -15,6 +15,7 @@ db_name = os.environ.get("MONGO_DB", "grant_docs")
15
  mongo_uri = f"mongodb+srv://{user}:{password}@{cluster}/{db_name}?retryWrites=true&w=majority&tls=true&tlsAllowInvalidCertificates=true"
16
  client = MongoClient(mongo_uri, tls=True, tlsAllowInvalidCertificates=True, serverSelectionTimeoutMS=20000)
17
  db = client[db_name]
 
18
  def gate_ui():
19
  APP_PASSWORD=st.secrets.get("APP_PASSWORD", os.getenv("APP_PASSWORD")).strip()
20
  if "authed" not in st.session_state:
@@ -35,81 +36,82 @@ def gate_ui():
35
  st.error("Incorrect password.")
36
  return False
37
  # === Streamlit UI ===
38
- if not gate_ui():
39
- return
40
- st.set_page_config(page_title="Doc Chunker", layout="wide")
41
- st.title("📄 Document Chunker & Uploader")
42
-
43
- with st.sidebar:
44
- st.header("Settings")
45
-
46
- # Fetch collection names for dropdown
47
- try:
48
- existing_categories = db["final_chunks"].distinct("collection_category") or []
49
- except Exception:
50
- existing_categories = []
51
- existing_categories=sorted([c for c in existing_categories if c])+["Create New Category"]
52
- selected_category = st.selectbox(
53
- "Choose Category (collection_category)",
54
- existing_categories,
55
- index=existing_categories.index("Create New Category") if "Create New Category" in existing_categories else 0
56
- )
57
- if selected_category == "Create New Category":
58
- selected_category = st.sidebar.text_input("Enter Category Name:")
59
- if not selected_category:
60
- st.warning("⚠️ Enter a category name to proceed.")
61
- st.stop()
62
-
63
- is_grant_app = st.toggle("Is this a Grant Application?", value=False)
64
-
65
- uploaded_file = st.file_uploader("Upload a DOCX, TXT, or PDF file", type=["docx", "txt", "pdf"])
66
-
67
- if uploaded_file:
68
- temp_path = Path(tempfile.gettempdir()) / uploaded_file.name
69
- with open(temp_path, "wb") as f:
70
- f.write(uploaded_file.getbuffer())
71
-
72
- st.success(f"Uploaded `{uploaded_file.name}`")
73
-
74
- modified_time = datetime.now().isoformat()
75
- collection = db['final_chunks']
76
- already = collection.find_one({
77
- "metadata.title": uploaded_file.name,
78
- "collection_category": selected_category
79
- })
80
-
81
- if already:
82
- st.warning(f"⚠️ `{uploaded_file.name}` already exists in category `{selected_category}`. Skipping…")
83
- else:
84
- st.write("⏳ Processing with DocumentChunker...")
85
- chunker = DocumentChunker()
86
- chunks = chunker.process_document(str(temp_path))
87
-
88
- if chunks:
89
- for chunk in chunks:
90
- chunk['collection_category']=selected_category
91
- chunk['metadata'].update({
92
- "title": uploaded_file.name,
93
- "uploaded_at": modified_time,
94
- "is_grant_app": is_grant_app,
95
- })
96
- collection.insert_one(chunk)
97
-
98
- st.success(f"✅ {len(chunks)} chunks inserted into `final_chunks` (category: `{selected_category}`)")
99
-
100
- # Show a few previews
101
- for i, c in enumerate(chunks[:3]):
102
- st.subheader(f"Chunk {i+1}: {c['metadata'].get('header') or 'No Header'}")
103
- st.markdown(c['text'][:400] + "...")
104
- st.caption(f"Topics: {', '.join(c['metadata']['topics'])} | Category: {c['metadata']['category']}")
105
- st.progress(c['metadata']['confidence_score'])
106
-
107
- if len(chunks) > 3:
108
- st.info(f"... and {len(chunks)-3} more chunks processed.")
109
-
110
  else:
111
- st.warning("⚠️ No chunks were generated.")
112
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  # try:
114
  # os.remove(temp_path)
115
  # except Exception as e:
 
15
  mongo_uri = f"mongodb+srv://{user}:{password}@{cluster}/{db_name}?retryWrites=true&w=majority&tls=true&tlsAllowInvalidCertificates=true"
16
  client = MongoClient(mongo_uri, tls=True, tlsAllowInvalidCertificates=True, serverSelectionTimeoutMS=20000)
17
  db = client[db_name]
18
+ st.set_page_config(page_title="Doc Chunker", layout="wide")
19
  def gate_ui():
20
  APP_PASSWORD=st.secrets.get("APP_PASSWORD", os.getenv("APP_PASSWORD")).strip()
21
  if "authed" not in st.session_state:
 
36
  st.error("Incorrect password.")
37
  return False
38
  # === Streamlit UI ===
39
+ def main():
40
+ if not gate_ui():
41
+ return
42
+ st.title("📄 Document Chunker & Uploader")
43
+
44
+ with st.sidebar:
45
+ st.header("Settings")
46
+
47
+ # Fetch collection names for dropdown
48
+ try:
49
+ existing_categories = db["final_chunks"].distinct("collection_category") or []
50
+ except Exception:
51
+ existing_categories = []
52
+ existing_categories=sorted([c for c in existing_categories if c])+["Create New Category"]
53
+ selected_category = st.selectbox(
54
+ "Choose Category (collection_category)",
55
+ existing_categories,
56
+ index=existing_categories.index("Create New Category") if "Create New Category" in existing_categories else 0
57
+ )
58
+ if selected_category == "Create New Category":
59
+ selected_category = st.sidebar.text_input("Enter Category Name:")
60
+ if not selected_category:
61
+ st.warning("⚠️ Enter a category name to proceed.")
62
+ st.stop()
63
+
64
+ is_grant_app = st.toggle("Is this a Grant Application?", value=False)
65
+
66
+ uploaded_file = st.file_uploader("Upload a DOCX, TXT, or PDF file", type=["docx", "txt", "pdf"])
67
+
68
+ if uploaded_file:
69
+ temp_path = Path(tempfile.gettempdir()) / uploaded_file.name
70
+ with open(temp_path, "wb") as f:
71
+ f.write(uploaded_file.getbuffer())
72
+
73
+ st.success(f"Uploaded `{uploaded_file.name}`")
74
+
75
+ modified_time = datetime.now().isoformat()
76
+ collection = db['final_chunks']
77
+ already = collection.find_one({
78
+ "metadata.title": uploaded_file.name,
79
+ "collection_category": selected_category
80
+ })
81
+
82
+ if already:
83
+ st.warning(f"⚠️ `{uploaded_file.name}` already exists in category `{selected_category}`. Skipping…")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  else:
85
+ st.write(" Processing with DocumentChunker...")
86
+ chunker = DocumentChunker()
87
+ chunks = chunker.process_document(str(temp_path))
88
+
89
+ if chunks:
90
+ for chunk in chunks:
91
+ chunk['collection_category']=selected_category
92
+ chunk['metadata'].update({
93
+ "title": uploaded_file.name,
94
+ "uploaded_at": modified_time,
95
+ "is_grant_app": is_grant_app,
96
+ })
97
+ collection.insert_one(chunk)
98
+
99
+ st.success(f"✅ {len(chunks)} chunks inserted into `final_chunks` (category: `{selected_category}`)")
100
+
101
+ # Show a few previews
102
+ for i, c in enumerate(chunks[:3]):
103
+ st.subheader(f"Chunk {i+1}: {c['metadata'].get('header') or 'No Header'}")
104
+ st.markdown(c['text'][:400] + "...")
105
+ st.caption(f"Topics: {', '.join(c['metadata']['topics'])} | Category: {c['metadata']['category']}")
106
+ st.progress(c['metadata']['confidence_score'])
107
+
108
+ if len(chunks) > 3:
109
+ st.info(f"... and {len(chunks)-3} more chunks processed.")
110
+
111
+ else:
112
+ st.warning("⚠️ No chunks were generated.")
113
+ if __name__ == "__main__":
114
+ main()
115
  # try:
116
  # os.remove(temp_path)
117
  # except Exception as e: