awacke1 committed on
Commit
15c1377
·
verified ·
1 Parent(s): a285999

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +745 -0
app.py ADDED
@@ -0,0 +1,745 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # ───────────── IMPORTS ─────────────
3
+ # =============================================================================
4
+ import base64
5
+ import glob
6
+ import hashlib
7
+ import json
8
+ import os
9
+ import pandas as pd
10
+ import pytz
11
+ import random
12
+ import re
13
+ import shutil
14
+ import streamlit as st
15
+ import time
16
+ import traceback
17
+ import uuid
18
+ import zipfile
19
+ from PIL import Image
20
+ from azure.cosmos import CosmosClient, PartitionKey, exceptions
21
+ from datetime import datetime
22
+ from git import Repo
23
+ from github import Github
24
+ from gradio_client import Client
25
+ import tempfile
26
+ import io
27
+ import requests
28
+ import numpy as np
29
+ from urllib.parse import quote
30
+ import PyPDF2 # For PDF text extraction
31
+
32
+ # =============================================================================
33
+ # ───────────── EXTERNAL HELP LINKS ─────────────
34
+ # =============================================================================
35
# Reference links shown in the sidebar and copied into "Portal Links" records.
# Every entry carries exactly three keys: "title", "url" and "emoji".
# The emoji values were mis-encoded (UTF-8 bytes shown through a single-byte
# code page); they are restored to the intended single characters here.
external_links = [
    {"title": "CosmosDB GenAI Full Text Search", "url": "https://learn.microsoft.com/en-us/azure/cosmos-db/gen-ai/full-text-search", "emoji": "💻"},
    {"title": "CosmosDB SQL API Client Library", "url": "https://learn.microsoft.com/en-us/python/api/overview/azure/cosmos-readme?view=azure-python", "emoji": "💻"},
    {"title": "CosmosDB Index and Query Vectors", "url": "https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/how-to-python-vector-index-query", "emoji": "💻"},
    {"title": "CosmosDB NoSQL Materialized Views", "url": "https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/materialized-views", "emoji": "💻"},
    {"title": "LangChain Vector Store Guide", "url": "https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/", "emoji": "💻"},
    {"title": "Vector Database Prompt Engineering RAG for Python", "url": "https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database?source=recommendations", "emoji": "💻"},
    {"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "💻"},
    {"title": "MergeKit Sample Usage", "url": "https://github.com/arcee-ai/MergeKit#examples", "emoji": "📚"},
    {"title": "DistillKit Official GitHub", "url": "https://github.com/arcee-ai/DistillKit", "emoji": "💻"},
    {"title": "DistillKit Sample Usage", "url": "https://github.com/arcee-ai/DistillKit#usage", "emoji": "📚"},
    {"title": "arcee.ai Official Website", "url": "https://arcee.ai", "emoji": "🌐"},
]
48
+
49
+ # =============================================================================
50
+ # ───────────── APP CONFIGURATION ─────────────
51
+ # =============================================================================
52
# Page identity. The emoji in these literals were mis-encoded and are restored.
Site_Name = '🐙 GitCosmos'
title = "🐙 GitCosmos"
helpURL = 'https://huggingface.co/awacke1'
bugURL = 'https://huggingface.co/spaces/awacke1/AzureCosmosDBUI/'
icons = '🐙🌌💫'
st.set_page_config(
    page_title=title,
    page_icon=icons,
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': helpURL,
        'Report a bug': bugURL,
        'About': title,
    },
)

# Cosmos DB connection settings. The endpoint keeps its original value as the
# default but can now be overridden through COSMOS_ENDPOINT; database name,
# container name and account key are read from the environment as before.
ENDPOINT = os.environ.get("COSMOS_ENDPOINT", "https://acae-afd.documents.azure.com:443/")
DATABASE_NAME = os.environ.get("COSMOS_DATABASE_NAME")
CONTAINER_NAME = os.environ.get("COSMOS_CONTAINER_NAME")
Key = os.environ.get("Key")
CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
74
+
75
+ # =============================================================================
76
+ # ───────────── HELPER FUNCTIONS ─────────────
77
+ # =============================================================================
78
def get_download_link(file_path):
    """Return an HTML ``<a>`` element that downloads *file_path* from a data URI.

    The file is read in binary mode and embedded base64-encoded in the
    ``href``. The file's base name is used for both the ``download``
    attribute and the visible label.

    Args:
        file_path: Path of the file to embed.

    Returns:
        A string starting with ``<a`` (callers in this file test for that).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import html  # local import: not part of the module-level import block

    with open(file_path, "rb") as handle:
        encoded = base64.b64encode(handle.read()).decode("ascii")
    # Escape the name so quotes, '&' or '<' in it cannot break out of the
    # attribute or inject markup when rendered with unsafe_allow_html=True.
    file_name = html.escape(os.path.basename(file_path), quote=True)
    # "file/txt" is not a registered media type; octet-stream is the generic
    # binary type and suits every file this helper is used for.
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{file_name}">Download {file_name} 📂</a>'
    )
84
+
85
def generate_unique_id():
    """Return a sortable unique id of the form ``<UTC timestamp>-<uuid4>``.

    The timestamp is 20 digits (``%Y%m%d%H%M%S%f``) taken from the current
    UTC time. ``datetime.utcnow()`` is deprecated since Python 3.12, so an
    aware UTC clock is used instead; the formatted output is unchanged.
    """
    from datetime import timezone  # module header only imports `datetime`

    timestamp = datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S%f')
    return f"{timestamp}-{uuid.uuid4()}"
89
+
90
def generate_filename(prompt, file_type):
    """Build a file name from the current US/Central time and a prompt slug.

    The name is ``<MMDD_HHMM><slug>.<file_type>``, where the slug is *prompt*
    with every run of non-word characters removed, cut to 90 characters.
    """
    now_central = datetime.now(pytz.timezone('US/Central'))
    stamp = now_central.strftime("%m%d_%H%M")
    slug = re.sub(r'\W+', '', prompt)
    slug = slug[:90]
    return f"{stamp}{slug}.{file_type}"
95
+
96
def create_file(filename, prompt, response, should_save=True):
    """Write *prompt*, a blank line, then *response* to *filename* as UTF-8.

    Does nothing when *should_save* is false. Returns ``None``.
    """
    if not should_save:
        return
    with open(filename, 'w', encoding='utf-8') as out:
        # join() keeps the original contract: both parts must be strings.
        out.write("\n\n".join((prompt, response)))
100
+
101
def load_file(file_name):
    """Return the full text of *file_name*, decoded as UTF-8."""
    with open(file_name, mode="r", encoding='utf-8') as source:
        text = source.read()
    return text
104
+
105
def create_zip_of_files(files):
    """Bundle *files* into ``all_files.zip`` in the working directory.

    Args:
        files: Iterable of paths; each is stored under the path given.

    Returns:
        The archive name, ``"all_files.zip"``. An empty *files* yields an
        empty archive.
    """
    zip_name = "all_files.zip"
    # ZipFile defaults to ZIP_STORED (no compression); these are text files,
    # so deflate them.
    with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        for path in files:
            archive.write(path)
    return zip_name
111
+
112
def preprocess_text(text):
    """Flatten *text* into a single-line, ASCII-only, quote-escaped string.

    Line breaks (CRLF, CR, LF) become the two characters backslash + ``n``,
    double quotes get a leading backslash, tabs become spaces, non-ASCII
    characters are dropped, and surrounding whitespace is stripped.
    """
    # One pass over all newline flavours; CRLF is listed first so it is
    # consumed as a unit, matching the sequential replacements it replaces.
    flattened = re.sub(r'\r\n|\r|\n', lambda _match: '\\n', text)
    escaped = flattened.replace('"', '\\"')
    untabbed = escaped.replace('\t', ' ')
    ascii_only = re.sub(r'[^\x00-\x7F]+', '', untabbed)
    return ascii_only.strip()
118
+
119
def sanitize_json_text(text):
    """Remove ASCII control characters from *text* and strip its ends.

    Tab, line feed and carriage return are kept; every other code point
    below 0x20 is deleted.
    """
    doomed = list(range(0x00, 0x09)) + [0x0B, 0x0C] + list(range(0x0E, 0x20))
    cleaned = text.translate(dict.fromkeys(doomed))
    return cleaned.strip()
122
+
123
def extract_pdf_text(pdf_file):
    """Return the text of every page of *pdf_file*, concatenated in order.

    *pdf_file* is handed to ``PyPDF2.PdfReader``. Pages whose
    ``extract_text()`` yields a falsy value contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)
129
+
130
+ # =============================================================================
131
+ # ───────────── COSMOS DB FUNCTIONS ─────────────
132
+ # =============================================================================
133
def get_databases(client):
    """Return the ``id`` of every database reported by ``client.list_databases()``."""
    names = []
    for entry in client.list_databases():
        names.append(entry['id'])
    return names
135
+
136
def get_containers(database):
    """Return the ``id`` of every container reported by ``database.list_containers()``."""
    names = []
    for entry in database.list_containers():
        names.append(entry['id'])
    return names
138
+
139
def get_documents(container, limit=None):
    """Return documents from *container*, newest first (ordered by ``_ts``).

    Args:
        container: Object exposing ``query_items(query=..., ...)``.
        limit: Maximum number of documents to return; ``None`` returns all.

    Returns:
        A list of the items yielded by the query.
    """
    from itertools import islice  # local import: not in the module header

    query = "SELECT * FROM c ORDER BY c._ts DESC"
    results = container.query_items(
        query=query,
        enable_cross_partition_query=True,
        # max_item_count is still forwarded as before. On its own it did not
        # cap the result, because list() drained every item the iterator
        # produced, so the cap is enforced below.
        max_item_count=limit,
    )
    if limit is None:
        return list(results)
    return list(islice(results, max(limit, 0)))
143
+
144
def insert_record(container, record):
    """Create *record* in *container* and report the outcome.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` when
        ``create_item`` raises; the message carries the error text.
        Exceptions are never propagated.
    """
    # The emoji in the status strings were mis-encoded and are restored.
    try:
        container.create_item(body=record)
    except exceptions.CosmosHttpResponseError as e:
        return False, f"HTTP error: {e} 🚨"
    except Exception as e:  # deliberate catch-all: the UI shows the message
        return False, f"Error: {e} 😱"
    return True, "Inserted! 🎉"
152
+
153
def update_record(container, updated_record):
    """Upsert *updated_record* into *container* and report the outcome.

    Returns:
        ``(True, message)`` naming the record id on success, otherwise
        ``(False, message)`` with the error text. A record without an
        ``id`` key is reported through the failure tuple as well, as it
        was before.
    """
    # The emoji in the status strings were mis-encoded and are restored.
    try:
        container.upsert_item(body=updated_record)
        return True, f"Updated {updated_record['id']} 🛠️"
    except exceptions.CosmosHttpResponseError as e:
        return False, f"HTTP error: {e} 🚨"
    except Exception as e:  # deliberate catch-all: the UI shows the message
        return False, f"Error: {e} 😱"
161
+
162
def delete_record(container, record):
    """Delete *record* from *container* and report the outcome.

    The partition key value is the record's ``pk`` field, falling back to
    its ``id`` when ``pk`` is absent.

    Returns:
        ``(True, message)`` when the item was deleted or was already gone,
        ``(False, message)`` otherwise.
    """
    # Resolve the id before entering the main try block: the handlers below
    # interpolate doc_id, which used to be unbound (and raised from inside
    # the handler) when the record had no "id".
    try:
        doc_id = record["id"]
    except (KeyError, TypeError):
        return False, "Unexpected error deleting record: missing 'id' 😱"

    # The emoji in the status strings were mis-encoded and are restored.
    try:
        partition_key_value = record.get("pk", doc_id)
        st.write(f"Deleting {doc_id} with partition key {partition_key_value}")
        container.delete_item(item=doc_id, partition_key=partition_key_value)
        return True, f"Record {doc_id} deleted. 🗑️"
    except exceptions.CosmosResourceNotFoundError:
        return True, f"Record {doc_id} not found (already deleted). 🗑️"
    except exceptions.CosmosHttpResponseError as e:
        return False, f"HTTP error deleting {doc_id}: {e} 🚨"
    except Exception as e:  # deliberate catch-all: the UI shows the message
        return False, f"Unexpected error deleting {doc_id}: {e} 😱"
175
+
176
def save_to_cosmos_db(container, query, response1, response2):
    """Store a query and its two responses as an ``ai_response`` document.

    The document's ``id``, ``pk`` and ``name`` share one generated id.
    Success or failure is shown through Streamlit; nothing is returned and
    no exception is propagated.
    """
    from datetime import timezone  # module header only imports `datetime`

    try:
        # Reuse the shared id helper instead of duplicating its body here.
        new_id = generate_unique_id()
        # utcnow() is deprecated since Python 3.12. Dropping tzinfo keeps the
        # stored value in the same naive-UTC ISO format as before.
        utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
        record = {
            "id": new_id,
            "pk": new_id,
            "name": new_id,
            "query": query,
            "response1": response1,
            "response2": response2,
            "timestamp": utc_now.isoformat(),
            "type": "ai_response",
            "version": "1.0",
        }
        container.create_item(body=record)
        st.success(f"Saved: {record['id']}")
    except Exception as e:  # deliberate catch-all: surfaced in the UI
        st.error(f"Save error: {e}")
196
+
197
def archive_current_container(database_name, container_name, client):
    """Export every item of a container to a zip file and return a download link.

    ``./cosmos_archive`` is wiped and recreated, each item is written to
    ``<container_name>/<id>.json`` inside it, and the directory is zipped
    into ``<container_name>_archive_<timestamp>.zip`` in the working
    directory.

    Returns:
        The HTML link from ``get_download_link`` on success, or a plain
        ``"Archive error: ..."`` string on failure (callers tell the two
        apart by the leading ``<a``).
    """
    try:
        base_dir = "./cosmos_archive"
        if os.path.exists(base_dir):
            shutil.rmtree(base_dir)
        os.makedirs(base_dir)
        db_client = client.get_database_client(database_name)
        container_client = db_client.get_container_client(container_name)
        container_dir = os.path.join(base_dir, container_name)
        os.makedirs(container_dir)
        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        for index, item in enumerate(container_client.read_all_items()):
            # Items without an id used to share one second-resolution
            # fallback name and overwrite each other; the index keeps them
            # apart.
            item_id = item.get('id', f"unknown_{stamp}_{index}")
            with open(os.path.join(container_dir, f"{item_id}.json"), 'w', encoding='utf-8') as f:
                json.dump(item, f, indent=2)
        archive_name = f"{container_name}_archive_{stamp}"
        shutil.make_archive(archive_name, 'zip', base_dir)
        return get_download_link(f"{archive_name}.zip")
    except Exception as e:  # deliberate catch-all: the caller shows the text
        # The emoji in this message was mis-encoded and is restored.
        return f"Archive error: {e} 😢"
217
+
218
def create_new_container(database, container_id, partition_key_path):
    """Create a container, or return the existing one with the same id.

    Returns the new container, the existing container client when the id
    is already taken, or ``None`` after showing a Streamlit error for any
    other Cosmos HTTP error.
    """
    try:
        key_spec = PartitionKey(path=partition_key_path)
        return database.create_container(id=container_id, partition_key=key_spec)
    except exceptions.CosmosResourceExistsError:
        return database.get_container_client(container_id)
    except exceptions.CosmosHttpResponseError as err:
        st.error(f"Error creating container: {str(err)}")
    return None
230
+
231
+ # =============================================================================
232
+ # ───────────── GITHUB FUNCTIONS ─────────────
233
+ # =============================================================================
234
def download_github_repo(url, local_path):
    """Clone *url* into *local_path*, deleting whatever is there first."""
    stale_copy = os.path.exists(local_path)
    if stale_copy:
        # Remove the existing tree before cloning into the same location.
        shutil.rmtree(local_path)
    Repo.clone_from(url, local_path)
238
+
239
def create_zip_file(source_dir, output_filename):
    """Zip the contents of *source_dir* into ``<output_filename>.zip``."""
    shutil.make_archive(base_name=output_filename, format='zip', root_dir=source_dir)
241
+
242
def create_repo(g, repo_name):
    """Create *repo_name* for the user returned by ``g.get_user()`` and return it."""
    return g.get_user().create_repo(repo_name)
245
+
246
def push_to_github(local_path, repo, github_token):
    """Commit everything in the repository at *local_path* and push it to *repo*.

    ``origin`` is pointed at (or created with) a URL that embeds
    *github_token*. A repository with no heads gets a new ``main`` branch;
    otherwise the active branch is pushed. A commit is made only when the
    working tree is dirty after staging.
    """
    # NOTE(review): the token is written into the remote URL, which git
    # stores in the repository configuration — confirm that is acceptable.
    remote_url = f"https://{github_token}@github.com/{repo.full_name}.git"
    working_copy = Repo(local_path)

    has_origin = any(remote.name == 'origin' for remote in working_copy.remotes)
    if has_origin:
        origin = working_copy.remote('origin')
        origin.set_url(remote_url)
    else:
        origin = working_copy.create_remote('origin', remote_url)

    if working_copy.heads:
        branch = working_copy.active_branch.name
    else:
        working_copy.git.checkout('-b', 'main')
        branch = 'main'

    working_copy.git.add(A=True)
    if working_copy.is_dirty():
        working_copy.git.commit('-m', 'Initial commit')
    origin.push(refspec=f'{branch}:{branch}')
263
+
264
+ # =============================================================================
265
+ # ───────────── FILE & MEDIA MANAGEMENT FUNCTIONS ─────────────
266
+ # =============================================================================
267
def display_file_viewer(file_path):
    """Render a read-only markdown preview of *file_path* with a download button.

    Nothing is rendered when ``load_file`` returns an empty (falsy) string.
    """
    content = load_file(file_path)
    if not content:
        return
    # The emoji in these labels were mis-encoded and are restored.
    st.markdown("### 📄 File Viewer")
    st.markdown(f"**{file_path}**")
    file_stats = os.stat(file_path)
    modified = datetime.fromtimestamp(file_stats.st_mtime).strftime('%Y-%m-%d %H:%M:%S')
    st.markdown(f"**Mod:** {modified} | **Size:** {file_stats.st_size} bytes")
    st.markdown("---")
    st.markdown(content)
    st.download_button("⬇️", data=content, file_name=os.path.basename(file_path), mime="text/markdown")
277
+
278
def display_file_editor(file_path):
    """Render a text editor for *file_path* with Save and download buttons.

    The file's text is cached in ``st.session_state.file_content`` the
    first time it is edited. Saving strips control characters with
    ``sanitize_json_text``, writes the file as UTF-8, refreshes the cache
    and reruns the app.
    """
    if 'file_content' not in st.session_state:
        st.session_state.file_content = {}
    if file_path not in st.session_state.file_content:
        content = load_file(file_path)
        if content:
            st.session_state.file_content[file_path] = content
    # The emoji in these labels were mis-encoded and are restored.
    st.markdown("### ✏️ Edit File")
    st.markdown(f"**Editing:** {file_path}")
    new_content = st.text_area(
        "Edit:",
        value=st.session_state.file_content.get(file_path, ""),
        height=400,
        key=f"editor_{hash(file_path)}",
    )
    col1, col2 = st.columns([1, 5])
    with col1:
        if st.button("💾 Save"):
            sanitized = sanitize_json_text(new_content)
            try:
                # Only JSON files are validated as JSON. The check used to
                # run on every file, so the markdown files this editor is
                # opened on always failed to save.
                if str(file_path).lower().endswith('.json'):
                    json.loads(sanitized)
                with open(file_path, 'w', encoding='utf-8') as file:
                    file.write(sanitized)
                st.session_state.file_content[file_path] = sanitized
                st.success("Saved! 🎉")
                time.sleep(1)  # leave the confirmation visible before the rerun
                st.rerun()
            except Exception as e:  # deliberate catch-all: surfaced in the UI
                st.error(f"Save error: {e}")
    with col2:
        st.download_button("⬇️", data=new_content, file_name=os.path.basename(file_path), mime="text/markdown")
304
+
305
def update_file_management_section():
    """Render the sidebar file manager for ``*.md`` files in the working directory.

    Offers delete-all and download-all buttons, per-file view / download /
    edit / delete controls, the external help links, and then renders the
    viewer or editor for the currently selected file.
    """
    if 'file_view_mode' not in st.session_state:
        st.session_state.file_view_mode = None
    if 'current_file' not in st.session_state:
        st.session_state.current_file = None
    all_files = sorted(glob.glob("*.md"), reverse=True)

    # The emoji in these labels were mis-encoded and are restored.
    st.sidebar.subheader("📁 Files")
    if st.sidebar.button("🗑 Delete All"):
        for file in all_files:
            os.remove(file)
        st.session_state.file_content = {}
        st.session_state.current_file = None
        st.session_state.file_view_mode = None
        st.rerun()
    if st.sidebar.button("⬇️ Download All"):
        zip_file = create_zip_of_files(all_files)
        st.sidebar.markdown(get_download_link(zip_file), unsafe_allow_html=True)

    for file in all_files:
        col1, col2, col3, col4 = st.sidebar.columns([1, 3, 1, 1])
        with col1:
            if st.button("🌐", key=f"view_{file}"):
                st.session_state.current_file = file
                st.session_state.file_view_mode = 'view'
                st.rerun()
        with col2:
            st.markdown(get_download_link(file), unsafe_allow_html=True)
        with col3:
            if st.button("📂", key=f"edit_{file}"):
                st.session_state.current_file = file
                st.session_state.file_view_mode = 'edit'
                st.rerun()
        with col4:
            if st.button("🗑", key=f"delete_{file}"):
                os.remove(file)
                # file_content only exists once the editor has been opened
                # (or Delete All was used); reading it as an attribute here
                # raised when a file was deleted before that.
                st.session_state.get('file_content', {}).pop(file, None)
                if st.session_state.current_file == file:
                    st.session_state.current_file = None
                    st.session_state.file_view_mode = None
                st.rerun()

    st.sidebar.subheader("External Help Links")
    for link in external_links:
        st.sidebar.markdown(f"{link['emoji']} [{link['title']}]({link['url']})", unsafe_allow_html=True)

    if st.session_state.current_file:
        if st.session_state.file_view_mode == 'view':
            display_file_viewer(st.session_state.current_file)
        elif st.session_state.file_view_mode == 'edit':
            display_file_editor(st.session_state.current_file)
353
+
354
+ # =============================================================================
355
+ # ───────────── UI FUNCTIONS ─────────────
356
+ # =============================================================================
357
def edit_all_documents(container, search_keyword=None):
    """Render one expandable JSON editor per document in *container*.

    Args:
        container: Container client passed on to ``get_documents``,
            ``update_record`` and ``delete_record``.
        search_keyword: When given, only documents for which
            ``vector_keyword_search`` returns true are shown.

    Each expander offers Save (upsert the edited JSON, keeping the original
    ``id`` and ``pk`` and dropping Cosmos system fields) and Delete. A
    document with a ``pdf_data`` field also gets a PDF download button.
    """
    # The emoji in the labels below were mis-encoded and are restored.
    st.markdown("### 📑 All Documents" + (f" (Filtered: '{search_keyword}')" if search_keyword else ""))
    documents = get_documents(container)
    if search_keyword:
        documents = [doc for doc in documents if vector_keyword_search(search_keyword, doc)]
    if not documents:
        st.info("No documents match the current filter." if search_keyword else "No documents in this container.")
        return

    if 'saved_docs' not in st.session_state:
        st.session_state.saved_docs = {}

    for doc in documents:
        ts = doc.get("_ts", 0)
        dt = datetime.fromtimestamp(ts) if ts else datetime.now()
        formatted_ts = dt.strftime("%I:%M %p %m/%d/%Y")
        header = f"{doc.get('name', 'Unnamed')} - {formatted_ts}"
        with st.expander(header):
            col_json, col_pdf = st.columns([2, 1])
            with col_json:
                doc_key = f"editor_{doc['id']}"
                # Prefer the last saved text so the editor reflects it.
                initial_value = st.session_state.saved_docs.get(doc['id'], json.dumps(doc, indent=2))
                edited_content = st.text_area("Edit JSON", value=initial_value, height=300, key=doc_key)
            with col_pdf:
                if 'pdf_data' in doc:
                    st.markdown("### 📜 PDF Preview")
                    pdf_bytes = base64.b64decode(doc['pdf_data'])
                    st.download_button(
                        label="⬇️ Download PDF",
                        data=pdf_bytes,
                        file_name=f"{doc.get('name', 'document')}.pdf",
                        mime="application/pdf",
                    )

            col_save, col_delete = st.columns(2)
            with col_save:
                if st.button("💾 Save", key=f"save_{doc['id']}"):
                    cleaned_content = sanitize_json_text(edited_content)
                    try:
                        updated_doc = json.loads(cleaned_content)
                        # Identity fields always come from the stored
                        # document, never from the edited text.
                        updated_doc['id'] = doc['id']
                        updated_doc['pk'] = doc.get('pk', doc['id'])
                        for field in ['_ts', '_rid', '_self', '_etag', '_attachments']:
                            updated_doc.pop(field, None)
                        success, message = update_record(container, updated_doc)
                        if success:
                            st.success(f"Saved {doc['id']}")
                            st.session_state.saved_docs[doc['id']] = json.dumps(updated_doc, indent=2)
                            st.rerun()
                        else:
                            st.error(message)
                    except json.JSONDecodeError as e:
                        st.error(f"Invalid JSON format: {str(e)}\nProblematic input:\n{cleaned_content}")
                    except Exception as e:  # deliberate catch-all: surfaced in the UI
                        st.error(f"Save error: {str(e)}")
            with col_delete:
                if st.button("🗑️ Delete", key=f"delete_{doc['id']}"):
                    success, message = delete_record(container, doc)
                    if success:
                        st.success(message)
                        st.session_state.saved_docs.pop(doc['id'], None)
                        st.rerun()
                    else:
                        st.error(message)
422
+
423
def new_item_default(container):
    """Insert a placeholder ``sample`` document into *container*.

    ``id`` and ``pk`` share one generated id. On success the app is rerun;
    on failure the error from ``insert_record`` is shown.
    """
    new_id = generate_unique_id()
    default_doc = {
        "id": new_id,
        "pk": new_id,
        "name": "New Document",
        "content": "Start editing here...",
        "timestamp": datetime.now().isoformat(),
        "type": "sample",
    }
    success, message = insert_record(container, default_doc)
    if success:
        # The emoji in this message was mis-encoded and is restored.
        st.success("New document created! ✨")
        st.rerun()
    else:
        st.error(f"Error creating new item: {message}")
439
+
440
def new_item_from_pdf(container, pdf_file):
    """Insert an uploaded PDF into *container* as a ``pdf_document`` item.

    The document stores the first 1000 characters of extracted text in
    ``content`` and the whole file, base64-encoded, in ``pdf_data``.

    The uploader widget still holds the file after the rerun this function
    triggers, so an unguarded call inserted a new copy on every rerun. The
    SHA-256 of each stored upload is now remembered in
    ``st.session_state.ingested_pdf_hashes`` and a file already stored in
    this session is skipped.

    Args:
        container: Container client passed to ``insert_record``.
        pdf_file: Uploaded file object exposing ``read()`` and ``name``.
    """
    if hasattr(pdf_file, 'seek'):
        pdf_file.seek(0)  # a previous run may have consumed the stream
    pdf_bytes = pdf_file.read()

    digest = hashlib.sha256(pdf_bytes).hexdigest()
    if "ingested_pdf_hashes" not in st.session_state:
        st.session_state.ingested_pdf_hashes = set()
    if digest in st.session_state.ingested_pdf_hashes:
        return

    new_id = generate_unique_id()
    pdf_text = extract_pdf_text(io.BytesIO(pdf_bytes))
    pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
    default_doc = {
        "id": new_id,
        "pk": new_id,
        "name": pdf_file.name,
        "content": pdf_text[:1000],  # preview only; the whole file is in pdf_data
        "timestamp": datetime.now().isoformat(),
        "type": "pdf_document",
        "pdf_data": pdf_base64,
    }
    success, message = insert_record(container, default_doc)
    if success:
        st.session_state.ingested_pdf_hashes.add(digest)
        # The emoji in this message was mis-encoded and is restored.
        st.success(f"PDF document '{pdf_file.name}' created! ✨")
        st.rerun()
    else:
        st.error(f"Error creating PDF item: {message}")
460
+
461
def add_field_to_doc():
    """Add the sidebar's key/value pair to the document held in session state.

    Reads ``new_field_key``, ``new_field_value``, ``doc_editor`` (JSON
    text) and ``current_container`` from ``st.session_state``, writes the
    updated JSON back to ``doc_editor`` and upserts the document.
    """
    key = st.session_state.new_field_key
    value = st.session_state.new_field_value
    if not key:
        # An empty key used to be written as a "" field on the document.
        st.error("Error adding field: field key is empty")
        return
    try:
        doc = json.loads(st.session_state.doc_editor)
        doc[key] = value
        st.session_state.doc_editor = json.dumps(doc, indent=2)
        container = st.session_state.current_container
        success, message = update_record(container, doc)
        if success:
            # The emoji in this message was mis-encoded and is restored.
            st.success(f"Added field {key} 👍")
        else:
            st.error(message)
    except Exception as e:  # deliberate catch-all: surfaced in the UI
        st.error(f"Error adding field: {e}")
476
+
477
def new_ai_record(container):
    """Insert a template ``ai_modality`` document into *container*.

    ``id`` and ``pk`` share one generated id. On success the app is rerun;
    on failure the error from ``insert_record`` is shown.
    """
    new_id = generate_unique_id()
    default_doc = {
        "id": new_id,
        "pk": new_id,
        "name": "AI Modality Record",
        "function_url": "https://example.com/function",
        "input_text": "### Input (markdown)\n\nType your input here.",
        "output_text": "### Output (markdown)\n\nResult will appear here.",
        "timestamp": datetime.now().isoformat(),
        "type": "ai_modality",
    }
    success, message = insert_record(container, default_doc)
    if success:
        # The emoji in this message was mis-encoded and is restored.
        st.success("New AI modality record created! 💡")
        st.rerun()
    else:
        st.error(f"Error creating AI record: {message}")
495
+
496
def new_links_record(container):
    """Insert a document whose ``input_text`` is the external links as a markdown list.

    ``id`` and ``pk`` share one generated id. On success the app is rerun;
    on failure the error from ``insert_record`` is shown.
    """
    new_id = generate_unique_id()
    links_md = "\n".join(
        f"- {link['emoji']} [{link['title']}]({link['url']})" for link in external_links
    )
    default_doc = {
        "id": new_id,
        "pk": new_id,
        "name": "Portal Links Record",
        "function_url": "",
        "input_text": links_md,
        "output_text": "",
        "timestamp": datetime.now().isoformat(),
        "type": "ai_modality",
    }
    success, message = insert_record(container, default_doc)
    if success:
        # The emoji in this message was mis-encoded and is restored.
        st.success("New Portal Links record created! 🔗")
        st.rerun()
    else:
        st.error(f"Error creating links record: {message}")
515
+
516
def vector_keyword_search(keyword, doc):
    """Return True when *keyword* occurs in any top-level string value of *doc*.

    The comparison is case-insensitive. Non-string values are ignored and
    nested containers are not searched.
    """
    needle = keyword.lower()
    return any(
        isinstance(value, str) and needle in value.lower()
        for value in doc.values()
    )
522
+
523
def search_documents_ui(container):
    """Render the sidebar search form and the PDF uploader.

    A submitted, non-empty keyword is stored in
    ``st.session_state.active_search``; Clear removes it. Both actions
    rerun the app. An uploaded PDF is passed to ``new_item_from_pdf`` when
    *container* is truthy.
    """
    # The emoji in these labels were mis-encoded and are restored.
    st.sidebar.subheader("🔍 Vector Search")
    with st.sidebar.form("search_form"):
        keyword = st.text_input("Search Documents", key="search_keyword")
        col1, col2 = st.columns(2)
        with col1:
            search_submitted = st.form_submit_button("🔍 Search")
        with col2:
            clear_submitted = st.form_submit_button("🗑️ Clear")

    if search_submitted and keyword:
        st.session_state.active_search = keyword
        st.rerun()
    if clear_submitted:
        if 'active_search' in st.session_state:
            del st.session_state.active_search
        st.rerun()

    uploaded_file = st.sidebar.file_uploader("Upload Document (PDF)", type=["pdf"])
    if uploaded_file and container:
        new_item_from_pdf(container, uploaded_file)
543
+
544
def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
    """Letterbox an image onto a white RGB canvas of *target_size*.

    *file_data* may be raw bytes or an object with ``read()`` (rewound
    first when it also has ``seek()``). The image is converted to RGB,
    scaled to fit inside *target_size* with its aspect ratio kept and both
    sides rounded down to even numbers, then pasted centred on the canvas.

    Returns the canvas image, or ``None`` after showing a Streamlit error
    when anything fails.
    """
    try:
        if isinstance(file_data, bytes):
            source = Image.open(io.BytesIO(file_data))
        elif hasattr(file_data, 'read'):
            if hasattr(file_data, 'seek'):
                file_data.seek(0)
            source = Image.open(file_data)
        else:
            raise ValueError(f"Unsupported input: {type(file_data)}")

        if source.mode != 'RGB':
            source = source.convert('RGB')

        aspect_ratio = source.size[0] / source.size[1]
        wider_than_target = aspect_ratio > target_size[0] / target_size[1]
        if wider_than_target:
            # Width is the limiting side.
            fit_w = target_size[0]
            fit_h = int(fit_w / aspect_ratio)
        else:
            # Height is the limiting side.
            fit_h = target_size[1]
            fit_w = int(fit_h * aspect_ratio)
        # Round both sides down to the nearest even number.
        fit_w -= fit_w % 2
        fit_h -= fit_h % 2

        scaled = source.resize((fit_w, fit_h), Image.Resampling.LANCZOS)
        canvas = Image.new('RGB', target_size, (255, 255, 255))
        offset = ((target_size[0] - fit_w) // 2, (target_size[1] - fit_h) // 2)
        canvas.paste(scaled, offset)
        return canvas
    except Exception as e:
        st.error(f"Image error: {str(e)}")
        return None
574
+
575
def add_video_generation_ui(container):
    """Render the image-to-video panel and optionally log the result.

    The uploaded image is previewed next to its preprocessed version. On
    Generate, the preprocessed image is saved to a temporary PNG and sent
    to the ``awacke1/stable-video-diffusion`` Gradio endpoint. A returned
    video is copied into the working directory and played, and when
    *container* is truthy a ``generated_video`` record is inserted.

    Args:
        container: Container client for the log record, or a falsy value
            to skip logging.
    """
    # The emoji in the labels below were mis-encoded and are restored.
    st.markdown("### 🎥 Video Generation")
    col1, col2 = st.columns([2, 1])
    with col1:
        uploaded_file = st.file_uploader("Upload Image 🖼️", type=['png', 'jpg', 'jpeg'])
    with col2:
        st.markdown("#### Parameters")
        motion = st.slider("🌊 Motion", 1, 255, 127)
        fps = st.slider("🎬 FPS", 1, 30, 6)
        with st.expander("Advanced"):
            use_custom = st.checkbox("Custom Seed")
            seed = st.number_input("Seed", value=int(time.time() * 1000)) if use_custom else None

    if not uploaded_file:
        return

    file_data = uploaded_file.read()
    preview1, preview2 = st.columns(2)
    with preview1:
        st.write("Original")
        st.image(Image.open(io.BytesIO(file_data)), use_column_width=True)
    with preview2:
        proc_img = validate_and_preprocess_image(io.BytesIO(file_data))
        if proc_img:
            st.write("Processed")
            st.image(proc_img, use_column_width=True)
        else:
            return

    if not st.button("🎥 Generate"):
        return

    with st.spinner("Generating video..."):
        # Reserve a path and close the handle before writing to it by name,
        # so the save and the later unlink do not touch an open file.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
            temp_path = temp_file.name
        try:
            proc_img.save(temp_path, format='PNG')
            client = Client("awacke1/stable-video-diffusion", hf_token=os.environ.get("HUGGINGFACE_TOKEN"))
            result = client.predict(
                image=temp_path,
                seed=seed if seed is not None else int(time.time() * 1000),
                randomize_seed=seed is None,
                motion_bucket_id=motion,
                fps_id=fps,
                api_name="/video",
            )
            video_path = result[0].get('video') if isinstance(result[0], dict) else None
            if video_path and os.path.exists(video_path):
                video_filename = f"generated_video_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
                shutil.copy(video_path, video_filename)
                st.success("Video generated! 🎉")
                st.video(video_filename)
                if container:
                    # One id for both fields, as every other record in this
                    # file does; two separate calls gave a pk that differed
                    # from the id.
                    record_id = generate_unique_id()
                    video_record = {
                        "id": record_id,
                        "pk": record_id,
                        "type": "generated_video",
                        "filename": video_filename,
                        "seed": seed if seed is not None else "random",
                        "motion": motion,
                        "fps": fps,
                        "timestamp": datetime.now().isoformat(),
                    }
                    success, message = insert_record(container, video_record)
                    if success:
                        st.success("DB record saved!")
                    else:
                        st.error(f"DB error: {message}")
            else:
                # This case used to end without any feedback.
                st.warning("Video generation returned no video file.")
        except Exception as e:  # deliberate catch-all: surfaced in the UI
            st.error(f"Video gen error: {e}")
        finally:
            os.unlink(temp_path)
640
+
641
+ # =============================================================================
642
+ # ───────────── MAIN FUNCTION ─────────────
643
+ # =============================================================================
644
def main():
    """Render the whole GitCosmos page: sidebar navigation plus the central editor.

    Order of rendering: search form (once a container is selected),
    database selector, container selector and creation form, export
    action, item actions, the document editor, the file manager, the
    video generator, and the logout button. Stops early with an error
    when the Cosmos key is missing from the environment.
    """
    # The emoji in the labels below were mis-encoded and are restored.
    st.markdown("### 🐙 GitCosmos - Cosmos & Git Hub")
    st.markdown(f"[🔗 Portal]({CosmosDBUrl})")

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "current_container" not in st.session_state:
        st.session_state.current_container = None
    if not Key:
        st.error("Missing Cosmos Key 🔑❌")
        return
    st.session_state.primary_key = Key
    st.session_state.logged_in = True

    # Sidebar: Hierarchical Navigation
    st.sidebar.title("🐙 Navigator")

    # Vector Search Section (kept at the top of the sidebar)
    if st.session_state.current_container:
        search_documents_ui(st.session_state.current_container)

    # Databases Section
    st.sidebar.subheader("🗃️ Databases")
    # Logout stores client=None. A bare `"client" not in st.session_state`
    # test then skipped re-creation and the next call hit None.
    if st.session_state.get("client") is None:
        st.session_state.client = CosmosClient(ENDPOINT, credential=Key)
    databases = get_databases(st.session_state.client)
    selected_db = st.sidebar.selectbox("Select Database", databases, key="db_select")
    if selected_db != st.session_state.get("selected_database"):
        st.session_state.selected_database = selected_db
        st.session_state.selected_container = None
        st.session_state.current_container = None
        if 'active_search' in st.session_state:
            del st.session_state.active_search
        st.rerun()

    # Containers Section
    # .get(): the key is never set when the account has no databases, and
    # attribute access on a missing key raises.
    if st.session_state.get("selected_database"):
        database = st.session_state.client.get_database_client(st.session_state.selected_database)
        st.sidebar.subheader("📁 Containers")
        # A button is only True on the run right after the click, so a form
        # nested directly under it vanished before it could be submitted.
        # Keep the form open through a session flag instead.
        if st.sidebar.button("🆕 New Container"):
            st.session_state.show_new_container_form = True
        if st.session_state.get("show_new_container_form"):
            with st.sidebar.form("new_container_form"):
                container_id = st.text_input("Container ID", "new-container")
                partition_key = st.text_input("Partition Key", "/pk")
                if st.form_submit_button("Create"):
                    container = create_new_container(database, container_id, partition_key)
                    if container:
                        st.session_state.show_new_container_form = False
                        st.success(f"Container '{container_id}' created!")
                        st.rerun()
        containers = get_containers(database)
        selected_container = st.sidebar.selectbox("Select Container", containers, key="container_select")
        if selected_container != st.session_state.get("selected_container"):
            st.session_state.selected_container = selected_container
            st.session_state.current_container = database.get_container_client(selected_container)
            if 'active_search' in st.session_state:
                del st.session_state.active_search
            st.rerun()

    # NOTE(review): the flattened source does not show whether the Actions
    # and Items sections sat inside the database branch above. Both guard
    # on current_container themselves, so they are placed at function
    # level here — confirm against the original layout.

    # Actions Section
    st.sidebar.subheader("⚙️ Actions")
    if st.session_state.current_container:
        if st.sidebar.button("📦 Export Container"):
            download_link = archive_current_container(
                st.session_state.selected_database,
                st.session_state.selected_container,
                st.session_state.client,
            )
            # The helper returns an <a> tag on success, plain text on error.
            if download_link.startswith('<a'):
                st.sidebar.markdown(download_link, unsafe_allow_html=True)
            else:
                st.sidebar.error(download_link)

    # Items Section
    st.sidebar.subheader("📑 Items")
    if st.session_state.current_container:
        if st.sidebar.button("➕ New Item"):
            new_item_default(st.session_state.current_container)
        st.sidebar.text_input("New Field Key", key="new_field_key")
        st.sidebar.text_input("New Field Value", key="new_field_value")
        if st.sidebar.button("➕ Add Field") and "doc_editor" in st.session_state:
            add_field_to_doc()
        if st.sidebar.button("🤖 New AI Record"):
            new_ai_record(st.session_state.current_container)
        if st.sidebar.button("🔗 New Links Record"):
            new_links_record(st.session_state.current_container)

    # Central Area: Editable Documents with Search Filter
    if st.session_state.current_container:
        search_keyword = st.session_state.get('active_search', None)
        edit_all_documents(st.session_state.current_container, search_keyword)
    else:
        st.info("Select a database and container to view and edit documents.")

    # Additional Features
    update_file_management_section()
    add_video_generation_ui(st.session_state.current_container if st.session_state.current_container else None)

    # Logout: clears the cached client and the current selection. The key
    # comes from the environment, so the next run signs in again.
    if st.session_state.logged_in and st.sidebar.button("🚪 Logout"):
        st.session_state.logged_in = False
        st.session_state.client = None
        st.session_state.selected_database = None
        st.session_state.selected_container = None
        st.session_state.current_container = None
        if 'active_search' in st.session_state:
            del st.session_state.active_search
        st.rerun()


if __name__ == "__main__":
    main()