Spaces:
Running
Running
Fix 'Process Document Again' button to only appear for the current document
Browse files
app.py
CHANGED
@@ -58,7 +58,7 @@ def convert_pdf_to_images(pdf_bytes, dpi=150, rotation=0):
|
|
58 |
return []
|
59 |
|
60 |
# Cache preprocessed images for better performance
|
61 |
-
@st.cache_data(ttl=24*3600, show_spinner=False) # Cache for 24 hours
|
62 |
def preprocess_image(image_bytes, preprocessing_options):
|
63 |
"""Preprocess image with selected options optimized for historical document OCR quality"""
|
64 |
# Setup basic console logging
|
@@ -175,7 +175,7 @@ def preprocess_image(image_bytes, preprocessing_options):
|
|
175 |
|
176 |
# Cache OCR results in memory to speed up repeated processing
|
177 |
@st.cache_data(ttl=24*3600, max_entries=20, show_spinner=False)
|
178 |
-
def process_file_cached(file_path, file_type, use_vision, file_size_mb, cache_key):
|
179 |
"""Cached version of OCR processing to reuse results"""
|
180 |
# Initialize OCR processor
|
181 |
processor = StructuredOCR()
|
@@ -241,6 +241,8 @@ def process_file(uploaded_file, use_vision=True, preprocessing_options=None, pro
|
|
241 |
with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
|
242 |
tmp.write(file_bytes)
|
243 |
temp_path = tmp.name
|
|
|
|
|
244 |
|
245 |
# Get PDF rotation value if available and file is a PDF
|
246 |
pdf_rotation_value = pdf_rotation if 'pdf_rotation' in locals() and file_type == "pdf" else 0
|
@@ -284,11 +286,34 @@ def process_file(uploaded_file, use_vision=True, preprocessing_options=None, pro
|
|
284 |
# Generate cache key
|
285 |
import hashlib
|
286 |
file_hash = hashlib.md5(file_bytes).hexdigest()
|
287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
# Process with cached function if possible
|
290 |
try:
|
291 |
-
result = process_file_cached(temp_path, file_type, use_vision, file_size_mb, cache_key)
|
292 |
progress_bar.progress(90)
|
293 |
status_text.markdown('<div class="processing-status-container">Finalizing results...</div>', unsafe_allow_html=True)
|
294 |
except Exception as e:
|
@@ -343,7 +368,12 @@ def process_file(uploaded_file, use_vision=True, preprocessing_options=None, pro
|
|
343 |
# Clean up original temp file and use the processed one
|
344 |
if os.path.exists(temp_path):
|
345 |
os.unlink(temp_path)
|
|
|
|
|
|
|
346 |
temp_path = proc_tmp.name
|
|
|
|
|
347 |
progress_bar.progress(30)
|
348 |
else:
|
349 |
progress_bar.progress(30)
|
@@ -377,19 +407,37 @@ def process_file(uploaded_file, use_vision=True, preprocessing_options=None, pro
|
|
377 |
# Add pdf_rotation to cache key if present
|
378 |
pdf_rotation_value = pdf_rotation if 'pdf_rotation' in locals() else 0
|
379 |
file_hash = hashlib.md5(open(temp_path, 'rb').read()).hexdigest()
|
380 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
|
382 |
progress_bar.progress(50)
|
383 |
# Check if we have custom instructions
|
384 |
has_custom_prompt = 'custom_prompt' in locals() and custom_prompt and len(str(custom_prompt).strip()) > 0
|
|
|
|
|
385 |
if has_custom_prompt:
|
386 |
status_text.markdown('<div class="processing-status-container">Processing document with custom instructions...</div>', unsafe_allow_html=True)
|
|
|
|
|
|
|
387 |
else:
|
388 |
status_text.markdown('<div class="processing-status-container">Processing document with OCR...</div>', unsafe_allow_html=True)
|
389 |
|
390 |
# Process the file using cached function if possible
|
391 |
try:
|
392 |
-
result = process_file_cached(temp_path, file_type, use_vision, file_size_mb, cache_key)
|
393 |
progress_bar.progress(80)
|
394 |
status_text.markdown('<div class="processing-status-container">Analyzing document structure...</div>', unsafe_allow_html=True)
|
395 |
progress_bar.progress(90)
|
@@ -471,6 +519,40 @@ except ImportError:
|
|
471 |
if 'previous_results' not in st.session_state:
|
472 |
st.session_state.previous_results = []
|
473 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
# Create main layout with tabs and columns
|
475 |
main_tab1, main_tab2, main_tab3 = st.tabs(["Document Processing", "Previous Results", "About"])
|
476 |
|
@@ -1243,32 +1325,47 @@ with main_tab1:
|
|
1243 |
</style>
|
1244 |
""", unsafe_allow_html=True)
|
1245 |
|
1246 |
-
#
|
|
|
|
|
|
|
|
|
1247 |
if 'sample_document' in st.session_state and st.session_state.sample_document is not None:
|
1248 |
# Use the sample document
|
1249 |
uploaded_file = st.session_state.sample_document
|
1250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1251 |
st.markdown(
|
1252 |
f"""
|
1253 |
<div style="background-color: #D4EDDA; color: #155724; padding: 10px;
|
1254 |
border-radius: 4px; border-left: 5px solid #155724; margin-bottom: 10px;">
|
1255 |
<div style="display: flex; justify-content: space-between; align-items: center;">
|
1256 |
-
<span style="font-weight: bold;">Sample Document: {
|
1257 |
</div>
|
1258 |
</div>
|
1259 |
""",
|
1260 |
unsafe_allow_html=True
|
1261 |
)
|
1262 |
-
|
1263 |
-
# Set auto-process flag in session state if this is a newly loaded sample
|
1264 |
-
if st.session_state.sample_just_loaded:
|
1265 |
-
st.session_state.auto_process_sample = True
|
1266 |
-
# Mark that this is a sample document being processed
|
1267 |
-
st.session_state.sample_document_processed = True
|
1268 |
-
st.session_state.sample_just_loaded = False
|
1269 |
-
|
1270 |
-
# Clear sample document after use to avoid interference with future uploads
|
1271 |
-
st.session_state.sample_document = None
|
1272 |
|
1273 |
if uploaded_file is not None:
|
1274 |
# Check file size (cap at 50MB)
|
@@ -1278,6 +1375,12 @@ with main_tab1:
|
|
1278 |
with left_col:
|
1279 |
st.error(f"File too large ({file_size_mb:.1f} MB). Maximum file size is 50MB.")
|
1280 |
st.stop()
|
|
|
|
|
|
|
|
|
|
|
|
|
1281 |
|
1282 |
file_ext = Path(uploaded_file.name).suffix.lower()
|
1283 |
|
@@ -1288,6 +1391,32 @@ with main_tab1:
|
|
1288 |
# Make the button more clear about its function
|
1289 |
if st.session_state.processed_document_active:
|
1290 |
process_button = st.button("Process Document Again")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1291 |
else:
|
1292 |
process_button = st.button("Process Document")
|
1293 |
|
@@ -1392,6 +1521,8 @@ with main_tab1:
|
|
1392 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
1393 |
tmp.write(uploaded_file.getvalue())
|
1394 |
temp_path = tmp.name
|
|
|
|
|
1395 |
|
1396 |
# Apply PDF rotation if specified
|
1397 |
pdf_rotation_value = pdf_rotation if 'pdf_rotation' in locals() else 0
|
@@ -1451,6 +1582,9 @@ with main_tab1:
|
|
1451 |
# Clean up temp file
|
1452 |
if os.path.exists(temp_path):
|
1453 |
os.unlink(temp_path)
|
|
|
|
|
|
|
1454 |
|
1455 |
except Exception as e:
|
1456 |
# If anything fails, revert to standard processing
|
@@ -1460,6 +1594,9 @@ with main_tab1:
|
|
1460 |
# For non-PDF files, use normal processing with custom prompt
|
1461 |
# Save the uploaded file to a temporary file with preprocessing
|
1462 |
with tempfile.NamedTemporaryFile(delete=False, suffix=Path(uploaded_file.name).suffix) as tmp:
|
|
|
|
|
|
|
1463 |
# Apply preprocessing if any options are selected
|
1464 |
if any(preprocessing_options.values()):
|
1465 |
# Apply performance mode settings
|
@@ -1473,7 +1610,6 @@ with main_tab1:
|
|
1473 |
tmp.write(processed_bytes)
|
1474 |
else:
|
1475 |
tmp.write(uploaded_file.getvalue())
|
1476 |
-
temp_path = tmp.name
|
1477 |
|
1478 |
# Show progress
|
1479 |
with progress_placeholder.container():
|
@@ -1522,6 +1658,9 @@ with main_tab1:
|
|
1522 |
# Clean up temporary file
|
1523 |
if os.path.exists(temp_path):
|
1524 |
try:
|
|
|
|
|
|
|
1525 |
os.unlink(temp_path)
|
1526 |
except:
|
1527 |
pass
|
@@ -2104,6 +2243,10 @@ with main_tab1:
|
|
2104 |
# Set processed_document_active to True when a new document is processed
|
2105 |
st.session_state.processed_document_active = True
|
2106 |
|
|
|
|
|
|
|
|
|
2107 |
# Button styling is now handled by global CSS
|
2108 |
|
2109 |
# Display success message with close button for dismissing processed documents
|
@@ -2114,12 +2257,18 @@ with main_tab1:
|
|
2114 |
# Close button styling is now handled by global CSS
|
2115 |
|
2116 |
if st.button("✕ Close Document", key="close_document_button", help="Clear current document and start over"):
|
2117 |
-
#
|
2118 |
-
st.session_state.
|
2119 |
-
|
2120 |
-
|
2121 |
-
|
2122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
2123 |
st.rerun()
|
2124 |
|
2125 |
# Store the result in the previous results list
|
|
|
58 |
return []
|
59 |
|
60 |
# Cache preprocessed images for better performance
|
61 |
+
@st.cache_data(ttl=24*3600, show_spinner=False, hash_funcs={dict: lambda x: str(sorted(x.items()))}) # Cache for 24 hours
|
62 |
def preprocess_image(image_bytes, preprocessing_options):
|
63 |
"""Preprocess image with selected options optimized for historical document OCR quality"""
|
64 |
# Setup basic console logging
|
|
|
175 |
|
176 |
# Cache OCR results in memory to speed up repeated processing
|
177 |
@st.cache_data(ttl=24*3600, max_entries=20, show_spinner=False)
|
178 |
+
def process_file_cached(file_path, file_type, use_vision, file_size_mb, cache_key, preprocessing_options_hash=None):
|
179 |
"""Cached version of OCR processing to reuse results"""
|
180 |
# Initialize OCR processor
|
181 |
processor = StructuredOCR()
|
|
|
241 |
with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
|
242 |
tmp.write(file_bytes)
|
243 |
temp_path = tmp.name
|
244 |
+
# Track temporary file for cleanup
|
245 |
+
st.session_state.temp_file_paths.append(temp_path)
|
246 |
|
247 |
# Get PDF rotation value if available and file is a PDF
|
248 |
pdf_rotation_value = pdf_rotation if 'pdf_rotation' in locals() and file_type == "pdf" else 0
|
|
|
286 |
# Generate cache key
|
287 |
import hashlib
|
288 |
file_hash = hashlib.md5(file_bytes).hexdigest()
|
289 |
+
|
290 |
+
# Include preprocessing options in cache key if available
|
291 |
+
preprocessing_options_hash = ""
|
292 |
+
if 'preprocessing_options' in locals() and preprocessing_options:
|
293 |
+
# Add pdf_rotation to preprocessing options to ensure it's part of the cache key
|
294 |
+
if pdf_rotation_value != 0:
|
295 |
+
preprocessing_options_with_rotation = preprocessing_options.copy()
|
296 |
+
preprocessing_options_with_rotation['pdf_rotation'] = pdf_rotation_value
|
297 |
+
preprocessing_str = str(sorted(preprocessing_options_with_rotation.items()))
|
298 |
+
else:
|
299 |
+
preprocessing_str = str(sorted(preprocessing_options.items()))
|
300 |
+
preprocessing_options_hash = hashlib.md5(preprocessing_str.encode()).hexdigest()
|
301 |
+
elif pdf_rotation_value != 0:
|
302 |
+
# If no preprocessing options but we have rotation, include that in the hash
|
303 |
+
preprocessing_options_hash = hashlib.md5(f"pdf_rotation_{pdf_rotation_value}".encode()).hexdigest()
|
304 |
+
|
305 |
+
cache_key = f"{file_hash}_{file_type}_{use_vision}_{preprocessing_options_hash}"
|
306 |
+
|
307 |
+
# Check if we have custom prompt to include in cache key
|
308 |
+
has_custom_prompt = 'custom_prompt' in locals() and custom_prompt and len(str(custom_prompt).strip()) > 0
|
309 |
+
if has_custom_prompt:
|
310 |
+
# Update cache key to include custom prompt hash
|
311 |
+
custom_prompt_hash = hashlib.md5(str(custom_prompt).encode()).hexdigest()
|
312 |
+
cache_key = f"{cache_key}_{custom_prompt_hash}"
|
313 |
|
314 |
# Process with cached function if possible
|
315 |
try:
|
316 |
+
result = process_file_cached(temp_path, file_type, use_vision, file_size_mb, cache_key, preprocessing_options_hash)
|
317 |
progress_bar.progress(90)
|
318 |
status_text.markdown('<div class="processing-status-container">Finalizing results...</div>', unsafe_allow_html=True)
|
319 |
except Exception as e:
|
|
|
368 |
# Clean up original temp file and use the processed one
|
369 |
if os.path.exists(temp_path):
|
370 |
os.unlink(temp_path)
|
371 |
+
# Remove original temp path from tracking list
|
372 |
+
if temp_path in st.session_state.temp_file_paths:
|
373 |
+
st.session_state.temp_file_paths.remove(temp_path)
|
374 |
temp_path = proc_tmp.name
|
375 |
+
# Track new temporary file for cleanup
|
376 |
+
st.session_state.temp_file_paths.append(temp_path)
|
377 |
progress_bar.progress(30)
|
378 |
else:
|
379 |
progress_bar.progress(30)
|
|
|
407 |
# Add pdf_rotation to cache key if present
|
408 |
pdf_rotation_value = pdf_rotation if 'pdf_rotation' in locals() else 0
|
409 |
file_hash = hashlib.md5(open(temp_path, 'rb').read()).hexdigest()
|
410 |
+
|
411 |
+
# Include preprocessing options in cache key to ensure reprocessing when options change
|
412 |
+
preprocessing_options_hash = ""
|
413 |
+
if preprocessing_options:
|
414 |
+
# Add pdf_rotation to preprocessing options to ensure it's part of the cache key
|
415 |
+
if pdf_rotation_value != 0:
|
416 |
+
preprocessing_options_with_rotation = preprocessing_options.copy()
|
417 |
+
preprocessing_options_with_rotation['pdf_rotation'] = pdf_rotation_value
|
418 |
+
preprocessing_str = str(sorted(preprocessing_options_with_rotation.items()))
|
419 |
+
else:
|
420 |
+
preprocessing_str = str(sorted(preprocessing_options.items()))
|
421 |
+
preprocessing_options_hash = hashlib.md5(preprocessing_str.encode()).hexdigest()
|
422 |
+
|
423 |
+
cache_key = f"{file_hash}_{file_type}_{use_vision}_{preprocessing_options_hash}"
|
424 |
|
425 |
progress_bar.progress(50)
|
426 |
# Check if we have custom instructions
|
427 |
has_custom_prompt = 'custom_prompt' in locals() and custom_prompt and len(str(custom_prompt).strip()) > 0
|
428 |
+
|
429 |
+
# If we have custom instructions, include them in cache key
|
430 |
if has_custom_prompt:
|
431 |
status_text.markdown('<div class="processing-status-container">Processing document with custom instructions...</div>', unsafe_allow_html=True)
|
432 |
+
# Update cache key to include custom prompt hash
|
433 |
+
custom_prompt_hash = hashlib.md5(str(custom_prompt).encode()).hexdigest()
|
434 |
+
cache_key = f"{cache_key}_{custom_prompt_hash}"
|
435 |
else:
|
436 |
status_text.markdown('<div class="processing-status-container">Processing document with OCR...</div>', unsafe_allow_html=True)
|
437 |
|
438 |
# Process the file using cached function if possible
|
439 |
try:
|
440 |
+
result = process_file_cached(temp_path, file_type, use_vision, file_size_mb, cache_key, preprocessing_options_hash)
|
441 |
progress_bar.progress(80)
|
442 |
status_text.markdown('<div class="processing-status-container">Analyzing document structure...</div>', unsafe_allow_html=True)
|
443 |
progress_bar.progress(90)
|
|
|
519 |
if 'previous_results' not in st.session_state:
|
520 |
st.session_state.previous_results = []
|
521 |
|
522 |
+
# Initialize temp file tracking
|
523 |
+
if 'temp_file_paths' not in st.session_state:
|
524 |
+
st.session_state.temp_file_paths = []
|
525 |
+
|
526 |
+
# Initialize last processed file tracking to fix "Process Document Again" button
|
527 |
+
if 'last_processed_file' not in st.session_state:
|
528 |
+
st.session_state.last_processed_file = None
|
529 |
+
|
530 |
+
# Check if we need to perform a complete reset (coming from "Close Document" button)
|
531 |
+
if 'perform_reset' in st.session_state and st.session_state.perform_reset:
|
532 |
+
# List of all session state keys that should be reset, except previous_results
|
533 |
+
reset_keys = [key for key in list(st.session_state.keys())
|
534 |
+
if key != 'previous_results']
|
535 |
+
|
536 |
+
# Remove all keys except previous_results
|
537 |
+
for key in reset_keys:
|
538 |
+
if key == 'perform_reset':
|
539 |
+
st.session_state[key] = False # Clear this flag
|
540 |
+
else:
|
541 |
+
st.session_state.pop(key, None)
|
542 |
+
|
543 |
+
# Reinitialize required session state variables
|
544 |
+
st.session_state.auto_process_sample = False
|
545 |
+
st.session_state.sample_just_loaded = False
|
546 |
+
st.session_state.processed_document_active = False
|
547 |
+
st.session_state.sample_document_processed = False
|
548 |
+
st.session_state.last_processed_file = None
|
549 |
+
|
550 |
+
# Explicitly reset document-related variables
|
551 |
+
st.session_state.sample_document = None
|
552 |
+
st.session_state.original_sample_bytes = None
|
553 |
+
st.session_state.original_sample_name = None
|
554 |
+
st.session_state.is_sample_document = False
|
555 |
+
|
556 |
# Create main layout with tabs and columns
|
557 |
main_tab1, main_tab2, main_tab3 = st.tabs(["Document Processing", "Previous Results", "About"])
|
558 |
|
|
|
1325 |
</style>
|
1326 |
""", unsafe_allow_html=True)
|
1327 |
|
1328 |
+
# Check if we're using a sample document (either newly loaded or from session state)
|
1329 |
+
using_sample_document = False
|
1330 |
+
sample_document_name = None
|
1331 |
+
|
1332 |
+
# Check for newly loaded sample document
|
1333 |
if 'sample_document' in st.session_state and st.session_state.sample_document is not None:
|
1334 |
# Use the sample document
|
1335 |
uploaded_file = st.session_state.sample_document
|
1336 |
+
using_sample_document = True
|
1337 |
+
sample_document_name = uploaded_file.name
|
1338 |
+
|
1339 |
+
# Set auto-process flag in session state if this is a newly loaded sample
|
1340 |
+
if st.session_state.sample_just_loaded:
|
1341 |
+
st.session_state.auto_process_sample = True
|
1342 |
+
# Mark that this is a sample document being processed
|
1343 |
+
st.session_state.sample_document_processed = True
|
1344 |
+
st.session_state.sample_just_loaded = False
|
1345 |
+
|
1346 |
+
# Store sample document bytes in a separate session state variable for potential reprocessing
|
1347 |
+
st.session_state.original_sample_bytes = uploaded_file.getvalue()
|
1348 |
+
st.session_state.original_sample_name = uploaded_file.name
|
1349 |
+
st.session_state.is_sample_document = True
|
1350 |
+
|
1351 |
+
# Check for reprocessing of previously loaded sample
|
1352 |
+
elif 'is_sample_document' in st.session_state and st.session_state.is_sample_document:
|
1353 |
+
using_sample_document = True
|
1354 |
+
sample_document_name = st.session_state.original_sample_name if 'original_sample_name' in st.session_state else "Sample Document"
|
1355 |
+
|
1356 |
+
# Display sample document notice if using a sample document
|
1357 |
+
if using_sample_document:
|
1358 |
st.markdown(
|
1359 |
f"""
|
1360 |
<div style="background-color: #D4EDDA; color: #155724; padding: 10px;
|
1361 |
border-radius: 4px; border-left: 5px solid #155724; margin-bottom: 10px;">
|
1362 |
<div style="display: flex; justify-content: space-between; align-items: center;">
|
1363 |
+
<span style="font-weight: bold;">Sample Document: {sample_document_name}</span>
|
1364 |
</div>
|
1365 |
</div>
|
1366 |
""",
|
1367 |
unsafe_allow_html=True
|
1368 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1369 |
|
1370 |
if uploaded_file is not None:
|
1371 |
# Check file size (cap at 50MB)
|
|
|
1375 |
with left_col:
|
1376 |
st.error(f"File too large ({file_size_mb:.1f} MB). Maximum file size is 50MB.")
|
1377 |
st.stop()
|
1378 |
+
|
1379 |
+
# Check if this is a new file (different from the last processed file)
|
1380 |
+
current_file_identifier = f"{uploaded_file.name}_{len(uploaded_file.getvalue())}"
|
1381 |
+
if st.session_state.last_processed_file != current_file_identifier:
|
1382 |
+
# Reset processed_document_active if a new file is uploaded
|
1383 |
+
st.session_state.processed_document_active = False
|
1384 |
|
1385 |
file_ext = Path(uploaded_file.name).suffix.lower()
|
1386 |
|
|
|
1391 |
# Make the button more clear about its function
|
1392 |
if st.session_state.processed_document_active:
|
1393 |
process_button = st.button("Process Document Again")
|
1394 |
+
|
1395 |
+
# If process button is clicked and we're dealing with a sample document that was cleared
|
1396 |
+
if process_button and uploaded_file is None and 'original_sample_bytes' in st.session_state:
|
1397 |
+
# Recreate the uploaded file from stored bytes
|
1398 |
+
from io import BytesIO
|
1399 |
+
import mimetypes
|
1400 |
+
|
1401 |
+
# Determine mime type based on file extension
|
1402 |
+
file_ext = os.path.splitext(st.session_state.original_sample_name)[1].lower()
|
1403 |
+
if file_ext == '.pdf':
|
1404 |
+
mime_type = 'application/pdf'
|
1405 |
+
elif file_ext in ['.jpg', '.jpeg']:
|
1406 |
+
mime_type = 'image/jpeg'
|
1407 |
+
elif file_ext == '.png':
|
1408 |
+
mime_type = 'image/png'
|
1409 |
+
else:
|
1410 |
+
mime_type = mimetypes.guess_type(st.session_state.original_sample_name)[0] or 'application/octet-stream'
|
1411 |
+
|
1412 |
+
# Create a synthetic file-like object with the same interface as UploadedFile
|
1413 |
+
uploaded_file = type('obj', (object,), {
|
1414 |
+
'name': st.session_state.original_sample_name,
|
1415 |
+
'getvalue': lambda: st.session_state.original_sample_bytes,
|
1416 |
+
'read': lambda: st.session_state.original_sample_bytes,
|
1417 |
+
'seek': lambda x: None,
|
1418 |
+
'type': mime_type
|
1419 |
+
})
|
1420 |
else:
|
1421 |
process_button = st.button("Process Document")
|
1422 |
|
|
|
1521 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
1522 |
tmp.write(uploaded_file.getvalue())
|
1523 |
temp_path = tmp.name
|
1524 |
+
# Track temporary file for cleanup
|
1525 |
+
st.session_state.temp_file_paths.append(temp_path)
|
1526 |
|
1527 |
# Apply PDF rotation if specified
|
1528 |
pdf_rotation_value = pdf_rotation if 'pdf_rotation' in locals() else 0
|
|
|
1582 |
# Clean up temp file
|
1583 |
if os.path.exists(temp_path):
|
1584 |
os.unlink(temp_path)
|
1585 |
+
# Remove from tracking list
|
1586 |
+
if temp_path in st.session_state.temp_file_paths:
|
1587 |
+
st.session_state.temp_file_paths.remove(temp_path)
|
1588 |
|
1589 |
except Exception as e:
|
1590 |
# If anything fails, revert to standard processing
|
|
|
1594 |
# For non-PDF files, use normal processing with custom prompt
|
1595 |
# Save the uploaded file to a temporary file with preprocessing
|
1596 |
with tempfile.NamedTemporaryFile(delete=False, suffix=Path(uploaded_file.name).suffix) as tmp:
|
1597 |
+
temp_path = tmp.name
|
1598 |
+
# Track temporary file for cleanup
|
1599 |
+
st.session_state.temp_file_paths.append(temp_path)
|
1600 |
# Apply preprocessing if any options are selected
|
1601 |
if any(preprocessing_options.values()):
|
1602 |
# Apply performance mode settings
|
|
|
1610 |
tmp.write(processed_bytes)
|
1611 |
else:
|
1612 |
tmp.write(uploaded_file.getvalue())
|
|
|
1613 |
|
1614 |
# Show progress
|
1615 |
with progress_placeholder.container():
|
|
|
1658 |
# Clean up temporary file
|
1659 |
if os.path.exists(temp_path):
|
1660 |
try:
|
1661 |
+
# Remove from tracking list
|
1662 |
+
if temp_path in st.session_state.temp_file_paths:
|
1663 |
+
st.session_state.temp_file_paths.remove(temp_path)
|
1664 |
os.unlink(temp_path)
|
1665 |
except:
|
1666 |
pass
|
|
|
2243 |
# Set processed_document_active to True when a new document is processed
|
2244 |
st.session_state.processed_document_active = True
|
2245 |
|
2246 |
+
# Store information about this processed file to track when new files are uploaded
|
2247 |
+
if uploaded_file is not None:
|
2248 |
+
st.session_state.last_processed_file = f"{uploaded_file.name}_{len(uploaded_file.getvalue())}"
|
2249 |
+
|
2250 |
# Button styling is now handled by global CSS
|
2251 |
|
2252 |
# Display success message with close button for dismissing processed documents
|
|
|
2257 |
# Close button styling is now handled by global CSS
|
2258 |
|
2259 |
if st.button("✕ Close Document", key="close_document_button", help="Clear current document and start over"):
|
2260 |
+
# Create a special flag to signal a complete reset on the next rerun
|
2261 |
+
st.session_state.perform_reset = True
|
2262 |
+
|
2263 |
+
# Clean up any temporary files
|
2264 |
+
if 'temp_file_paths' in st.session_state:
|
2265 |
+
for temp_path in st.session_state.temp_file_paths:
|
2266 |
+
try:
|
2267 |
+
if os.path.exists(temp_path):
|
2268 |
+
os.remove(temp_path)
|
2269 |
+
except Exception:
|
2270 |
+
pass # Ignore errors in cleanup
|
2271 |
+
|
2272 |
st.rerun()
|
2273 |
|
2274 |
# Store the result in the previous results list
|