Spaces:
Running
Running
Fix sample document loading and processing pipeline
Browse files
- Fixed sample document loading to automatically process after selection
- Enhanced SampleDocument class with better file emulation
- Added session state management for reliable sample processing
- Improved user feedback during sample document processing
- Updated CLAUDE.md with improved documentation
CLAUDE.md
CHANGED
@@ -8,6 +8,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|
8 |
- Process PDF files: `python pdf_ocr.py <file_path>`
|
9 |
- Process single file with logging: `python process_file.py <file_path>`
|
10 |
- Run newspaper test: `python test_newspaper.py <file_path>`
|
|
|
11 |
- Run typechecking: `mypy .`
|
12 |
- Lint code: `ruff check .` or `flake8`
|
13 |
|
@@ -23,6 +24,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|
23 |
- **Naming**: snake_case for variables/functions, PascalCase for classes
|
24 |
- **Documentation**: Google-style docstrings for all functions/classes
|
25 |
- **Logging**: Use module-level loggers with appropriate log levels
|
|
|
26 |
- **Line length**: ≤100 characters
|
27 |
|
28 |
## Architecture
|
@@ -30,4 +32,5 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|
30 |
- Utils: `ocr_utils.py` - OCR text and image processing utilities
|
31 |
- PDF handling: `pdf_ocr.py` - PDF-specific processing functionality
|
32 |
- Config: `config.py` - Configuration settings and API keys
|
33 |
-
- Web: `app.py` - Streamlit interface with UI components in `/ui` directory
|
|
|
|
8 |
- Process PDF files: `python pdf_ocr.py <file_path>`
|
9 |
- Process single file with logging: `python process_file.py <file_path>`
|
10 |
- Run newspaper test: `python test_newspaper.py <file_path>`
|
11 |
+
- Run notebook demo: `jupyter notebook notebook_demo.ipynb`
|
12 |
- Run typechecking: `mypy .`
|
13 |
- Lint code: `ruff check .` or `flake8`
|
14 |
|
|
|
24 |
- **Naming**: snake_case for variables/functions, PascalCase for classes
|
25 |
- **Documentation**: Google-style docstrings for all functions/classes
|
26 |
- **Logging**: Use module-level loggers with appropriate log levels
|
27 |
+
- **Exception handling**: Implement graceful fallbacks for API errors
|
28 |
- **Line length**: ≤100 characters
|
29 |
|
30 |
## Architecture
|
|
|
32 |
- Utils: `ocr_utils.py` - OCR text and image processing utilities
|
33 |
- PDF handling: `pdf_ocr.py` - PDF-specific processing functionality
|
34 |
- Config: `config.py` - Configuration settings and API keys
|
35 |
+
- Web: `app.py` - Streamlit interface with UI components in `/ui` directory
|
36 |
+
- Demo: `notebook_demo.ipynb` - Interactive notebook with educational examples
|
app.py
CHANGED
@@ -511,12 +511,12 @@ with main_tab1:
|
|
511 |
# Add heading for the file uploader (just text, no container)
|
512 |
st.markdown('### Upload Document')
|
513 |
|
514 |
-
# Model info
|
515 |
-
st.markdown("Using the latest `mistral-ocr-latest` model for advanced document understanding.")
|
516 |
|
517 |
# Enhanced file uploader with better help text
|
518 |
uploaded_file = st.file_uploader("Drag and drop PDFs or images here", type=["pdf", "png", "jpg", "jpeg"],
|
519 |
-
help="
|
520 |
|
521 |
# Removed seed prompt instructions from here, moving to sidebar
|
522 |
|
@@ -917,6 +917,8 @@ with main_tab2:
|
|
917 |
badge_color = "#6a1b9a" # Purple for document types
|
918 |
elif any(term in topic.lower() for term in ["travel", "military", "science", "medicine", "education", "art", "literature"]):
|
919 |
badge_color = "#2e7d32" # Green for subject domains
|
|
|
|
|
920 |
|
921 |
st.markdown(
|
922 |
f'<span style="background-color: {badge_color}; color: white; padding: 3px 8px; '
|
@@ -1193,6 +1195,27 @@ with main_tab3:
|
|
1193 |
""")
|
1194 |
|
1195 |
with main_tab1:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1196 |
if uploaded_file is not None:
|
1197 |
# Check file size (cap at 50MB)
|
1198 |
file_size_mb = len(uploaded_file.getvalue()) / (1024 * 1024)
|
@@ -1247,8 +1270,21 @@ with main_tab1:
|
|
1247 |
# No extra spacing needed as it will be managed programmatically
|
1248 |
metadata_placeholder = st.empty()
|
1249 |
|
1250 |
-
#
|
1251 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1252 |
# Move the progress indicator reference to just below the button
|
1253 |
progress_container = progress_placeholder
|
1254 |
try:
|
@@ -1477,8 +1513,8 @@ with main_tab1:
|
|
1477 |
# Only show when custom_prompt exists in the session AND has content, or when the result explicitly states it was applied
|
1478 |
has_instructions = ('custom_prompt' in locals() and custom_prompt and len(str(custom_prompt).strip()) > 0)
|
1479 |
if has_instructions or 'custom_prompt_applied' in result:
|
1480 |
-
# Use
|
1481 |
-
metadata_html += f'<p
|
1482 |
|
1483 |
# Close the metadata card
|
1484 |
metadata_html += '</div>'
|
@@ -1936,6 +1972,63 @@ with main_tab1:
|
|
1936 |
|
1937 |
if 'ocr_contents' not in result:
|
1938 |
st.error("No OCR content was extracted from the document.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1939 |
|
1940 |
# Close document content div
|
1941 |
st.markdown('</div>', unsafe_allow_html=True)
|
@@ -2038,6 +2131,41 @@ with main_tab1:
|
|
2038 |
lang_tag = f"{lang} Language"
|
2039 |
subject_tags.append(lang_tag)
|
2040 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2041 |
except Exception as e:
|
2042 |
logger.warning(f"Error generating subject tags: {str(e)}")
|
2043 |
# Fallback tags if extraction fails
|
@@ -2094,9 +2222,7 @@ with main_tab1:
|
|
2094 |
except Exception as e:
|
2095 |
st.error(f"Error processing document: {str(e)}")
|
2096 |
else:
|
2097 |
-
#
|
2098 |
-
|
2099 |
-
# Show example images in a simpler layout
|
2100 |
st.subheader("Example Documents")
|
2101 |
|
2102 |
# Add a simplified info message about examples
|
@@ -2106,9 +2232,115 @@ with main_tab1:
|
|
2106 |
- Handwritten letters and documents
|
2107 |
- Printed books and articles
|
2108 |
- Multi-page PDFs
|
2109 |
-
|
2110 |
-
Upload your own document to get started or explore the 'About' tab for more information.
|
2111 |
""")
|
2112 |
|
2113 |
-
#
|
2114 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
511 |
# Add heading for the file uploader (just text, no container)
|
512 |
st.markdown('### Upload Document')
|
513 |
|
514 |
+
# Model info with clearer instructions
|
515 |
+
st.markdown("Using the latest `mistral-ocr-latest` model for advanced document understanding. To get started upload your own document, use an example document, or explore the 'About' tab for more info.")
|
516 |
|
517 |
# Enhanced file uploader with better help text
|
518 |
uploaded_file = st.file_uploader("Drag and drop PDFs or images here", type=["pdf", "png", "jpg", "jpeg"],
|
519 |
+
help="Limit 200MB per file • PDF, PNG, JPG, JPEG")
|
520 |
|
521 |
# Removed seed prompt instructions from here, moving to sidebar
|
522 |
|
|
|
917 |
badge_color = "#6a1b9a" # Purple for document types
|
918 |
elif any(term in topic.lower() for term in ["travel", "military", "science", "medicine", "education", "art", "literature"]):
|
919 |
badge_color = "#2e7d32" # Green for subject domains
|
920 |
+
elif any(term in topic.lower() for term in ["preprocessed", "enhanced", "grayscale", "denoised", "contrast", "rotated"]):
|
921 |
+
badge_color = "#e65100" # Orange for preprocessing-related tags
|
922 |
|
923 |
st.markdown(
|
924 |
f'<span style="background-color: {badge_color}; color: white; padding: 3px 8px; '
|
|
|
1195 |
""")
|
1196 |
|
1197 |
with main_tab1:
|
1198 |
+
# Initialize session states if needed
|
1199 |
+
if 'auto_process_sample' not in st.session_state:
|
1200 |
+
st.session_state.auto_process_sample = False
|
1201 |
+
if 'sample_just_loaded' not in st.session_state:
|
1202 |
+
st.session_state.sample_just_loaded = False
|
1203 |
+
|
1204 |
+
# Use uploaded_file or sample_document if available
|
1205 |
+
if 'sample_document' in st.session_state and st.session_state.sample_document is not None:
|
1206 |
+
# Use the sample document
|
1207 |
+
uploaded_file = st.session_state.sample_document
|
1208 |
+
# Add a notice about using sample document
|
1209 |
+
st.success(f"Using sample document: {uploaded_file.name}")
|
1210 |
+
|
1211 |
+
# Set auto-process flag in session state if this is a newly loaded sample
|
1212 |
+
if st.session_state.sample_just_loaded:
|
1213 |
+
st.session_state.auto_process_sample = True
|
1214 |
+
st.session_state.sample_just_loaded = False
|
1215 |
+
|
1216 |
+
# Clear sample document after use to avoid interference with future uploads
|
1217 |
+
st.session_state.sample_document = None
|
1218 |
+
|
1219 |
if uploaded_file is not None:
|
1220 |
# Check file size (cap at 50MB)
|
1221 |
file_size_mb = len(uploaded_file.getvalue()) / (1024 * 1024)
|
|
|
1270 |
# No extra spacing needed as it will be managed programmatically
|
1271 |
metadata_placeholder = st.empty()
|
1272 |
|
1273 |
+
# Check if we need to auto-process a sample document
|
1274 |
+
if 'auto_process_sample' not in st.session_state:
|
1275 |
+
st.session_state.auto_process_sample = False
|
1276 |
+
|
1277 |
+
# Results section - process if button clicked or auto-process flag is set
|
1278 |
+
process_now = process_button or st.session_state.auto_process_sample
|
1279 |
+
|
1280 |
+
# Show a message if auto-processing
|
1281 |
+
if st.session_state.auto_process_sample:
|
1282 |
+
st.info("Automatically processing sample document...")
|
1283 |
+
|
1284 |
+
if process_now:
|
1285 |
+
# Reset auto-process flag to avoid processing on next rerun
|
1286 |
+
if st.session_state.auto_process_sample:
|
1287 |
+
st.session_state.auto_process_sample = False
|
1288 |
# Move the progress indicator reference to just below the button
|
1289 |
progress_container = progress_placeholder
|
1290 |
try:
|
|
|
1513 |
# Only show when custom_prompt exists in the session AND has content, or when the result explicitly states it was applied
|
1514 |
has_instructions = ('custom_prompt' in locals() and custom_prompt and len(str(custom_prompt).strip()) > 0)
|
1515 |
if has_instructions or 'custom_prompt_applied' in result:
|
1516 |
+
# Use consistent styling with other metadata fields
|
1517 |
+
metadata_html += f'<p><strong>Advanced Analysis:</strong> Custom instructions applied</p>'
|
1518 |
|
1519 |
# Close the metadata card
|
1520 |
metadata_html += '</div>'
|
|
|
1972 |
|
1973 |
if 'ocr_contents' not in result:
|
1974 |
st.error("No OCR content was extracted from the document.")
|
1975 |
+
else:
|
1976 |
+
# Check for minimal text content in OCR results
|
1977 |
+
has_minimal_text = False
|
1978 |
+
total_text_length = 0
|
1979 |
+
|
1980 |
+
# Check if the document is an image (not a PDF)
|
1981 |
+
is_image = result.get('file_name', '').lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))
|
1982 |
+
|
1983 |
+
# If image file with raw_text only
|
1984 |
+
if is_image and 'ocr_contents' in result:
|
1985 |
+
ocr_contents = result['ocr_contents']
|
1986 |
+
|
1987 |
+
# Check if only raw_text exists with minimal content
|
1988 |
+
has_raw_text_only = False
|
1989 |
+
if 'raw_text' in ocr_contents:
|
1990 |
+
raw_text = ocr_contents['raw_text']
|
1991 |
+
total_text_length += len(raw_text.strip())
|
1992 |
+
|
1993 |
+
# Check if raw_text is the only significant field
|
1994 |
+
other_content_fields = [k for k in ocr_contents.keys()
|
1995 |
+
if k not in ['raw_text', 'error', 'partial_text']
|
1996 |
+
and isinstance(ocr_contents[k], (str, list))
|
1997 |
+
and ocr_contents[k]]
|
1998 |
+
|
1999 |
+
if len(other_content_fields) <= 1: # Only raw_text or one other field
|
2000 |
+
has_raw_text_only = True
|
2001 |
+
|
2002 |
+
# Check if minimal text was extracted (less than 50 characters)
|
2003 |
+
if total_text_length < 50 and has_raw_text_only:
|
2004 |
+
has_minimal_text = True
|
2005 |
+
|
2006 |
+
# Check if any meaningful preprocessing options were used
|
2007 |
+
preprocessing_used = False
|
2008 |
+
if preprocessing_options.get("document_type", "standard") != "standard":
|
2009 |
+
preprocessing_used = True
|
2010 |
+
if preprocessing_options.get("grayscale", False):
|
2011 |
+
preprocessing_used = True
|
2012 |
+
if preprocessing_options.get("denoise", False):
|
2013 |
+
preprocessing_used = True
|
2014 |
+
if preprocessing_options.get("contrast", 0) != 0:
|
2015 |
+
preprocessing_used = True
|
2016 |
+
if preprocessing_options.get("rotation", 0) != 0:
|
2017 |
+
preprocessing_used = True
|
2018 |
+
|
2019 |
+
# If minimal text was found and preprocessing options weren't used
|
2020 |
+
if has_minimal_text and not preprocessing_used and uploaded_file.type.startswith('image/'):
|
2021 |
+
st.warning("""
|
2022 |
+
**Limited text extracted from this image.**
|
2023 |
+
|
2024 |
+
Try using preprocessing options in the sidebar to improve results:
|
2025 |
+
- Convert to grayscale for clearer text
|
2026 |
+
- Use denoising for aged or degraded documents
|
2027 |
+
- Adjust contrast for faded text
|
2028 |
+
- Try different rotation if text orientation is unclear
|
2029 |
+
|
2030 |
+
Click the "Preprocessing Options" section in the sidebar under "Image Processing".
|
2031 |
+
""")
|
2032 |
|
2033 |
# Close document content div
|
2034 |
st.markdown('</div>', unsafe_allow_html=True)
|
|
|
2131 |
lang_tag = f"{lang} Language"
|
2132 |
subject_tags.append(lang_tag)
|
2133 |
|
2134 |
+
# Add preprocessing information as tags if preprocessing was applied
|
2135 |
+
if uploaded_file.type.startswith('image/'):
|
2136 |
+
# Check if meaningful preprocessing options were used
|
2137 |
+
if preprocessing_options.get("document_type", "standard") != "standard":
|
2138 |
+
doc_type = preprocessing_options["document_type"].capitalize()
|
2139 |
+
preprocessing_tag = f"Enhanced ({doc_type})"
|
2140 |
+
if preprocessing_tag not in subject_tags:
|
2141 |
+
subject_tags.append(preprocessing_tag)
|
2142 |
+
|
2143 |
+
preprocessing_methods = []
|
2144 |
+
if preprocessing_options.get("grayscale", False):
|
2145 |
+
preprocessing_methods.append("Grayscale")
|
2146 |
+
if preprocessing_options.get("denoise", False):
|
2147 |
+
preprocessing_methods.append("Denoised")
|
2148 |
+
if preprocessing_options.get("contrast", 0) != 0:
|
2149 |
+
contrast_val = preprocessing_options.get("contrast", 0)
|
2150 |
+
if contrast_val > 0:
|
2151 |
+
preprocessing_methods.append("Contrast Enhanced")
|
2152 |
+
else:
|
2153 |
+
preprocessing_methods.append("Contrast Reduced")
|
2154 |
+
if preprocessing_options.get("rotation", 0) != 0:
|
2155 |
+
preprocessing_methods.append("Rotated")
|
2156 |
+
|
2157 |
+
# Add a combined preprocessing tag if methods were applied
|
2158 |
+
if preprocessing_methods:
|
2159 |
+
prep_tag = "Preprocessed"
|
2160 |
+
if prep_tag not in subject_tags:
|
2161 |
+
subject_tags.append(prep_tag)
|
2162 |
+
|
2163 |
+
# Add the specific method as a tag if only one was used
|
2164 |
+
if len(preprocessing_methods) == 1:
|
2165 |
+
method_tag = preprocessing_methods[0]
|
2166 |
+
if method_tag not in subject_tags:
|
2167 |
+
subject_tags.append(method_tag)
|
2168 |
+
|
2169 |
except Exception as e:
|
2170 |
logger.warning(f"Error generating subject tags: {str(e)}")
|
2171 |
# Fallback tags if extraction fails
|
|
|
2222 |
except Exception as e:
|
2223 |
st.error(f"Error processing document: {str(e)}")
|
2224 |
else:
|
2225 |
+
# Example Documents section after file uploader
|
|
|
|
|
2226 |
st.subheader("Example Documents")
|
2227 |
|
2228 |
# Add a simplified info message about examples
|
|
|
2232 |
- Handwritten letters and documents
|
2233 |
- Printed books and articles
|
2234 |
- Multi-page PDFs
|
|
|
|
|
2235 |
""")
|
2236 |
|
2237 |
+
# Add CSS to make the dropdown match the column width
|
2238 |
+
st.markdown("""
|
2239 |
+
<style>
|
2240 |
+
/* Make the selectbox container match the full column width */
|
2241 |
+
.main .block-container .element-container:has([data-testid="stSelectbox"]) {
|
2242 |
+
width: 100% !important;
|
2243 |
+
max-width: 100% !important;
|
2244 |
+
}
|
2245 |
+
|
2246 |
+
/* Make the actual selectbox control take the full width */
|
2247 |
+
.stSelectbox > div > div {
|
2248 |
+
width: 100% !important;
|
2249 |
+
max-width: 100% !important;
|
2250 |
+
}
|
2251 |
+
</style>
|
2252 |
+
""", unsafe_allow_html=True)
|
2253 |
+
|
2254 |
+
# Sample document URLs dropdown with clearer label
|
2255 |
+
sample_urls = [
|
2256 |
+
"Select a sample document",
|
2257 |
+
"https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/a-la-carte.pdf",
|
2258 |
+
"https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/magician-or-bottle-cungerer.jpg",
|
2259 |
+
"https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/handwritten-letter.jpg",
|
2260 |
+
"https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/magellan-travels.jpg",
|
2261 |
+
"https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/milgram-flier.png",
|
2262 |
+
"https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/baldwin-15st-north.jpg"
|
2263 |
+
]
|
2264 |
+
|
2265 |
+
sample_names = [
|
2266 |
+
"Select a sample document",
|
2267 |
+
"Restaurant Menu (PDF)",
|
2268 |
+
"The Magician (Image)",
|
2269 |
+
"Handwritten Letter (Image)",
|
2270 |
+
"Magellan Travels (Image)",
|
2271 |
+
"Milgram Flier (Image)",
|
2272 |
+
"Baldwin Street (Image)"
|
2273 |
+
]
|
2274 |
+
|
2275 |
+
# Initialize sample_document in session state if it doesn't exist
|
2276 |
+
if 'sample_document' not in st.session_state:
|
2277 |
+
st.session_state.sample_document = None
|
2278 |
+
|
2279 |
+
selected_sample = st.selectbox("Select a sample document from `~/input`", options=range(len(sample_urls)), format_func=lambda i: sample_names[i])
|
2280 |
+
|
2281 |
+
if selected_sample > 0:
|
2282 |
+
selected_url = sample_urls[selected_sample]
|
2283 |
+
|
2284 |
+
# Add process button for the sample document
|
2285 |
+
if st.button("Load Sample Document"):
|
2286 |
+
try:
|
2287 |
+
import requests
|
2288 |
+
from io import BytesIO
|
2289 |
+
|
2290 |
+
with st.spinner(f"Downloading {sample_names[selected_sample]}..."):
|
2291 |
+
response = requests.get(selected_url)
|
2292 |
+
response.raise_for_status()
|
2293 |
+
|
2294 |
+
# Extract filename from URL
|
2295 |
+
file_name = selected_url.split("/")[-1]
|
2296 |
+
|
2297 |
+
# Create a BytesIO object from the downloaded content
|
2298 |
+
file_content = BytesIO(response.content)
|
2299 |
+
|
2300 |
+
# Store as a UploadedFile-like object in session state
|
2301 |
+
class SampleDocument:
|
2302 |
+
def __init__(self, name, content, content_type):
|
2303 |
+
self.name = name
|
2304 |
+
self._content = content
|
2305 |
+
self.type = content_type
|
2306 |
+
self.size = len(content)
|
2307 |
+
|
2308 |
+
def getvalue(self):
|
2309 |
+
return self._content
|
2310 |
+
|
2311 |
+
def read(self):
|
2312 |
+
return self._content
|
2313 |
+
|
2314 |
+
def seek(self, position):
|
2315 |
+
# Implement seek for compatibility with some file operations
|
2316 |
+
return
|
2317 |
+
|
2318 |
+
def tell(self):
|
2319 |
+
# Implement tell for compatibility
|
2320 |
+
return 0
|
2321 |
+
|
2322 |
+
# Determine content type based on file extension
|
2323 |
+
if file_name.lower().endswith('.pdf'):
|
2324 |
+
content_type = 'application/pdf'
|
2325 |
+
elif file_name.lower().endswith(('.jpg', '.jpeg')):
|
2326 |
+
content_type = 'image/jpeg'
|
2327 |
+
elif file_name.lower().endswith('.png'):
|
2328 |
+
content_type = 'image/png'
|
2329 |
+
else:
|
2330 |
+
content_type = 'application/octet-stream'
|
2331 |
+
|
2332 |
+
# Save download info in session state for more reliable handling
|
2333 |
+
st.session_state.sample_document = SampleDocument(
|
2334 |
+
name=file_name,
|
2335 |
+
content=response.content,
|
2336 |
+
content_type=content_type
|
2337 |
+
)
|
2338 |
+
|
2339 |
+
# Set a flag to indicate this is a newly loaded sample
|
2340 |
+
st.session_state.sample_just_loaded = True
|
2341 |
+
|
2342 |
+
# Force rerun to load the document
|
2343 |
+
st.rerun()
|
2344 |
+
except Exception as e:
|
2345 |
+
st.error(f"Error downloading sample document: {str(e)}")
|
2346 |
+
st.info("Please try uploading your own document instead.")
|