milwright committed on
Commit
2f2eb30
·
1 Parent(s): c50e726

Optimize sidebar spacing and fix deprecated parameter

Browse files

- Add proper section spacing in sidebar with dividers
- Replace deprecated use_column_width with use_container_width
- Simplify section headings and labels
- Make instruction examples more compact
- Normalize heading styles across UI
- Improve CSS for better element positioning

Files changed (3) hide show
  1. CLAUDE.md +30 -0
  2. app.py +73 -78
  3. ui/custom.css +38 -0
CLAUDE.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Commands
6
+ - Run app: `streamlit run app.py`
7
+ - Test OCR functionality: `python structured_ocr.py <file_path>`
8
+ - Process PDF files: `python pdf_ocr.py <file_path>`
9
+ - Process single file with logging: `python process_file.py <file_path>`
10
+ - Run typechecking: `mypy .`
11
+
12
+ ## Environment Setup
13
+ - API key: Set `MISTRAL_API_KEY` in `.env` file or environment variable
14
+ - Install dependencies: `pip install -r requirements.txt`
15
+ - System requirements: `apt-get install poppler-utils tesseract-ocr` (or equivalent for your OS)
16
+
17
+ ## Code Style Guidelines
18
+ - **Imports**: Standard library first, third-party next, local modules last
19
+ - **Types**: Use Pydantic models and type hints for all functions
20
+ - **Error handling**: Use specific exceptions with informative messages
21
+ - **Naming**: snake_case for variables/functions, PascalCase for classes
22
+ - **Documentation**: Google-style docstrings for all functions/classes
23
+ - **Logging**: Use module-level loggers with appropriate log levels
24
+
25
+ ## Architecture
26
+ - Core: `structured_ocr.py` - Main OCR processing with Mistral AI integration
27
+ - Utils: `ocr_utils.py` - Utility functions for OCR text and image processing
28
+ - PDF handling: `pdf_ocr.py` - PDF-specific processing functionality
29
+ - Config: `config.py` - Configuration settings and API keys
30
+ - Web: `app.py` - Streamlit interface with UI components in `/ui` directory
app.py CHANGED
@@ -511,49 +511,60 @@ with st.sidebar:
511
  # Options title with reduced top margin
512
  st.markdown("<h2 style='margin-top:-25px; margin-bottom:5px; padding:0;'>Options</h2>", unsafe_allow_html=True)
513
 
514
- # Reduce spacing between sidebar sections
515
  st.markdown("""
516
  <style>
517
- /* Reduce all spacing in sidebar */
518
  .block-container {padding-top: 0;}
519
  .stSidebar .block-container {padding-top: 0 !important;}
520
  .stSidebar [data-testid='stSidebarNav'] {margin-bottom: 0 !important;}
521
  .stSidebar [data-testid='stMarkdownContainer'] {margin-bottom: 0 !important; margin-top: 0 !important;}
522
  .stSidebar [data-testid='stVerticalBlock'] {gap: 0 !important;}
523
 
524
- /* Make checkbox rows more compact */
525
- .stCheckbox {margin-bottom: 0 !important; padding-bottom: 0 !important; padding-top: 0 !important;}
526
- .stExpander {margin-top: 0 !important; margin-bottom: 10px !important;}
 
 
 
 
527
 
528
- /* Reduce space between section headings and content */
529
  .stSidebar h1, .stSidebar h2, .stSidebar h3, .stSidebar h4, .stSidebar h5 {
530
- margin-top: 0 !important;
531
- margin-bottom: 0 !important;
532
- padding-top: 0 !important;
533
- padding-bottom: 0 !important;
534
  line-height: 1.2 !important;
535
  }
536
 
537
- /* Make selectbox and other inputs more compact */
538
- .stSidebar .stSelectbox, .stSidebar .stSlider, .stSidebar .stNumberInput {
539
- margin-bottom: 5px !important;
540
- padding-bottom: 0 !important;
541
- padding-top: 0 !important;
542
- }
 
 
 
 
 
 
 
 
543
 
544
- /* Reduce all form element margins */
545
- .stForm > div {margin-bottom: 5px !important;}
546
- .stSidebar label {margin-bottom: 0 !important; line-height: 1.2 !important;}
547
  </style>
548
  """, unsafe_allow_html=True)
549
 
550
- # Model options - more compact
551
- st.markdown("##### Model Settings", help="Configure model options")
552
  use_vision = st.checkbox("Use Vision Model", value=True,
553
- help="For image files, use the vision model for improved analysis (may be slower)")
554
 
555
- # Historical Context section with minimal spacing
556
- st.markdown("##### Historical Context", help="Add historical context information")
 
 
 
557
 
558
  # Historical period selector
559
  historical_periods = [
@@ -566,10 +577,10 @@ with st.sidebar:
566
  ]
567
 
568
  selected_period = st.selectbox(
569
- "Historical Period",
570
  options=historical_periods,
571
  index=0,
572
- help="Select the time period of the document for better OCR processing"
573
  )
574
 
575
  # Document purpose selector
@@ -585,13 +596,13 @@ with st.sidebar:
585
  ]
586
 
587
  selected_purpose = st.selectbox(
588
- "Document Purpose",
589
  options=document_purposes,
590
  index=0,
591
- help="Select the purpose or type of the document for better OCR processing"
592
  )
593
 
594
- # Custom prompt field
595
  custom_prompt_text = ""
596
  if selected_period != "Select period (if known)":
597
  custom_prompt_text += f"This is a {selected_period} document. "
@@ -599,59 +610,44 @@ with st.sidebar:
599
  if selected_purpose != "Select purpose (if known)":
600
  custom_prompt_text += f"It appears to be a {selected_purpose}. "
601
 
 
 
 
602
  custom_prompt = st.text_area(
603
- "Additional Context",
604
  value=custom_prompt_text,
605
- placeholder="Example: This document has unusual handwriting with cursive script. Please identify any mentioned locations and dates.",
606
- height=150,
607
  max_chars=500,
608
  key="custom_analysis_instructions",
609
- help="Powerful instructions field that impacts how the AI processes your document. Can request translations, format images correctly, extract specific information, or handle challenging documents. See the 'Additional Context Instructions & Examples' section below for more details."
610
  )
611
 
612
- # Enhanced instructions for Additional Context with more capabilities
613
- with st.expander("Prompting Instructions"):
614
  st.markdown("""
615
- ### How Additional Context Affects Processing
616
-
617
- The "Additional Context" field provides instructions directly to the AI to influence how it processes your document. Use it to:
618
-
619
- #### Document Understanding
620
- - **Specify handwriting styles**: "This document uses old-fashioned cursive with numerous flourishes and abbreviations"
621
- - **Identify language features**: "The text contains archaic spellings common in 18th century documents"
622
- - **Highlight focus areas**: "Look for mentions of financial transactions or dates of travel"
623
-
624
- #### Output Formatting & Languages
625
- - **Request translations**: "After extracting the text, translate the content into Spanish"
626
- - **Format image orientation**: "Ensure images are displayed in the same orientation as they appear in the document"
627
- - **Format tables**: "Convert any tables in the document to structured format with clear columns"
628
-
629
- #### Special Processing
630
- - **Handle challenges**: "Some portions may be faded; the page edges contain handwritten notes"
631
- - **Technical terms**: "This is a medical document with specialized terminology about surgical procedures"
632
- - **Organization**: "Separate the letter content from the address blocks and signature"
633
-
634
- #### Example Combinations
635
- ```
636
- This is a handwritten letter from the 1850s. The writer uses archaic spellings and formal language.
637
- Please preserve paragraph structure, identify any place names mentioned, and note any references
638
- to historical events. Format any lists as bullet points.
639
- ```
640
  """)
641
 
 
 
 
642
  # Image preprocessing options with reduced spacing
643
- st.markdown("##### Image Preprocessing", help="Options for enhancing images before OCR")
644
  with st.expander("Preprocessing Options", expanded=False):
645
  preprocessing_options = {}
646
 
647
- # Document type selector - important for optimized processing
648
  doc_type_options = ["standard", "handwritten", "typed", "printed"]
649
  preprocessing_options["document_type"] = st.selectbox(
650
  "Document Type",
651
  options=doc_type_options,
652
- index=0, # Default to standard
653
  format_func=lambda x: x.capitalize(),
654
- help="Select document type for optimized processing - choose 'Handwritten' for letters and manuscripts"
655
  )
656
 
657
  preprocessing_options["grayscale"] = st.checkbox("Convert to Grayscale",
@@ -671,26 +667,25 @@ with st.sidebar:
671
  help="Rotate the document to correct orientation"
672
  )
673
 
 
 
 
674
  # PDF options with consistent formatting
675
- st.markdown("##### PDF Options", help="Settings for PDF documents")
676
- with st.expander("PDF Settings", expanded=False):
677
- pdf_dpi = st.slider("PDF Resolution (DPI)", 72, 300, 100,
678
- help="Higher DPI gives better quality but slower processing. Try 100 for faster processing.")
679
- max_pages = st.number_input("Maximum Pages to Process", 1, 20, 3,
680
  help="Limit number of pages to process")
681
 
682
  # Add PDF rotation option
683
- rotation_options = [0, 90, 180, 270]
684
  pdf_rotation = st.select_slider(
685
- "Rotate PDF",
686
  options=rotation_options,
687
  value=0,
688
- format_func=lambda x: f"{x}° {'(No rotation)' if x == 0 else ''}",
689
- help="Rotate the PDF pages to correct orientation"
690
  )
691
-
692
- # Store PDF rotation separately instead of in preprocessing_options
693
- # This prevents conflict with image preprocessing
694
 
695
  # Previous Results tab content
696
  with main_tab2:
@@ -1035,8 +1030,8 @@ with main_tab1:
1035
  # Create a container for the preview to better control layout
1036
  with st.container():
1037
  processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
1038
- # Use use_column_width=True for responsive design
1039
- st.image(io.BytesIO(processed_bytes), use_column_width=True)
1040
 
1041
  # Show preprocessing metadata in a well-formatted caption
1042
  meta_items = []
 
511
  # Options title with reduced top margin
512
  st.markdown("<h2 style='margin-top:-25px; margin-bottom:5px; padding:0;'>Options</h2>", unsafe_allow_html=True)
513
 
514
+ # Comprehensive CSS for optimal sidebar spacing and layout
515
  st.markdown("""
516
  <style>
517
+ /* Core sidebar spacing fixes */
518
  .block-container {padding-top: 0;}
519
  .stSidebar .block-container {padding-top: 0 !important;}
520
  .stSidebar [data-testid='stSidebarNav'] {margin-bottom: 0 !important;}
521
  .stSidebar [data-testid='stMarkdownContainer'] {margin-bottom: 0 !important; margin-top: 0 !important;}
522
  .stSidebar [data-testid='stVerticalBlock'] {gap: 0 !important;}
523
 
524
+ /* Input element optimization */
525
+ .stSidebar .stCheckbox {margin: 0 !important; padding: 0 !important;}
526
+ .stSidebar .stSelectbox {margin: 0 0 3px !important; padding: 0 !important;}
527
+ .stSidebar .stSlider {margin: 0 0 5px !important; padding: 0 !important;}
528
+ .stSidebar .stNumberInput {margin: 0 0 5px !important; padding: 0 !important;}
529
+ .stSidebar .stTextArea {margin: 0 0 5px !important; padding: 0 !important;}
530
+ .stSidebar .stTextInput {margin: 0 0 5px !important; padding: 0 !important;}
531
 
532
+ /* Heading and label optimization */
533
  .stSidebar h1, .stSidebar h2, .stSidebar h3, .stSidebar h4, .stSidebar h5 {
534
+ margin: 2px 0 !important;
535
+ padding: 0 !important;
 
 
536
  line-height: 1.2 !important;
537
  }
538
 
539
+ /* Label text optimization */
540
+ .stSidebar label {margin: 0 !important; line-height: 1.2 !important;}
541
+ .stSidebar .stTextArea label, .stSidebar .stSelectbox label {margin-top: 2px !important;}
542
+
543
+ /* Help text optimization */
544
+ .stSidebar .stTooltipIcon {margin: 0 !important; height: 1em !important;}
545
+
546
+ /* Slider optimization */
547
+ .stSidebar [data-baseweb="slider"] {margin: 10px 0 0 !important;}
548
+
549
+ /* Expander optimization */
550
+ .stSidebar .stExpander {margin: 0 0 8px !important;}
551
+ .stSidebar .streamlit-expanderHeader {font-size: 0.9em !important;}
552
+ .stSidebar .streamlit-expanderContent {padding-top: 5px !important;}
553
 
554
+ /* Remove unnecessary margins in form elements */
555
+ .stSidebar .stForm > div {margin: 0 !important;}
 
556
  </style>
557
  """, unsafe_allow_html=True)
558
 
559
+ # Model options
 
560
  use_vision = st.checkbox("Use Vision Model", value=True,
561
+ help="Use vision model for improved analysis (may be slower)")
562
 
563
+ # Add spacing between sections
564
+ st.markdown("<div style='margin: 10px 0;'></div>", unsafe_allow_html=True)
565
+
566
+ # Document Context section
567
+ st.markdown("##### Document Context", help="Add context information")
568
 
569
  # Historical period selector
570
  historical_periods = [
 
577
  ]
578
 
579
  selected_period = st.selectbox(
580
+ "Time Period",
581
  options=historical_periods,
582
  index=0,
583
+ help="Select the time period of the document"
584
  )
585
 
586
  # Document purpose selector
 
596
  ]
597
 
598
  selected_purpose = st.selectbox(
599
+ "Document Type",
600
  options=document_purposes,
601
  index=0,
602
+ help="Select the purpose or type of the document"
603
  )
604
 
605
+ # Dynamic custom prompt field
606
  custom_prompt_text = ""
607
  if selected_period != "Select period (if known)":
608
  custom_prompt_text += f"This is a {selected_period} document. "
 
610
  if selected_purpose != "Select purpose (if known)":
611
  custom_prompt_text += f"It appears to be a {selected_purpose}. "
612
 
613
+ # Add spacing between sections
614
+ st.markdown("<div style='margin: 10px 0;'></div>", unsafe_allow_html=True)
615
+
616
  custom_prompt = st.text_area(
617
+ "Special Instructions",
618
  value=custom_prompt_text,
619
+ placeholder="Example: Document has unusual cursive handwriting.",
620
+ height=90,
621
  max_chars=500,
622
  key="custom_analysis_instructions",
623
+ help="Specify document features or extraction needs"
624
  )
625
 
626
+ # Compact instructions expander
627
+ with st.expander("Instruction Examples"):
628
  st.markdown("""
629
+ - "Has faded text in corners"
630
+ - "Extract dates and locations"
631
+ - "Translate text to English"
632
+ - "Preserve tabular format"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633
  """)
634
 
635
+ # Add spacing between sections
636
+ st.markdown("<div style='margin: 10px 0;'></div>", unsafe_allow_html=True)
637
+
638
  # Image preprocessing options with reduced spacing
639
+ st.markdown("##### Image Processing", help="Options for enhancing images")
640
  with st.expander("Preprocessing Options", expanded=False):
641
  preprocessing_options = {}
642
 
643
+ # Document type selector
644
  doc_type_options = ["standard", "handwritten", "typed", "printed"]
645
  preprocessing_options["document_type"] = st.selectbox(
646
  "Document Type",
647
  options=doc_type_options,
648
+ index=0,
649
  format_func=lambda x: x.capitalize(),
650
+ help="Select document type for optimized processing"
651
  )
652
 
653
  preprocessing_options["grayscale"] = st.checkbox("Convert to Grayscale",
 
667
  help="Rotate the document to correct orientation"
668
  )
669
 
670
+ # Add spacing between sections
671
+ st.markdown("<div style='margin: 10px 0;'></div>", unsafe_allow_html=True)
672
+
673
  # PDF options with consistent formatting
674
+ st.markdown("##### PDF Settings", help="Options for PDF documents")
675
+ with st.expander("PDF Options", expanded=False):
676
+ pdf_dpi = st.slider("Resolution (DPI)", 72, 300, 100,
677
+ help="Higher DPI = better quality but slower")
678
+ max_pages = st.number_input("Max Pages", 1, 20, 3,
679
  help="Limit number of pages to process")
680
 
681
  # Add PDF rotation option
 
682
  pdf_rotation = st.select_slider(
683
+ "Rotation",
684
  options=rotation_options,
685
  value=0,
686
+ format_func=lambda x: f"{x}°",
687
+ help="Rotate PDF pages"
688
  )
 
 
 
689
 
690
  # Previous Results tab content
691
  with main_tab2:
 
1030
  # Create a container for the preview to better control layout
1031
  with st.container():
1032
  processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
1033
+ # Use use_container_width=True for responsive design
1034
+ st.image(io.BytesIO(processed_bytes), use_container_width=True)
1035
 
1036
  # Show preprocessing metadata in a well-formatted caption
1037
  meta_items = []
ui/custom.css CHANGED
@@ -74,6 +74,7 @@
74
  /* Fix for image preprocessing preview */
75
  .stExpander {
76
  overflow: hidden !important;
 
77
  }
78
 
79
  .stExpander img {
@@ -85,6 +86,8 @@
85
  /* Additional fixes for image preprocessing preview in expanders */
86
  .streamlit-expanderContent {
87
  overflow: hidden !important;
 
 
88
  }
89
 
90
  .streamlit-expanderContent img {
@@ -93,6 +96,41 @@
93
  object-fit: contain !important;
94
  }
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  /* Metadata container styling */
97
  .metadata-container {
98
  background-color: #f8f9fa;
 
74
  /* Fix for image preprocessing preview */
75
  .stExpander {
76
  overflow: hidden !important;
77
+ margin-bottom: 10px !important;
78
  }
79
 
80
  .stExpander img {
 
86
  /* Additional fixes for image preprocessing preview in expanders */
87
  .streamlit-expanderContent {
88
  overflow: hidden !important;
89
+ padding-top: 5px !important;
90
+ padding-bottom: 5px !important;
91
  }
92
 
93
  .streamlit-expanderContent img {
 
96
  object-fit: contain !important;
97
  }
98
 
99
+ /* Compact sidebar expanders */
100
+ .stSidebar .stExpander {
101
+ margin-top: 0 !important;
102
+ margin-bottom: 8px !important;
103
+ }
104
+
105
+ .stSidebar .streamlit-expanderHeader {
106
+ font-size: 0.9em !important;
107
+ padding: 5px !important;
108
+ }
109
+
110
+ .stSidebar .streamlit-expanderContent {
111
+ padding: 5px !important;
112
+ }
113
+
114
+ /* Better sidebar section spacing */
115
+ .stSidebar h1, .stSidebar h2, .stSidebar h3, .stSidebar h4, .stSidebar h5 {
116
+ margin-top: 15px !important;
117
+ margin-bottom: 5px !important;
118
+ padding-top: 0 !important;
119
+ padding-bottom: 3px !important;
120
+ line-height: 1.2 !important;
121
+ font-weight: 600 !important;
122
+ }
123
+
124
+ /* First heading in sidebar doesn't need top margin */
125
+ .stSidebar [data-testid="stVerticalBlock"] > div:first-child h5 {
126
+ margin-top: 0 !important;
127
+ }
128
+
129
+ /* Optimize sidebar checkbox positioning */
130
+ .stSidebar .stCheckbox > div > div {
131
+ margin-bottom: 3px !important;
132
+ }
133
+
134
  /* Metadata container styling */
135
  .metadata-container {
136
  background-color: #f8f9fa;