milwright committed on
Commit
8d3bfba
·
1 Parent(s): d31328c

Update app.py, structured_ocr.py, and UI layout

Browse files
Files changed (3) hide show
  1. app.py +4 -8
  2. structured_ocr.py +4 -4
  3. ui/layout.py +19 -164
app.py CHANGED
@@ -531,7 +531,7 @@ if 'last_processed_file' not in st.session_state:
531
  if 'perform_reset' not in st.session_state:
532
  st.session_state.perform_reset = False
533
 
534
- # Check if we need to perform a complete reset (coming from "Close Document" button)
535
  if 'perform_reset' in st.session_state and st.session_state.perform_reset:
536
  # List of all session state keys that should be reset, except previous_results
537
  reset_keys = [key for key in list(st.session_state.keys())
@@ -2290,7 +2290,7 @@ with main_tab1:
2290
  st.session_state.temp_file_paths = []
2291
 
2292
  # Create the close button with a callback
2293
- st.button(" Close Document",
2294
  key="close_document_button",
2295
  help="Clear current document and start over",
2296
  on_click=clear_document_state)
@@ -2519,22 +2519,18 @@ with main_tab1:
2519
  # Sample document URLs dropdown with clearer label
2520
  sample_urls = [
2521
  "Select a sample document",
2522
- "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/a-la-carte.pdf",
2523
  "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/magician-or-bottle-cungerer.jpg",
2524
  "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/handwritten-letter.jpg",
2525
  "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/magellan-travels.jpg",
2526
- "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/milgram-flier.png",
2527
- "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/baldwin-15st-north.jpg"
2528
  ]
2529
 
2530
  sample_names = [
2531
  "Select a sample document",
2532
- "Restaurant Menu (PDF)",
2533
  "The Magician (Image)",
2534
  "Handwritten Letter (Image)",
2535
  "Magellan Travels (Image)",
2536
- "Milgram Flier (Image)",
2537
- "Baldwin Street (Image)"
2538
  ]
2539
 
2540
  # Initialize sample_document in session state if it doesn't exist
 
531
  if 'perform_reset' not in st.session_state:
532
  st.session_state.perform_reset = False
533
 
534
+ # Check if we need to perform a complete reset (coming from "X Close" button)
535
  if 'perform_reset' in st.session_state and st.session_state.perform_reset:
536
  # List of all session state keys that should be reset, except previous_results
537
  reset_keys = [key for key in list(st.session_state.keys())
 
2290
  st.session_state.temp_file_paths = []
2291
 
2292
  # Create the close button with a callback
2293
+ st.button("X Close",
2294
  key="close_document_button",
2295
  help="Clear current document and start over",
2296
  on_click=clear_document_state)
 
2519
  # Sample document URLs dropdown with clearer label
2520
  sample_urls = [
2521
  "Select a sample document",
 
2522
  "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/magician-or-bottle-cungerer.jpg",
2523
  "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/handwritten-letter.jpg",
2524
  "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/magellan-travels.jpg",
2525
+ "https://huggingface.co/spaces/milwright/historical-ocr/resolve/main/input/milgram-flier.png"
 
2526
  ]
2527
 
2528
  sample_names = [
2529
  "Select a sample document",
 
2530
  "The Magician (Image)",
2531
  "Handwritten Letter (Image)",
2532
  "Magellan Travels (Image)",
2533
+ "Milgram Flier (Image)"
 
2534
  ]
2535
 
2536
  # Initialize sample_document in session state if it doesn't exist
structured_ocr.py CHANGED
@@ -469,9 +469,9 @@ class StructuredOCR:
469
  try:
470
  page_result = self._process_image(Path(tmp.name), False, None)
471
  if 'ocr_contents' in page_result and 'raw_text' in page_result['ocr_contents']:
472
- # Add page text to combined text
473
  page_text = page_result['ocr_contents']['raw_text']
474
- combined_text.append(f"--- PAGE {page_num} ---\n{page_text}")
475
  except Exception as page_e:
476
  logger.warning(f"Error processing page {page_num}: {str(page_e)}")
477
  # Clean up temp file
@@ -729,9 +729,9 @@ class StructuredOCR:
729
  page_num = idx + 1
730
 
731
  page_markdown = page.markdown if hasattr(page, 'markdown') else ""
732
- # Add page header if content exists
733
  if page_markdown.strip():
734
- all_markdown.append(f"--- PAGE {page_num} ---\n{page_markdown}")
735
 
736
  # Join all pages with separation
737
  combined_markdown = "\n\n".join(all_markdown)
 
469
  try:
470
  page_result = self._process_image(Path(tmp.name), False, None)
471
  if 'ocr_contents' in page_result and 'raw_text' in page_result['ocr_contents']:
472
+ # Add page text to combined text without obvious page markers
473
  page_text = page_result['ocr_contents']['raw_text']
474
+ combined_text.append(f"{page_text}")
475
  except Exception as page_e:
476
  logger.warning(f"Error processing page {page_num}: {str(page_e)}")
477
  # Clean up temp file
 
729
  page_num = idx + 1
730
 
731
  page_markdown = page.markdown if hasattr(page, 'markdown') else ""
732
+ # Add page content without obvious page markers
733
  if page_markdown.strip():
734
+ all_markdown.append(f"{page_markdown}")
735
 
736
  # Join all pages with separation
737
  combined_markdown = "\n\n".join(all_markdown)
ui/layout.py CHANGED
@@ -1,172 +1,27 @@
 
 
 
 
 
1
  import streamlit as st
2
  from pathlib import Path
3
- import os
4
 
5
- # Load custom CSS
6
  def load_css():
7
- css_file = Path(__file__).parent / "custom.css"
8
- if css_file.exists():
9
- with open(css_file) as f:
10
- st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
11
- else:
12
- st.warning("Custom CSS file not found. Some styles may be missing.")
13
-
14
- # Header component
15
- def header():
16
- st.markdown("""
17
- <div class="main-header">
18
- <h1 class="title-text">Historical OCR Workshop</h1>
19
- </div>
20
- """, unsafe_allow_html=True)
21
-
22
- # Create a page wrapper similar to the React component
23
- def page_wrapper(content_function, current_module=1):
24
- """
25
- Creates a consistent page layout with navigation
26
- Args:
27
- content_function: Function that renders the page content
28
- current_module: Current module number (1-6)
29
- """
30
- # Load custom CSS
31
- load_css()
32
-
33
- # Display header
34
- header()
35
 
36
- # Ensure session state for navigation
37
- if 'current_module' not in st.session_state:
38
- st.session_state.current_module = current_module
39
 
40
- # Main content area with bottom padding for the nav
41
- st.markdown('<div class="main-content">', unsafe_allow_html=True)
 
 
42
 
43
- # Call the content function to render the module content
44
- content_function()
45
-
46
- # Add spacer for fixed nav
47
- st.markdown('<div class="footer-spacer"></div>', unsafe_allow_html=True)
48
-
49
- # Navigation
50
- render_navigation(current_module)
51
-
52
- st.markdown('</div>', unsafe_allow_html=True)
53
-
54
- # Navigation component
55
- def render_navigation(current_module):
56
- # Define modules names like in React
57
- modules = ['Introduction', 'Historical Context', 'Methodology', 'Case Studies', 'Interactive OCR', 'Conclusion']
58
-
59
- # Navigation container
60
- st.markdown(f"""
61
- <div class="nav-container">
62
- <div class="nav-buttons">
63
- {prev_button_html(current_module, modules)}
64
- {next_button_html(current_module, modules)}
65
- </div>
66
-
67
- <div class="nav-dots">
68
- {nav_dots_html(current_module, modules)}
69
- </div>
70
- </div>
71
- """, unsafe_allow_html=True)
72
-
73
- # Previous button HTML
74
- def prev_button_html(current_module, modules):
75
- if current_module > 1:
76
- prev_module = current_module - 1
77
- return f"""
78
- <button class="prev-button"
79
- onclick="document.getElementById('nav_prev_{prev_module}').click()"
80
- aria-label="Go to previous module: {modules[prev_module-1]}">
81
- ← Previous
82
- </button>
83
- """
84
- return ""
85
-
86
- # Next button HTML
87
- def next_button_html(current_module, modules):
88
- if current_module < len(modules):
89
- next_module = current_module + 1
90
- return f"""
91
- <button class="next-button"
92
- onclick="document.getElementById('nav_next_{next_module}').click()"
93
- aria-label="Go to next module: {modules[next_module-1]}">
94
- Next →
95
- </button>
96
- """
97
- return ""
98
-
99
- # Navigation dots HTML
100
- def nav_dots_html(current_module, modules):
101
- dots_html = ""
102
- for i, name in enumerate(modules, 1):
103
- active_class = "active" if i == current_module else ""
104
- dots_html += f"""
105
- <a class="nav-dot {active_class}"
106
- onclick="document.getElementById('nav_dot_{i}').click()"
107
- aria-current="{i == current_module}"
108
- aria-label="Go to module {i}: {name}">
109
- {i}
110
- </a>
111
- """
112
- return dots_html
113
-
114
- # Helper functions for container styles
115
- def gray_container(content, padding="1.5rem"):
116
- """Renders content in a gray container with consistent styling"""
117
- st.markdown(f'<div class="content-container" style="padding:{padding};">{content}</div>', unsafe_allow_html=True)
118
-
119
- def blue_container(content, padding="1.5rem"):
120
- """Renders content in a blue container with consistent styling"""
121
- st.markdown(f'<div class="blue-container" style="padding:{padding};">{content}</div>', unsafe_allow_html=True)
122
-
123
- def yellow_container(content, padding="1.5rem"):
124
- """Renders content in a yellow container with consistent styling"""
125
- st.markdown(f'<div class="yellow-container" style="padding:{padding};">{content}</div>', unsafe_allow_html=True)
126
-
127
- def card_grid(cards):
128
- """
129
- Renders a responsive grid of cards
130
- Args:
131
- cards: List of HTML strings for each card
132
- """
133
- grid_html = '<div class="card-grid">'
134
- for card in cards:
135
- grid_html += f'<div class="card">{card}</div>'
136
- grid_html += '</div>'
137
 
138
- st.markdown(grid_html, unsafe_allow_html=True)
139
-
140
- def module_card(number, title, description):
141
- """Creates a styled module card"""
142
- return f"""
143
- <div class="module-card">
144
- <div class="module-number">Module {number}</div>
145
- <div class="module-title">{title}</div>
146
- <p>{description}</p>
147
- </div>
148
- """
149
-
150
- def key_concept(content):
151
- """Renders a key concept box"""
152
- st.markdown(f'<div class="key-concept">{content}</div>', unsafe_allow_html=True)
153
-
154
- def research_question(content):
155
- """Renders a research question box"""
156
- st.markdown(f'<div class="research-question">{content}</div>', unsafe_allow_html=True)
157
-
158
- def quote(content, author=""):
159
- """Renders a quote with optional author"""
160
- quote_html = f'<div class="quote-container">{content}'
161
- if author:
162
- quote_html += f'<br/><br/><span style="font-size:0.9rem; text-align:right; display:block;">— {author}</span>'
163
- quote_html += '</div>'
164
- st.markdown(quote_html, unsafe_allow_html=True)
165
-
166
- def tool_container(content):
167
- """Renders content in a tool container"""
168
- st.markdown(f'<div class="tool-container">{content}</div>', unsafe_allow_html=True)
169
-
170
- def upload_container(content):
171
- """Renders content in an upload container"""
172
- st.markdown(f'<div class="upload-container">{content}</div>', unsafe_allow_html=True)
 
1
+ """
2
+ UI layout components for the OCR application.
3
+ """
4
+
5
+ import os
6
  import streamlit as st
7
  from pathlib import Path
 
8
 
 
9
  def load_css():
10
+ """Load custom CSS for the application."""
11
+ # Get the directory of the current file
12
+ current_dir = Path(os.path.dirname(os.path.abspath(__file__)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # Path to the CSS file
15
+ css_file = current_dir / "custom.css"
 
16
 
17
+ # Check if the file exists
18
+ if not css_file.exists():
19
+ st.warning(f"Custom CSS file not found at {css_file}")
20
+ return
21
 
22
+ # Read the CSS content
23
+ with open(css_file) as f:
24
+ css_content = f.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ # Apply the CSS
27
+ st.markdown(f"<style>{css_content}</style>", unsafe_allow_html=True)