Spaces:
Running
Running
initial commit
Browse files- README.md +64 -7
- app.py +158 -0
- models/braille_translator.py +166 -0
- models/document_ai.py +74 -0
- models/text_processor.py +180 -0
- requirements.txt +13 -0
- scripts/download_model.py +50 -0
- tests/test_braille.py +107 -0
- tests/test_ocr.py +104 -0
- utils/__init__.py +0 -0
- utils/braille_display.py +115 -0
- utils/image_preprocessing.py +39 -0
- utils/pdf_generator.py +198 -0
README.md
CHANGED
@@ -1,14 +1,71 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.29.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
license: mit
|
11 |
-
short_description: Generate Food menu in Braille
|
12 |
---
|
|
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: BrailleMenuGen
|
3 |
+
emoji: 🦀
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: green
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.29.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
|
|
10 |
---
|
11 |
+
# Menu to Braille Converter
|
12 |
|
13 |
+
An AI-powered application that converts food menu images to Braille text for visually impaired users.
|
14 |
+
|
15 |
+
## Features
|
16 |
+
|
17 |
+
- Upload menu images
|
18 |
+
- Extract text using Tesseract OCR (a LayoutLMv2 document-understanding model is bundled but not yet used for extraction)
|
19 |
+
- Process and structure menu text using LLMs
|
20 |
+
- Convert text to Braille
|
21 |
+
- Display Braille in multiple formats (text, visual, side-by-side)
|
22 |
+
- Download as PDF in different formats
|
23 |
+
|
24 |
+
## Deployment on Hugging Face Spaces
|
25 |
+
|
26 |
+
### Option 1: Direct GitHub Repository Deployment
|
27 |
+
|
28 |
+
1. Fork this repository to your GitHub account
|
29 |
+
2. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
|
30 |
+
3. Click "Create new Space"
|
31 |
+
4. Choose "Gradio" as the SDK
|
32 |
+
5. Connect your GitHub account and select this repository
|
33 |
+
6. Choose hardware requirements (a GPU is recommended for better performance)
|
34 |
+
7. Click "Create Space"
|
35 |
+
|
36 |
+
### Option 2: Manual Deployment
|
37 |
+
|
38 |
+
1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
|
39 |
+
2. Click "Create new Space"
|
40 |
+
3. Choose "Gradio" as the SDK
|
41 |
+
4. Give your Space a name
|
42 |
+
5. Choose hardware requirements (a GPU is recommended for better performance)
|
43 |
+
6. Click "Create Space"
|
44 |
+
7. Clone the Space repository locally
|
45 |
+
8. Copy all files from this project to the cloned repository
|
46 |
+
9. Push the changes to the Space repository
|
47 |
+
|
48 |
+
## Hardware Requirements
|
49 |
+
|
50 |
+
- **Minimum**: CPU (2 vCPUs, 16 GB RAM)
|
51 |
+
- **Recommended**: GPU (T4 or better)
|
52 |
+
|
53 |
+
## Models Used
|
54 |
+
|
55 |
+
- **Document AI**: microsoft/layoutlmv2-base-uncased
|
56 |
+
- **Text Processing**: meta-llama/Meta-Llama-3-8B-Instruct (with fallback to mistralai/Mistral-7B-Instruct-v0.2)
|
57 |
+
- **Context Enhancement**: facebook/bart-large-cnn
|
58 |
+
|
59 |
+
## Local Development
|
60 |
+
|
61 |
+
1. Clone this repository
|
62 |
+
2. Install dependencies: `pip install -r requirements.txt`
|
63 |
+
3. Run the application: `python app.py`
|
64 |
+
|
65 |
+
|
66 |
+
## Future Enhancements
|
67 |
+
|
68 |
+
- Improved menu section recognition
|
69 |
+
- Support for multiple languages
|
70 |
+
- Physical Braille printer integration
|
71 |
+
- Mobile app version
|
app.py
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
from PIL import Image
|
4 |
+
import io
|
5 |
+
import base64
|
6 |
+
|
7 |
+
# Import our custom modules
|
8 |
+
from utils.image_preprocessing import preprocess_image
|
9 |
+
from models.document_ai import extract_text_and_layout
|
10 |
+
from models.text_processor import process_menu_text
|
11 |
+
from models.braille_translator import text_to_braille, get_braille_metadata
|
12 |
+
from utils.pdf_generator import create_braille_pdf, create_braille_pdf_with_comparison
|
13 |
+
|
14 |
+
# Function to create a download link for a PDF
|
15 |
+
def generate_pdf(original_text, braille_text, title, comparison=False):
    """Build the Braille PDF for a text/Braille pair.

    Args:
        original_text: Plain menu text.
        braille_text: Braille rendering of the menu text.
        title: Title forwarded to the PDF builder.
        comparison: When true, the side-by-side comparison builder is used;
            otherwise the sequential builder is used.

    Returns:
        Whatever the selected PDF builder returns.
    """
    # Select the builder first so the shared arguments are spelled out once.
    build_pdf = create_braille_pdf_with_comparison if comparison else create_braille_pdf
    return build_pdf(original_text, braille_text, title)
|
23 |
+
|
24 |
+
def process_image(image, use_llm, use_context):
    """Run the image -> text -> Braille pipeline for the UI.

    Args:
        image: Uploaded menu image (PIL image or NumPy array), or ``None``.
        use_llm: Whether to restructure the OCR text with the LLM step.
        use_context: Forwarded to ``text_to_braille`` as ``use_context``.

    Returns:
        A 4-tuple ``(processed_text, braille_text, metadata_text, state)``
        matching the four outputs wired to the "Process Menu" button.
        ``state`` is a ``(processed_text, braille_text)`` pair on success and
        ``None`` on every failure path.
    """
    if image is None:
        return "Please upload an image first.", "", "", None

    # Every stage runs inside the try block so that a failure while
    # converting or preprocessing the image is reported in the UI exactly
    # like an OCR/translation failure instead of escaping unhandled.
    try:
        # Convert to PIL Image if needed
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        preprocessed_img = preprocess_image(image)

        # Extract text using document AI
        result = extract_text_and_layout(preprocessed_img)
        words = result.get('words', [])
        if not words:
            return "No text was extracted from the image.", "", "", None

        raw_text = ' '.join(words)

        # The raw OCR text is the default; it is only replaced when the LLM
        # step is enabled and reports success.
        processed_text = raw_text
        if use_llm:
            processed_result = process_menu_text(raw_text)
            if processed_result['success']:
                processed_text = processed_result['structured_text']

        # Translate to Braille
        braille_result = text_to_braille(processed_text, use_context=use_context)
        if not braille_result['success']:
            return processed_text, "", "Braille translation failed.", None

        braille_text = braille_result['formatted_braille']

        # Generate metadata
        metadata = get_braille_metadata(processed_text)
        metadata_text = f"Translation contains {metadata['word_count']} words, {metadata['character_count']} characters, {metadata['line_count']} lines."

        return processed_text, braille_text, metadata_text, (processed_text, braille_text)

    except Exception as e:
        return f"Error processing image: {str(e)}", "", "", None
|
73 |
+
|
74 |
+
def _pdf_result_to_path(pdf_result):
    """Return a filesystem path for *pdf_result*, spooling buffers to disk."""
    import tempfile

    # NOTE(review): the PDF builders are not visible from this file; the
    # variable name used by the caller suggests an in-memory buffer — confirm.
    if hasattr(pdf_result, "getvalue"):
        payload = pdf_result.getvalue()
    elif hasattr(pdf_result, "read"):
        payload = pdf_result.read()
    elif isinstance(pdf_result, (bytes, bytearray)):
        payload = bytes(pdf_result)
    else:
        # Already a path (or None): hand it through untouched.
        return pdf_result

    if not isinstance(payload, (bytes, bytearray)):
        return pdf_result

    # delete=False: the file must outlive this function so the UI can serve it.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as handle:
        handle.write(payload)
    return handle.name


def create_pdf(state, pdf_title, pdf_type):
    """Create a PDF file for download.

    Args:
        state: ``(original_text, braille_text)`` pair stored by the processing
            step, or ``None`` when nothing has been processed yet.
        pdf_title: Title forwarded to the PDF builder.
        pdf_type: Selected format label; "Side-by-Side Comparison" selects the
            comparison layout, anything else the sequential layout.

    Returns:
        ``None`` when ``state`` is missing or malformed. Otherwise the
        generated PDF: in-memory buffers and raw bytes are written to a
        temporary ``.pdf`` file and its path is returned (the file output
        component takes a path, not a buffer); any other result is returned
        unchanged.
    """
    if state is None or len(state) != 2:
        return None

    original_text, braille_text = state
    comparison = (pdf_type == "Side-by-Side Comparison")

    pdf_result = generate_pdf(original_text, braille_text, pdf_title, comparison)

    # Return the file for download
    return _pdf_result_to_path(pdf_result)
|
86 |
+
|
87 |
+
# Create the Gradio interface
|
88 |
+
import os

# NOTE(review): the nesting below is reconstructed from statement order; the
# source view carries no indentation — confirm the layout against the app.
with gr.Blocks(title="Menu to Braille Converter") as demo:
    gr.Markdown("# Menu to Braille Converter")
    gr.Markdown("Upload a menu image to convert it to Braille text")

    with gr.Row():
        with gr.Column(scale=1):
            # Input components
            image_input = gr.Image(type="pil", label="Upload Menu Image")

            with gr.Row():
                use_llm = gr.Checkbox(label="Use AI for text processing", value=True)
                use_context = gr.Checkbox(label="Use AI for context enhancement", value=True)

            process_button = gr.Button("Process Menu")

        with gr.Column(scale=2):
            # Output components
            processed_text = gr.Textbox(label="Processed Text", lines=8)
            braille_output = gr.Textbox(label="Braille Translation", lines=10)
            metadata_output = gr.Markdown()

    # Hidden state for PDF generation: holds the (text, braille) pair
    # produced by process_image so create_pdf can reuse it.
    state = gr.State()

    # PDF download section
    with gr.Group():
        gr.Markdown("### Download Options")
        pdf_title = gr.Textbox(label="PDF Title", value="Menu in Braille")
        pdf_type = gr.Radio(
            ["Sequential (Text then Braille)", "Side-by-Side Comparison"],
            label="PDF Format",
            value="Sequential (Text then Braille)"
        )
        pdf_button = gr.Button("Generate PDF")
        pdf_output = gr.File(label="Download PDF")

    # Set up event handlers
    process_button.click(
        process_image,
        inputs=[image_input, use_llm, use_context],
        outputs=[processed_text, braille_output, metadata_output, state]
    )

    pdf_button.click(
        create_pdf,
        inputs=[state, pdf_title, pdf_type],
        outputs=[pdf_output]
    )

    # Add examples. Only sample images that actually exist on disk are
    # offered, so a checkout without the sample assets still starts.
    sample_menus = [
        path
        for path in ("assets/sample_menus/menu1.jpg", "assets/sample_menus/menu2.jpg")
        if os.path.isfile(path)
    ]
    if sample_menus:
        gr.Examples(
            examples=sample_menus,
            inputs=image_input
        )

    # Add about section
    with gr.Accordion("About", open=False):
        gr.Markdown("""
This application converts menu images to Braille text using AI technologies:

- Document AI for text extraction
- LLMs for text processing and enhancement
- Braille translation with formatting
- PDF generation for download

Created as a demonstration of AI-powered accessibility tools.
""")

# Launch the app
if __name__ == "__main__":
    demo.launch()
|
models/braille_translator.py
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
import re
|
3 |
+
|
4 |
+
# English to Braille mapping (Grade 1 Braille) #
|
5 |
+
BRAILLE_MAP = {
    'a': '⠁', 'b': '⠃', 'c': '⠉', 'd': '⠙', 'e': '⠑', 'f': '⠋', 'g': '⠛', 'h': '⠓', 'i': '⠊', 'j': '⠚',
    'k': '⠅', 'l': '⠇', 'm': '⠍', 'n': '⠝', 'o': '⠕', 'p': '⠏', 'q': '⠟', 'r': '⠗', 's': '⠎', 't': '⠞',
    'u': '⠥', 'v': '⠧', 'w': '⠺', 'x': '⠭', 'y': '⠽', 'z': '⠵',
    # Digits reuse the cells of the letters a-j; they are only readable as
    # digits when preceded by the numeric indicator (see below).
    '0': '⠚', '1': '⠁', '2': '⠃', '3': '⠉', '4': '⠙', '5': '⠑', '6': '⠋', '7': '⠛', '8': '⠓', '9': '⠊',
    '.': '⠲', ',': '⠂', ';': '⠆', ':': '⠒', '!': '⠖', '?': '⠦', '"': '⠦', "'": '⠄', '(': '⠐⠣', ')': '⠐⠜',
    '-': '⠤', '/': '⠌', '+': '⠬', '=': '⠐⠶', '*': '⠐⠔', '&': '⠯', '%': '⠐⠏', '#': '⠼', '@': '⠐⠁',
    '$': '⠐⠎', '€': '⠐⠑', '£': '⠐⠇', '¥': '⠐⠽', '₹': '⠐⠗',
    ' ': '\u2800'  # braille blank cell
}

# Indicator cells used while translating numbers.
_NUMERIC_INDICATOR = '\u283c'  # dots 3456: the following a-j cells are digits
_GRADE1_INDICATOR = '\u2830'   # dots 56: the following a-j cell is a letter again
_ASCII_DIGITS = frozenset('0123456789')
_LETTERS_A_TO_J = frozenset('abcdefghij')

# Summarization pipeline, created lazily on first use by get_summarizer()
summarizer = None


def get_summarizer():
    """Get or initialize the summarization model.

    Returns:
        The cached ``transformers`` summarization pipeline, or ``None`` when
        loading it failed (the error is printed, not raised).
    """
    global summarizer
    if summarizer is None:
        try:
            summarizer = pipeline(
                "summarization",
                model="facebook/bart-large-cnn",
                max_length=100,
                min_length=30,
                truncation=True
            )
        except Exception as e:
            # Best effort: callers treat a missing summarizer as "no context".
            print(f"Error loading summarizer: {str(e)}")
    return summarizer


def text_to_grade1_braille(text):
    """
    Convert text to Grade 1 (uncontracted) Braille.

    The text is lower-cased and translated cell by cell through
    ``BRAILLE_MAP``. Runs of ASCII digits are prefixed with the numeric
    indicator so that digits, which share their cells with the letters a-j,
    stay distinguishable from letters (a price such as ``10.99`` would
    otherwise read as ``aj.ii``). A ``.`` or ``,`` directly between two digits
    keeps the number open, and a letter a-j that immediately follows a number
    is prefixed with the grade 1 indicator so it is not read as another digit.

    Capitalisation is not represented: every letter is emitted in lower case.

    Args:
        text: Text to convert

    Returns:
        Braille text; characters missing from ``BRAILLE_MAP`` are kept as-is.
    """
    lowered = text.lower()
    last_index = len(lowered) - 1
    cells = []
    in_number = False

    for index, char in enumerate(lowered):
        if char in _ASCII_DIGITS:
            if not in_number:
                cells.append(_NUMERIC_INDICATOR)
                in_number = True
            cells.append(BRAILLE_MAP[char])
            continue

        if in_number:
            next_is_digit = index < last_index and lowered[index + 1] in _ASCII_DIGITS
            if char in '.,' and next_is_digit:
                # Decimal point / thousands separator inside a number.
                cells.append(BRAILLE_MAP[char])
                continue
            in_number = False
            if char in _LETTERS_A_TO_J:
                cells.append(_GRADE1_INDICATOR)

        # For characters not in our map, just keep the original
        cells.append(BRAILLE_MAP.get(char, char))

    return ''.join(cells)
|
55 |
+
|
56 |
+
def text_to_braille(text, use_context=True):
    """
    Convert text to Braille, with optional context enhancement.

    The context summary is only reported in the result; it does not alter the
    Braille output, which is produced before summarization runs.

    Args:
        text: Text to convert to Braille
        use_context: Whether to additionally summarize the text (only done
            for texts longer than 200 characters)

    Returns:
        Dictionary with the keys ``braille_text``, ``formatted_braille``,
        ``context_summary`` and ``success``. On failure ``success`` is False,
        the text fields are empty, ``context_summary`` is None and an
        ``error`` message is included, so callers can read the same keys on
        both paths.
    """
    try:
        # Basic Braille translation
        braille_text = text_to_grade1_braille(text)

        context_summary = None
        if use_context and len(text) > 200:  # Only for longer texts
            summarize = get_summarizer()
            if summarize:
                try:
                    summary_result = summarize(text)
                    if summary_result and len(summary_result) > 0:
                        context_summary = summary_result[0]['summary_text']
                except Exception as e:
                    # Summarization is optional; translation still succeeds.
                    print(f"Summarization error: {str(e)}")

        # Format the Braille text for better readability
        formatted_braille = format_braille_text(braille_text)

        return {
            'braille_text': braille_text,
            'formatted_braille': formatted_braille,
            'context_summary': context_summary,
            'success': True
        }
    except Exception as e:
        return {
            'braille_text': '',
            'formatted_braille': '',
            'context_summary': None,
            'error': str(e),
            'success': False
        }
|
99 |
+
|
100 |
+
def format_braille_text(braille_text, line_length=32):
    """
    Word-wrap Braille text to a maximum line length.

    Words are separated by the braille blank cell (U+2800), which is what the
    translator emits for a space, as well as by an ordinary ASCII space. A
    paragraph that contains a braille blank is re-joined with braille blanks;
    otherwise ASCII spaces are used. A single word longer than ``line_length``
    is kept whole on its own line.

    Args:
        braille_text: Raw Braille text
        line_length: Maximum characters per line

    Returns:
        Formatted Braille text; the input's newline-separated paragraphs are
        joined with double newlines.
    """
    braille_blank = '\u2800'
    formatted_paragraphs = []

    for paragraph in braille_text.split('\n'):
        # A paragraph made only of blanks (ASCII or braille) counts as empty;
        # str.strip() alone does not remove U+2800.
        if not paragraph.replace(braille_blank, ' ').strip():
            formatted_paragraphs.append('')
            continue

        separator = braille_blank if braille_blank in paragraph else ' '
        words = paragraph.replace(' ', separator).split(separator)

        lines = []
        current_line = []
        current_length = 0

        for word in words:
            gap = 1 if current_length > 0 else 0
            if current_length + len(word) + gap > line_length:
                # Only flush a line that has content, so an over-long first
                # word does not produce a spurious empty leading line.
                if current_line:
                    lines.append(separator.join(current_line))
                current_line = [word]
                current_length = len(word)
            else:
                current_line.append(word)
                current_length += gap + len(word)

        # Add the last line if not empty
        if current_line:
            lines.append(separator.join(current_line))

        formatted_paragraphs.append('\n'.join(lines))

    # Join paragraphs with double newlines
    return '\n\n'.join(formatted_paragraphs)
|
147 |
+
|
148 |
+
def get_braille_metadata(text):
    """
    Get metadata about the text that is being translated.

    Args:
        text: Original text

    Returns:
        Dictionary with ``word_count`` (runs of word characters),
        ``character_count`` (length of ``text``) and ``line_count``. Empty
        text has zero lines and a trailing newline does not start an extra
        line.
    """
    word_count = len(re.findall(r'\b\w+\b', text))
    character_count = len(text)

    # One line per newline, plus the final line when the text does not end
    # with a newline. Splitting on '\n' would count 1 for '' and one too many
    # for text that ends in a newline.
    line_count = text.count('\n')
    if text and not text.endswith('\n'):
        line_count += 1

    return {
        'word_count': word_count,
        'character_count': character_count,
        'line_count': line_count
    }
|
models/document_ai.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import LayoutLMv2Processor, LayoutLMv2ForSequenceClassification
|
3 |
+
from PIL import Image
|
4 |
+
import numpy as np
|
5 |
+
import pytesseract
|
6 |
+
|
7 |
+
# Initialize the model and processor with caching
|
8 |
+
processor = None
|
9 |
+
model = None
|
10 |
+
|
11 |
+
def get_document_ai_models():
    """Return the LayoutLMv2 processor and model, loading each one on first use.

    Both objects are cached in the module-level ``processor`` and ``model``
    globals so repeated calls reuse the loaded instances.

    Returns:
        Tuple ``(processor, model)``.
    """
    global processor, model

    checkpoint = "microsoft/layoutlmv2-base-uncased"
    if processor is None:
        processor = LayoutLMv2Processor.from_pretrained(checkpoint)
    if model is None:
        model = LayoutLMv2ForSequenceClassification.from_pretrained(checkpoint)

    return processor, model
|
19 |
+
|
20 |
+
def extract_text_with_tesseract(image):
    """Extract words and their bounding boxes using Tesseract OCR.

    Args:
        image: PIL image or NumPy array; it is converted to RGB before OCR.

    Returns:
        Tuple ``(words, word_boxes)``. ``words`` lists every non-blank text
        entry reported by Tesseract and ``word_boxes`` holds the matching
        ``[left, top, right, bottom]`` box for each word.
    """
    if isinstance(image, np.ndarray):
        pil_image = Image.fromarray(image).convert("RGB")
    else:
        pil_image = image.convert("RGB")

    # A single OCR pass: image_to_data already carries the recognised text
    # alongside the box geometry, so no separate image_to_string run is needed.
    data = pytesseract.image_to_data(pil_image, output_type=pytesseract.Output.DICT)

    words = []
    word_boxes = []
    entries = zip(data['text'], data['left'], data['top'], data['width'], data['height'])
    for token, left, top, width, height in entries:
        # Skip entries whose text is empty or whitespace only.
        if token.strip() != '':
            words.append(token)
            word_boxes.append([left, top, left + width, top + height])

    return words, word_boxes
|
44 |
+
|
45 |
+
def extract_text_and_layout(image):
    """
    Extract words and word boxes from a menu image.

    The extraction itself is delegated to ``extract_text_with_tesseract``;
    no LayoutLMv2 model is invoked by this function.

    Args:
        image: PIL Image object (a NumPy array is converted to an RGB image)

    Returns:
        Dictionary with ``words``, ``boxes`` and ``success``; ``success`` is
        False and both lists are empty when no words were found.
    """
    # Convert numpy array to PIL Image if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image).convert("RGB")

    words, boxes = extract_text_with_tesseract(image)
    found_text = bool(words)

    return {
        'words': words if found_text else [],
        'boxes': boxes if found_text else [],
        'success': found_text
    }
|
models/text_processor.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
2 |
+
import torch
|
3 |
+
import json
|
4 |
+
|
5 |
+
# Model ID for a smaller model suitable for Spaces
|
6 |
+
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
FALLBACK_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# Initialize with None - will be loaded on first use
tokenizer = None
text_generation_pipeline = None


def _load_text_generation_pipeline(model_id):
    """Load the tokenizer and 8-bit model for *model_id* and wrap them in a pipeline."""
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Use 8-bit quantization to reduce memory usage.
    # NOTE(review): 8-bit loading relies on the bitsandbytes package, which
    # the requirements.txt in this commit does not list — confirm it is installed.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16,
        load_in_8bit=True
    )

    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=loaded_tokenizer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
        repetition_penalty=1.15
    )
    return loaded_tokenizer, generator


def get_text_pipeline():
    """
    Initialize or return the text generation pipeline.

    The primary model (``MODEL_ID``) is tried first and ``FALLBACK_MODEL_ID``
    second, both with identical loading and generation settings. The result
    is cached in the module-level ``tokenizer`` and
    ``text_generation_pipeline`` globals; the globals are only assigned once
    a model has loaded completely.

    Returns:
        The cached pipeline, or ``None`` when neither model could be loaded
        (each failure is printed, not raised).
    """
    global tokenizer, text_generation_pipeline

    if text_generation_pipeline is not None:
        return text_generation_pipeline

    try:
        tokenizer, text_generation_pipeline = _load_text_generation_pipeline(MODEL_ID)
    except Exception as e:
        print(f"Error loading primary model: {str(e)}")
        print(f"Falling back to {FALLBACK_MODEL_ID}")

        try:
            tokenizer, text_generation_pipeline = _load_text_generation_pipeline(FALLBACK_MODEL_ID)
        except Exception as e2:
            print(f"Error loading fallback model: {str(e2)}")
            return None

    return text_generation_pipeline
|
74 |
+
|
75 |
+
def _menu_text_failure(raw_text, error):
    """Build the fallback result that hands the raw OCR text back unchanged."""
    return {
        'structured_text': raw_text,
        'menu_sections': [],
        'success': False,
        'error': error
    }


def _render_menu_sections(menu_data):
    """Render parsed menu JSON as plain text: underlined section titles, then items."""
    parts = []
    for section in menu_data.get('menu_sections', []):
        # A missing or null section name falls back to a generic heading.
        section_name = section.get('section_name') or 'Menu Items'
        parts.append(f"{section_name}\n")
        parts.append("-" * len(section_name) + "\n\n")

        for item in section.get('items', []):
            parts.append(f"{item.get('name', '')}")
            if item.get('price'):
                parts.append(f" - {item.get('price')}")
            parts.append("\n")

            if item.get('description'):
                parts.append(f" {item.get('description')}\n")

            parts.append("\n")

        parts.append("\n")
    return ''.join(parts)


def process_menu_text(raw_text):
    """
    Process raw OCR text using LLM to improve structure and readability.

    Args:
        raw_text: Raw text extracted from menu image

    Returns:
        Dictionary with ``structured_text`` and ``success``. On success it
        also carries the parsed ``menu_data``. On any failure (no model, no
        JSON in the reply, unparsable JSON, or JSON that yields no menu
        text) ``structured_text`` is the unchanged ``raw_text`` and an
        ``error`` message is included, so the OCR text is never lost.
    """
    # Named "generator" so the imported transformers.pipeline is not shadowed.
    generator = get_text_pipeline()

    if generator is None:
        # Fallback to simple processing if model not available
        return _menu_text_failure(raw_text, "LLM model not available")

    # Construct prompt for the LLM
    prompt = f"""<|system|>
You are an AI assistant that helps structure menu text from OCR.
Your task is to clean up the text, correct obvious OCR errors, and structure it properly.
Identify menu sections, items, and prices.
Format your response as JSON with menu sections, items, and prices.
<|user|>
Here is the raw text extracted from a menu image:

{raw_text}

Please clean and structure this menu text. Format your response as JSON with the following structure:
{{
"menu_sections": [
{{
"section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
"items": [
{{
"name": "Item name",
"description": "Item description if available",
"price": "Price if available"
}}
]
}}
]
}}
<|assistant|>
"""

    try:
        # Generate response from LLM
        response = generator(prompt, return_full_text=False)[0]['generated_text']
        response_text = response.strip()

        # The JSON payload is taken to be everything from the first '{' to
        # the last '}' of the reply.
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1

        if json_start < 0 or json_end <= json_start:
            return _menu_text_failure(raw_text, "Failed to parse LLM response as JSON")

        menu_data = json.loads(response_text[json_start:json_end])
        structured_text = _render_menu_sections(menu_data)

        # Valid JSON without any sections/items would otherwise replace the
        # OCR text with an empty string.
        if not structured_text.strip():
            return _menu_text_failure(raw_text, "LLM response contained no menu items")

        return {
            'structured_text': structured_text,
            'menu_data': menu_data,
            'success': True
        }

    except Exception as e:
        return _menu_text_failure(raw_text, str(e))
|
requirements.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio>=3.50.0
|
2 |
+
pillow>=9.0.0
|
3 |
+
numpy>=1.22.0
|
4 |
+
torch>=2.0.0
|
5 |
+
transformers>=4.30.0
|
6 |
+
layoutlmv2>=0.1.0
|
7 |
+
pytesseract>=0.3.10
|
8 |
+
opencv-python>=4.7.0
|
9 |
+
sentence-transformers>=2.2.2
|
10 |
+
python-braille>=0.1.0
|
11 |
+
reportlab>=3.6.12
|
12 |
+
|
13 |
+
|
scripts/download_model.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import requests
|
4 |
+
from tqdm import tqdm
|
5 |
+
import huggingface_hub
|
6 |
+
|
7 |
+
# Add parent directory to path
|
8 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
9 |
+
|
10 |
+
def download_model():
    """
    Download the quantized Llama 3 GGUF file from Hugging Face.

    The file is stored in the ``models`` directory next to this script's
    parent directory. An existing file is reused without downloading.

    Returns:
        Path of the model file, or ``None`` when the download failed (manual
        download instructions are printed in that case).
    """
    model_name = "TheBloke/Llama-3-8B-Instruct-GGUF"
    filename = "llama-3-8b-instruct.Q4_K_M.gguf"

    # <project root>/models, where the project root is this script's parent directory
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    models_dir = os.path.join(project_root, "models")
    os.makedirs(models_dir, exist_ok=True)

    model_path = os.path.join(models_dir, filename)
    if os.path.exists(model_path):
        print(f"Model already exists at {model_path}")
        return model_path

    print(f"Downloading {filename} from {model_name}...")

    try:
        # Download using huggingface_hub
        huggingface_hub.hf_hub_download(
            repo_id=model_name,
            filename=filename,
            local_dir=models_dir,
            local_dir_use_symlinks=False
        )
    except Exception as e:
        print(f"Error downloading model: {str(e)}")
        print("\nManual download instructions:")
        print(f"1. Go to https://huggingface.co/{model_name}/tree/main")
        print(f"2. Download the file {filename}")
        print(f"3. Place it in the models directory at {models_dir}")
        return None

    print(f"Model downloaded successfully to {model_path}")
    return model_path


if __name__ == "__main__":
    download_model()
|
tests/test_braille.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import time
|
4 |
+
|
5 |
+
# Add the parent directory to the path so we can import our modules
|
6 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
7 |
+
|
8 |
+
from models.braille_translator import text_to_braille, get_braille_metadata
|
9 |
+
|
10 |
+
def test_braille_translation(text):
    """
    Translate one text to Braille and collect timing and size figures.

    Args:
        text: Text to translate to Braille

    Returns:
        Dictionary with the original text, success flag, processing time, a
        Braille preview (at most 100 characters plus '...') and the word,
        character and line counts; an ``error`` entry is added on failure.
    """
    started = time.time()

    try:
        outcome = text_to_braille(text, use_context=True)
        success = outcome['success']
        braille_text = outcome.get('formatted_braille', '')
        error = outcome.get('error', None)
    except Exception as exc:
        success, braille_text, error = False, '', str(exc)

    finished = time.time()

    metadata = get_braille_metadata(text)

    # Keep the report compact: long translations are cut to a preview.
    preview = braille_text
    if len(braille_text) > 100:
        preview = braille_text[:100] + '...'

    test_results = {
        'original_text': text,
        'success': success,
        'processing_time': finished - started,
        'braille_text': preview,
        'word_count': metadata['word_count'],
        'character_count': metadata['character_count'],
        'line_count': metadata['line_count']
    }

    if not success:
        test_results['error'] = error

    return test_results
|
53 |
+
|
54 |
+
def run_braille_tests():
    """
    Run the Braille translation check on a few sample menu texts.

    Progress for every sample is printed as it completes.

    Returns:
        List of test results, one dictionary per sample
    """
    simple_item = "Cheeseburger - $10.99\nServed with fries and a pickle."
    menu_section = "APPETIZERS\n-----------\nMozzarella Sticks - $7.99\nLoaded Nachos - $9.99\nBuffalo Wings - $12.99"
    formatted_menu = """MAIN COURSE
-------------
Grilled Salmon - $18.99
Fresh Atlantic salmon served with seasonal vegetables and rice pilaf.

Filet Mignon - $24.99
8oz center-cut filet served with mashed potatoes and asparagus.

Vegetable Pasta - $14.99
Penne pasta with seasonal vegetables in a creamy garlic sauce."""

    sample_texts = [simple_item, menu_section, formatted_menu]
    results = []

    for number, text in enumerate(sample_texts, start=1):
        print(f"\nTesting sample {number}...")
        result = test_braille_translation(text)
        results.append(result)

        # Print progress
        status = "SUCCESS" if result['success'] else "FAILED"
        print(f"Sample {number}: {status}")
        print(f"Words: {result['word_count']}, Time: {result['processing_time']:.2f}s")
        print(f"Braille sample: {result['braille_text'][:50]}...")

    return results


if __name__ == "__main__":
    print("Testing Braille translation functionality...")
    results = run_braille_tests()

    # Print summary
    success_count = len([r for r in results if r['success']])
    print(f"\nSummary: {success_count}/{len(results)} tests passed")

    if results:
        total_time = sum(r['processing_time'] for r in results)
        avg_time = total_time / len(results)
        print(f"Average processing time: {avg_time:.2f} seconds")
|
tests/test_ocr.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import time
|
4 |
+
from PIL import Image
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
# Add the parent directory to the path so we can import our modules
|
8 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
9 |
+
|
10 |
+
from utils.image_preprocessing import preprocess_image
|
11 |
+
from models.document_ai import extract_text_and_layout
|
12 |
+
|
13 |
+
def test_menu_extraction(image_path):
    """
    Test the OCR extraction on a single menu image.

    The image is opened, preprocessed and handed to
    extract_text_and_layout(). Exceptions raised during extraction are
    caught and recorded in the result; exceptions raised while opening or
    preprocessing the image still propagate to the caller.

    Args:
        image_path: Path to the menu image

    Returns:
        Dictionary with the keys 'image_path', 'success',
        'processing_time' (seconds, covering loading, preprocessing and
        extraction), 'extracted_text', 'text_length' and 'word_count'.
        An 'error' key with the exception message is added only when
        extraction failed.
    """
    start_time = time.time()

    # Load and preprocess image. The context manager releases the file
    # handle once preprocess_image() has copied the pixels into an array.
    with Image.open(image_path) as image:
        preprocessed_img = preprocess_image(image)

    # Extract text
    error = None
    try:
        result = extract_text_and_layout(preprocessed_img)
        # A result without a 'words' entry counts as success with no text
        extracted_text = ' '.join(result['words']) if 'words' in result else ''
        success = True
    except Exception as e:
        extracted_text = ''
        success = False
        error = str(e)

    end_time = time.time()

    # Compile results
    test_results = {
        'image_path': image_path,
        'success': success,
        'processing_time': end_time - start_time,
        'extracted_text': extracted_text,
        'text_length': len(extracted_text),
        'word_count': len(extracted_text.split()) if extracted_text else 0,
    }

    if not success:
        test_results['error'] = error

    return test_results
|
55 |
+
|
56 |
+
def run_batch_test(image_dir):
    """
    Run tests on all images in a directory.

    Only regular files whose name ends in .png, .jpg or .jpeg
    (case-insensitive) are tested. Files are processed in sorted name
    order so the result list is reproducible between runs. A progress
    line is printed for every image.

    Args:
        image_dir: Directory containing menu images

    Returns:
        List of test results, one per tested image
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    results = []

    # os.listdir() returns entries in arbitrary order; sort for stable output
    for filename in sorted(os.listdir(image_dir)):
        if not filename.lower().endswith(image_suffixes):
            continue

        image_path = os.path.join(image_dir, filename)
        # Skip directories that merely carry an image-like name
        if not os.path.isfile(image_path):
            continue

        result = test_menu_extraction(image_path)
        results.append(result)

        # Print progress
        status = "SUCCESS" if result['success'] else "FAILED"
        print(f"{filename}: {status} - {result['word_count']} words extracted")

    return results
|
79 |
+
|
80 |
+
if __name__ == "__main__":
    # Sample menus live in assets/sample_menus under the parent of this
    # file's directory.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sample_dir = os.path.join(base_dir, "assets", "sample_menus")

    if not os.path.exists(sample_dir):
        print(f"Sample directory not found: {sample_dir}")
        print("Creating directory and downloading sample images...")
        os.makedirs(sample_dir, exist_ok=True)
        # Downloading is not implemented here; a note is written instead
        # so that sample images can be added manually.
        note_path = os.path.join(sample_dir, "README.txt")
        with open(note_path, "w") as f:
            f.write("Add sample menu images to this directory for testing.")

    results = run_batch_test(sample_dir)

    # Summary: how many images were processed successfully
    passed = len([entry for entry in results if entry['success']])
    print(f"\nSummary: {passed}/{len(results)} tests passed")

    # Averages are only meaningful when at least one image was tested
    if results:
        tested = len(results)
        avg_words = sum(entry['word_count'] for entry in results) / tested
        avg_time = sum(entry['processing_time'] for entry in results) / tested
        print(f"Average words extracted: {avg_words:.1f}")
        print(f"Average processing time: {avg_time:.2f} seconds")
|
utils/__init__.py
ADDED
File without changes
|
utils/braille_display.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def text_to_unicode_braille(braille_text):
    """
    Return the given Braille text as a single string, unchanged.

    No mapping from a dots notation to Unicode is applied: Unicode
    Braille patterns and all other characters (letters, digits, spaces,
    newlines) are copied to the output exactly as they appear.

    Args:
        braille_text: Braille text, as a string or an iterable of strings

    Returns:
        String containing the characters of braille_text in their
        original order
    """
    # One join instead of per-character concatenation; the content is a
    # pure pass-through, so no lookup table is needed.
    return "".join(braille_text)
|
26 |
+
|
27 |
+
def create_braille_html(braille_text):
    """
    Create HTML to display Braille with proper styling.

    The text is HTML-escaped, so characters such as '&' and '<' are shown
    literally instead of being interpreted as markup. Newlines become
    <br> tags.

    Args:
        braille_text: Braille text (either in dots or Unicode)

    Returns:
        HTML string for displaying Braille
    """
    # Imported by name so the function does not depend on a module-level
    # import of the html module.
    from html import escape

    # Convert to Unicode Braille if not already
    unicode_braille = text_to_unicode_braille(braille_text)

    # Escape first, then insert <br>, so the line-break tags themselves
    # are not escaped.
    formatted_text = escape(unicode_braille, quote=False).replace('\n', '<br>')

    # Create HTML with proper styling
    return f"""
    <div style="font-family: 'Courier New', monospace; font-size: 20px; line-height: 1.5;
                background-color: #f5f5f5; padding: 15px; border-radius: 5px;">
        {formatted_text}
    </div>
    """
|
52 |
+
|
53 |
+
|
54 |
+
def create_braille_comparison(text, braille_text):
    """
    Create a side-by-side comparison of text and its Braille representation.

    Both inputs are split on newlines and paired line by line; the
    shorter side is padded with empty lines. Every cell is HTML-escaped,
    so characters such as '&' and '<' in the menu text are shown
    literally.

    Args:
        text: Original text
        braille_text: Braille translation

    Returns:
        HTML string for displaying the comparison
    """
    # Imported by name so the function does not depend on a module-level
    # import of the html module.
    from html import escape

    # Convert to Unicode Braille
    unicode_braille = text_to_unicode_braille(braille_text)

    # Split into lines
    text_lines = text.split('\n')
    braille_lines = unicode_braille.split('\n')

    # Ensure both lists have the same length
    max_lines = max(len(text_lines), len(braille_lines))
    text_lines = text_lines + [''] * (max_lines - len(text_lines))
    braille_lines = braille_lines + [''] * (max_lines - len(braille_lines))

    # Static part: stylesheet, table opening tag and header row
    header = """
    <style>
        .braille-table {
            width: 100%;
            border-collapse: collapse;
        }
        .braille-table td {
            padding: 8px;
            vertical-align: top;
            border-bottom: 1px solid #ddd;
        }
        .braille-text {
            font-family: 'Courier New', monospace;
            font-size: 20px;
            background-color: #f5f5f5;
        }
        .original-text {
            font-family: Arial, sans-serif;
        }
    </style>
    <table class="braille-table">
        <tr>
            <th>Original Text</th>
            <th>Braille Representation</th>
        </tr>
    """

    # One table row per line pair; rows are collected and joined once
    rows = [
        f"""
        <tr>
            <td class="original-text">{escape(original_line, quote=False)}</td>
            <td class="braille-text">{escape(braille_line, quote=False)}</td>
        </tr>
        """
        for original_line, braille_line in zip(text_lines, braille_lines)
    ]

    return header + "".join(rows) + "</table>"
|
utils/image_preprocessing.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import numpy as np
|
3 |
+
from PIL import Image
|
4 |
+
|
5 |
+
def preprocess_image(image, target_size=(1000, 1000)):
    """
    Preprocess image for document analysis.

    The image is brought to three-channel RGB, resized to exactly
    target_size (the aspect ratio is not preserved) and contrast-enhanced
    by applying CLAHE to the lightness channel in LAB colour space.

    Args:
        image: PIL Image object in any mode, or a numpy array.
            NOTE(review): arrays are treated as grayscale (2-D), RGB or
            RGBA in that channel order - confirm against callers.
        target_size: Tuple of (width, height) to resize to

    Returns:
        Preprocessed RGB image as numpy array
    """
    # Convert PIL Image to numpy array if needed
    if isinstance(image, Image.Image):
        # Normalise every PIL mode (palette, 1-bit, LA, CMYK, ...) to RGB
        # first; only L, RGB and RGBA survive a raw np.array() conversion
        # in a form the channel checks below can interpret.
        if image.mode != "RGB":
            image = image.convert("RGB")
        img_array = np.array(image)
    else:
        img_array = image

    # Convert to RGB if grayscale or RGBA
    if len(img_array.shape) == 2:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
    elif img_array.shape[2] == 4:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)

    # Resize image
    img_array = cv2.resize(img_array, target_size)

    # Enhance contrast: equalise only the lightness channel so colours
    # are left untouched.
    lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
    lightness, chan_a, chan_b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    enhanced_lightness = clahe.apply(lightness)
    enhanced_lab = cv2.merge((enhanced_lightness, chan_a, chan_b))
    enhanced_img = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)

    return enhanced_img
|
utils/pdf_generator.py
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import tempfile
|
3 |
+
from reportlab.lib.pagesizes import letter
|
4 |
+
from reportlab.lib import colors
|
5 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
6 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
|
7 |
+
from reportlab.pdfbase import pdfmetrics
|
8 |
+
from reportlab.pdfbase.ttfonts import TTFont
|
9 |
+
import io
|
10 |
+
|
11 |
+
# Try to register a font that supports Braille Unicode characters
|
12 |
+
# Candidate fonts, tried in order; the first one that exists and
# registers successfully is used under the name 'BrailleFont'.
# NOTE(review): confirm each candidate actually contains the Braille
# Patterns glyphs - this code only checks that the file exists.
font_paths = [
    "DejaVuSans.ttf",  # Common on Linux
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/System/Library/Fonts/Arial Unicode.ttf",  # Mac
    "C:\\Windows\\Fonts\\arial.ttf"  # Windows
]

# Read by the PDF builders below to choose between 'BrailleFont' and
# the 'Helvetica' fallback.
font_registered = False
for font_path in font_paths:
    if not os.path.exists(font_path):
        continue
    try:
        pdfmetrics.registerFont(TTFont('BrailleFont', font_path))
    except Exception as e:
        # An unreadable font file must not prevent the remaining
        # candidates from being tried.
        print(f"Error registering font: {str(e)}")
        continue
    font_registered = True
    break

if not font_registered:
    # Use default font if none of the above are found
    print("No suitable font found for Braille. Using default font.")
|
33 |
+
|
34 |
+
def create_braille_pdf(original_text, braille_text, title="Menu in Braille"):
    """
    Create a PDF file with original text and its Braille translation.

    The document contains the title, an "Original Text" section and a
    "Braille Translation" section. Each non-blank input line becomes one
    paragraph; blank lines become vertical space. Lines are escaped
    before being placed in a Paragraph so that '&', '<' and '>' in menu
    text are printed literally rather than parsed as paragraph markup.
    The title is passed through unescaped.

    Args:
        original_text: Original text content
        braille_text: Braille translation
        title: PDF title

    Returns:
        BytesIO object containing the PDF, positioned at the start
    """
    from html import escape

    # Create a BytesIO object to store the PDF
    buffer = io.BytesIO()

    # Create the PDF document
    doc = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    # Define styles
    styles = getSampleStyleSheet()
    title_style = styles['Title']
    heading_style = styles['Heading2']
    normal_style = styles['Normal']

    # Create a custom style for Braille text
    braille_style = ParagraphStyle(
        'Braille',
        parent=normal_style,
        fontName='BrailleFont' if font_registered else 'Helvetica',
        fontSize=14,
        leading=18,
        spaceAfter=12
    )

    # Create the content
    content = []

    def add_lines(text, style):
        # Append one paragraph per non-blank line, a spacer per blank line.
        for line in text.split('\n'):
            if line.strip():
                content.append(Paragraph(escape(line, quote=False), style))
            else:
                content.append(Spacer(1, 12))

    # Add title
    content.append(Paragraph(title, title_style))
    content.append(Spacer(1, 12))

    # Add original text section
    content.append(Paragraph("Original Text", heading_style))
    content.append(Spacer(1, 6))
    add_lines(original_text, normal_style)

    content.append(Spacer(1, 24))

    # Add Braille section
    content.append(Paragraph("Braille Translation", heading_style))
    content.append(Spacer(1, 6))
    add_lines(braille_text, braille_style)

    # Build the PDF
    doc.build(content)

    # Reset buffer position to the beginning
    buffer.seek(0)
    return buffer
|
112 |
+
|
113 |
+
def create_braille_pdf_with_comparison(original_text, braille_text, title="Menu in Braille"):
    """
    Create a PDF file with side-by-side comparison of original text and Braille.

    Both inputs are split on newlines and paired line by line in a
    two-column table; the shorter side is padded with empty lines. Lines
    are escaped before being placed in a Paragraph so that '&', '<' and
    '>' in menu text are printed literally rather than parsed as
    paragraph markup. The title is passed through unescaped.

    Args:
        original_text: Original text content
        braille_text: Braille translation
        title: PDF title

    Returns:
        BytesIO object containing the PDF, positioned at the start
    """
    from html import escape

    # Create a BytesIO object to store the PDF
    buffer = io.BytesIO()

    # Create the PDF document
    doc = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    # Define styles
    styles = getSampleStyleSheet()
    title_style = styles['Title']
    heading_style = styles['Heading2']
    normal_style = styles['Normal']

    # Create a custom style for Braille text
    braille_style = ParagraphStyle(
        'Braille',
        parent=normal_style,
        fontName='BrailleFont' if font_registered else 'Helvetica',
        fontSize=14,
        leading=18
    )

    def make_cell(line, style):
        # Blank lines become vertical space; others an escaped paragraph.
        if line.strip():
            return Paragraph(escape(line, quote=False), style)
        return Spacer(1, 12)

    # Create the content
    content = []

    # Add title
    content.append(Paragraph(title, title_style))
    content.append(Spacer(1, 12))

    # Split text into lines
    original_lines = original_text.split('\n')
    braille_lines = braille_text.split('\n')

    # Ensure both lists have the same length
    max_lines = max(len(original_lines), len(braille_lines))
    original_lines = original_lines + [''] * (max_lines - len(original_lines))
    braille_lines = braille_lines + [''] * (max_lines - len(braille_lines))

    # Create a table for side-by-side comparison, starting with the header row
    table_data = [
        [Paragraph("Original Text", heading_style), Paragraph("Braille Translation", heading_style)]
    ]

    # Add each line pair as a row in the table
    for original_line, braille_line in zip(original_lines, braille_lines):
        table_data.append([
            make_cell(original_line, normal_style),
            make_cell(braille_line, braille_style),
        ])

    # Create the table with two equal-width columns
    table = Table(table_data, colWidths=[doc.width/2.0-12, doc.width/2.0-12])

    # Style the table: grid and grey background on the header row only,
    # outer box around the whole table.
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('GRID', (0, 0), (-1, 0), 1, colors.black),
        ('BOX', (0, 0), (-1, -1), 1, colors.black),
        ('BACKGROUND', (0, 0), (1, 0), colors.lightgrey)
    ]))

    content.append(table)

    # Build the PDF
    doc.build(content)

    # Reset buffer position to the beginning
    buffer.seek(0)
    return buffer
|