Spaces:

Chamin09
/

BrailleMenuGenV2

Sleeping

App Files Files Community

Chamin09 committed on May 8

Commit

93c4f75

verified ·

1 Parent(s): dd3979d

initial commit

Browse files

Files changed (13) hide show

README.md +64 -7
app.py +158 -0
models/braille_translator.py +166 -0
models/document_ai.py +74 -0
models/text_processor.py +180 -0
requirements.txt +13 -0
scripts/download_model.py +50 -0
tests/test_braille.py +107 -0
tests/test_ocr.py +104 -0
utils/__init__.py +0 -0
utils/braille_display.py +115 -0
utils/image_preprocessing.py +39 -0
utils/pdf_generator.py +198 -0

README.md CHANGED Viewed

@@ -1,14 +1,71 @@
 ---
-title: BrailleMenuGenV2
-emoji: 🔥
-colorFrom: indigo
-colorTo: red
 sdk: gradio
 sdk_version: 5.29.0
 app_file: app.py
 pinned: false
-license: mit
-short_description: Generate Food menu in Braille
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: BrailleMenuGen
+emoji: 🦀
+colorFrom: green
+colorTo: green
 sdk: gradio
 sdk_version: 5.29.0
 app_file: app.py
 pinned: false
 ---
+# Menu to Braille Converter
+An AI-powered application that converts food menu images to Braille text for visually impaired users.
+## Features
+- Upload menu images
+- Extract text using AI-powered document understanding (LayoutLMv2)
+- Process and structure menu text using LLMs
+- Convert text to Braille
+- Display Braille in multiple formats (text, visual, side-by-side)
+- Download as PDF in different formats
+## Deployment on Hugging Face Spaces
+### Option 1: Direct GitHub Repository Deployment
+1. Fork this repository to your GitHub account
+2. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
+3. Click "Create new Space"
+4. Choose "Gradio" as the SDK
+5. Connect your GitHub account and select this repository
+6. Choose hardware requirements (recommend at least GPU for better performance)
+7. Click "Create Space"
+### Option 2: Manual Deployment
+1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
+2. Click "Create new Space"
+3. Choose "Gradio" as the SDK
+4. Give your Space a name
+5. Choose hardware requirements (recommend at least GPU for better performance)
+6. Click "Create Space"
+7. Clone the Space repository locally
+8. Copy all files from this project to the cloned repository
+9. Push the changes to the Space repository
+## Hardware Requirements
+- **Minimum**: CPU (2 vCPUs, 16 GB RAM)
+- **Recommended**: GPU (T4 or better)
+## Models Used
+- **Document AI**: microsoft/layoutlmv2-base-uncased
+- **Text Processing**: meta-llama/Meta-Llama-3-8B-Instruct (with fallback to mistralai/Mistral-7B-Instruct-v0.2)
+- **Context Enhancement**: facebook/bart-large-cnn
+## Local Development
+1. Clone this repository
+2. Install dependencies: `pip install -r requirements.txt`
+3. Run the application: `python app.py`
+## Future Enhancements
+- Improved menu section recognition
+- Support for multiple languages
+- Physical Braille printer integration
+- Mobile app version

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import gradio as gr
+import numpy as np
+from PIL import Image
+import io
+import base64
+# Import our custom modules
+from utils.image_preprocessing import preprocess_image
+from models.document_ai import extract_text_and_layout
+from models.text_processor import process_menu_text
+from models.braille_translator import text_to_braille, get_braille_metadata
+from utils.pdf_generator import create_braille_pdf, create_braille_pdf_with_comparison
+# Pick the PDF builder that matches the requested layout and run it
+def generate_pdf(original_text, braille_text, title, comparison=False):
+    """Build a Braille PDF in either the sequential or the comparison layout.
+    Args:
+        original_text: Plain menu text
+        braille_text: Braille translation of the menu text
+        title: Title passed to the PDF builder
+        comparison: When true, use create_braille_pdf_with_comparison,
+            otherwise create_braille_pdf
+    Returns:
+        Whatever the selected PDF builder returns
+    """
+    build_pdf = create_braille_pdf_with_comparison if comparison else create_braille_pdf
+    return build_pdf(original_text, braille_text, title)
+def process_image(image, use_llm, use_context):
+    """Run OCR, optional LLM clean-up and Braille translation on one image.
+    Args:
+        image: Uploaded menu image (PIL image or numpy array), or None
+        use_llm: When true, restructure the OCR text with process_menu_text
+        use_context: Forwarded to text_to_braille as its use_context flag
+    Returns:
+        4-tuple (text to display, Braille text, metadata line, state); state is
+        (processed_text, braille_text) on success and None otherwise
+    """
+    if image is None:
+        return "Please upload an image first.", "", "", None
+    # Accept numpy arrays as well as PIL images
+    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
+    # Preprocessing happens outside the try block, so its errors propagate
+    cleaned_image = preprocess_image(pil_image)
+    try:
+        ocr_result = extract_text_and_layout(cleaned_image)
+        if not ocr_result.get('words', []):
+            return "No text was extracted from the image.", "", "", None
+        raw_text = ' '.join(ocr_result['words'])
+        # Start from the raw OCR text; replace it only when the LLM pass succeeds
+        menu_text = raw_text
+        if use_llm:
+            llm_result = process_menu_text(raw_text)
+            if llm_result['success']:
+                menu_text = llm_result['structured_text']
+        translation = text_to_braille(menu_text, use_context=use_context)
+        if not translation['success']:
+            return menu_text, "", "Braille translation failed.", None
+        braille_text = translation['formatted_braille']
+        stats = get_braille_metadata(menu_text)
+        summary_line = f"Translation contains {stats['word_count']} words, {stats['character_count']} characters, {stats['line_count']} lines."
+        # The last element is kept in gr.State for the PDF export step
+        return menu_text, braille_text, summary_line, (menu_text, braille_text)
+    except Exception as e:
+        return f"Error processing image: {str(e)}", "", "", None
+def create_pdf(state, pdf_title, pdf_type):
+    """Create a PDF file for download and return its path.
+    Args:
+        state: (original_text, braille_text) pair stored by process_image, or None
+        pdf_title: Title passed through to the PDF generator
+        pdf_type: Selected layout label; "Side-by-Side Comparison" selects the
+            comparison layout, anything else the sequential one
+    Returns:
+        Path of the PDF file, or None when there is nothing to export
+    """
+    import os
+    import tempfile
+    if state is None or len(state) != 2:
+        return None
+    original_text, braille_text = state
+    comparison = (pdf_type == "Side-by-Side Comparison")
+    pdf_result = generate_pdf(original_text, braille_text, pdf_title, comparison)
+    # A path (or None) can be handed to the gr.File output unchanged
+    if pdf_result is None or isinstance(pdf_result, (str, os.PathLike)):
+        return pdf_result
+    # NOTE(review): the generators appear to return an in-memory buffer; gr.File
+    # expects a file path, so the bytes are written to a temporary file first.
+    # Confirm against utils/pdf_generator.py.
+    if hasattr(pdf_result, "getvalue"):
+        pdf_bytes = pdf_result.getvalue()
+    elif hasattr(pdf_result, "read"):
+        pdf_bytes = pdf_result.read()
+    else:
+        pdf_bytes = bytes(pdf_result)
+    # delete=False keeps the file on disk so it can still be served after return
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
+        tmp_file.write(pdf_bytes)
+    return tmp_file.name
+# Create the Gradio interface
+# Layout: a narrow input column (image + options) next to a wider output column
+# (processed text, Braille text, metadata, PDF export controls).
+with gr.Blocks(title="Menu to Braille Converter") as demo:
+    gr.Markdown("# Menu to Braille Converter")
+    gr.Markdown("Upload a menu image to convert it to Braille text")
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Input components
+            image_input = gr.Image(type="pil", label="Upload Menu Image")
+            with gr.Row():
+                use_llm = gr.Checkbox(label="Use AI for text processing", value=True)
+                use_context = gr.Checkbox(label="Use AI for context enhancement", value=True)
+            process_button = gr.Button("Process Menu")
+        with gr.Column(scale=2):
+            # Output components
+            processed_text = gr.Textbox(label="Processed Text", lines=8)
+            braille_output = gr.Textbox(label="Braille Translation", lines=10)
+            metadata_output = gr.Markdown()
+            # Hidden state for PDF generation
+            # (receives the fourth value returned by process_image)
+            state = gr.State()
+            # PDF download section
+            with gr.Group():
+                gr.Markdown("### Download Options")
+                pdf_title = gr.Textbox(label="PDF Title", value="Menu in Braille")
+                # create_pdf compares the selected label against
+                # "Side-by-Side Comparison", so the label text is part of the logic
+                pdf_type = gr.Radio(
+                    ["Sequential (Text then Braille)", "Side-by-Side Comparison"],
+                    label="PDF Format",
+                    value="Sequential (Text then Braille)"
+                )
+                pdf_button = gr.Button("Generate PDF")
+                pdf_output = gr.File(label="Download PDF")
+    # Set up event handlers
+    # process_image returns four values, mapped in order onto the four outputs
+    process_button.click(
+        process_image,
+        inputs=[image_input, use_llm, use_context],
+        outputs=[processed_text, braille_output, metadata_output, state]
+    )
+    pdf_button.click(
+        create_pdf,
+        inputs=[state, pdf_title, pdf_type],
+        outputs=[pdf_output]
+    )
+    # Add examples
+    # NOTE(review): these image paths are not among the files added by this
+    # commit - confirm the assets exist before deploying
+    gr.Examples(
+        examples=["assets/sample_menus/menu1.jpg", "assets/sample_menus/menu2.jpg"],
+        inputs=image_input
+    )
+    # Add about section
+    with gr.Accordion("About", open=False):
+        gr.Markdown("""
+        This application converts menu images to Braille text using AI technologies:
+        - Document AI for text extraction
+        - LLMs for text processing and enhancement
+        - Braille translation with formatting
+        - PDF generation for download
+        Created as a demonstration of AI-powered accessibility tools.
+        """)
+# Launch the app
+# (only when run as a script, not when imported)
+if __name__ == "__main__":
+    demo.launch()

models/braille_translator.py ADDED Viewed

	@@ -0,0 +1,166 @@

+from transformers import pipeline
+import re
+# English to Braille mapping (Grade 1 Braille): one lower-case character -> one
+# or more Unicode Braille cells. Keys are lower-case only.
+BRAILLE_MAP = {
+    # Letters a-z
+    'a': '⠁', 'b': '⠃', 'c': '⠉', 'd': '⠙', 'e': '⠑', 'f': '⠋', 'g': '⠛', 'h': '⠓', 'i': '⠊', 'j': '⠚',
+    'k': '⠅', 'l': '⠇', 'm': '⠍', 'n': '⠝', 'o': '⠕', 'p': '⠏', 'q': '⠟', 'r': '⠗', 's': '⠎', 't': '⠞',
+    'u': '⠥', 'v': '⠧', 'w': '⠺', 'x': '⠭', 'y': '⠽', 'z': '⠵',
+    # Digits reuse the cells of the letters a-j (1 = a, ..., 9 = i, 0 = j); the
+    # map alone therefore cannot distinguish a digit from a letter
+    '0': '⠚', '1': '⠁', '2': '⠃', '3': '⠉', '4': '⠙', '5': '⠑', '6': '⠋', '7': '⠛', '8': '⠓', '9': '⠊',
+    # Punctuation; note that '?' and '"' map to the same cell
+    '.': '⠲', ',': '⠂', ';': '⠆', ':': '⠒', '!': '⠖', '?': '⠦', '"': '⠦', "'": '⠄', '(': '⠐⠣', ')': '⠐⠜',
+    # Operators and symbols (several are two-cell sequences)
+    '-': '⠤', '/': '⠌', '+': '⠬', '=': '⠐⠶', '*': '⠐⠔', '&': '⠯', '%': '⠐⠏', '#': '⠼', '@': '⠐⠁',
+    # Currency signs
+    '$': '⠐⠎', '€': '⠐⠑', '£': '⠐⠇', '¥': '⠐⠽', '₹': '⠐⠗',
+    # A space becomes a Braille blank cell rather than an ASCII space
+    ' ': '⠀'
+}
+# Lazily created summarization pipeline; stays None until first requested
+summarizer = None
+def get_summarizer():
+    """Return the shared summarization pipeline, creating it on first use.
+    Returns:
+        The cached pipeline, or None if loading the model failed
+    """
+    global summarizer
+    if summarizer is not None:
+        return summarizer
+    try:
+        # facebook/bart-large-cnn with bounded summary length
+        summarizer = pipeline(
+            "summarization",
+            model="facebook/bart-large-cnn",
+            max_length=100,
+            min_length=30,
+            truncation=True
+        )
+    except Exception as e:
+        # Loading is best effort: report the problem and leave the cache empty
+        print(f"Error loading summarizer: {str(e)}")
+    return summarizer
+def text_to_grade1_braille(text):
+    """
+    Convert text to Grade 1 Braille.
+    Letters are lower-cased before lookup. Every run of digits is introduced by
+    the number sign, because the digit cells in BRAILLE_MAP are the same cells as
+    the letters a-j and would otherwise be read as letters. A '.' or ','
+    directly between two digits stays inside the number; a letter a-j directly
+    after a number is preceded by the letter sign.
+    Args:
+        text: Text to convert
+    Returns:
+        Braille text
+    """
+    number_sign = '⠼'
+    letter_sign = '⠰'
+    lowered = text.lower()
+    cells = []
+    in_number = False
+    for index, char in enumerate(lowered):
+        if char.isdigit() and char in BRAILLE_MAP:
+            if not in_number:
+                cells.append(number_sign)
+                in_number = True
+            cells.append(BRAILLE_MAP[char])
+            continue
+        if in_number:
+            next_char = lowered[index + 1:index + 2]
+            # Decimal point / thousands separator followed by another digit
+            if char in ('.', ',') and next_char.isdigit() and next_char in BRAILLE_MAP:
+                cells.append(BRAILLE_MAP[char])
+                continue
+            in_number = False
+            # Without the letter sign, a-j right after digits would read as digits
+            if 'a' <= char <= 'j':
+                cells.append(letter_sign)
+        # For characters not in our map, just keep the original
+        cells.append(BRAILLE_MAP.get(char, char))
+    return ''.join(cells)
+def text_to_braille(text, use_context=True):
+    """
+    Convert text to Braille, with optional context enhancement.
+    Args:
+        text: Text to convert to Braille
+        use_context: Whether to additionally summarize the text (only attempted
+            for texts longer than 200 characters)
+    Returns:
+        Dictionary with the keys 'braille_text', 'formatted_braille',
+        'context_summary' and 'success'; on failure 'success' is False, the
+        text fields are empty and 'error' holds the message
+    """
+    try:
+        # Basic Braille translation
+        braille_text = text_to_grade1_braille(text)
+        # The summary is optional: any failure here leaves it as None
+        context_summary = None
+        if use_context and len(text) > 200:
+            summary_model = get_summarizer()
+            if summary_model:
+                try:
+                    summary_result = summary_model(text)
+                    if summary_result:
+                        context_summary = summary_result[0]['summary_text']
+                except Exception as e:
+                    print(f"Summarization error: {str(e)}")
+        # Format the Braille text for better readability
+        formatted_braille = format_braille_text(braille_text)
+        return {
+            'braille_text': braille_text,
+            'formatted_braille': formatted_braille,
+            'context_summary': context_summary,
+            'success': True
+        }
+    except Exception as e:
+        # Same keys as the success result, so callers can read them unconditionally
+        return {
+            'braille_text': '',
+            'formatted_braille': '',
+            'context_summary': None,
+            'error': str(e),
+            'success': False
+        }
+def format_braille_text(braille_text, line_length=32):
+    """
+    Format Braille text for better readability.
+    Each input line is word-wrapped on its own. Words are delimited by either an
+    ASCII space or the Braille blank cell (U+2800) that the translation emits
+    for spaces; splitting on ASCII spaces alone would never wrap translated text.
+    Args:
+        braille_text: Raw Braille text
+        line_length: Maximum characters per line (a single longer word is kept
+            whole on its own line)
+    Returns:
+        Formatted Braille text, with the wrapped input lines joined by blank lines
+    """
+    braille_blank = '\u2800'
+    # Re-join words with the separator style the input already uses
+    separator = braille_blank if braille_blank in braille_text else ' '
+    formatted_paragraphs = []
+    for paragraph in braille_text.split('\n'):
+        normalized = paragraph.replace(braille_blank, ' ')
+        # Keep blank input lines as empty paragraphs
+        if not normalized.strip():
+            formatted_paragraphs.append('')
+            continue
+        lines = []
+        current_line = []
+        current_length = 0
+        for word in normalized.split(' '):
+            separator_width = 1 if current_line else 0
+            # Only break when something is already on the line; this avoids
+            # emitting an empty line before a word longer than line_length
+            if current_line and current_length + separator_width + len(word) > line_length:
+                lines.append(separator.join(current_line))
+                current_line = [word]
+                current_length = len(word)
+            else:
+                current_line.append(word)
+                current_length += separator_width + len(word)
+        # Add the last line if not empty
+        if current_line:
+            lines.append(separator.join(current_line))
+        formatted_paragraphs.append('\n'.join(lines))
+    # Join paragraphs with double newlines
+    return '\n\n'.join(formatted_paragraphs)
+def get_braille_metadata(text):
+    """
+    Compute simple size statistics for the text that was translated.
+    Args:
+        text: Original (untranslated) text
+    Returns:
+        Dictionary with 'word_count', 'character_count' and 'line_count'
+    """
+    words = re.findall(r'\b\w+\b', text)
+    lines = text.split('\n')
+    return {
+        'word_count': len(words),
+        'character_count': len(text),
+        'line_count': len(lines)
+    }

models/document_ai.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import torch
+from transformers import LayoutLMv2Processor, LayoutLMv2ForSequenceClassification
+from PIL import Image
+import numpy as np
+import pytesseract
+# Lazily initialised LayoutLMv2 processor and model, shared across calls
+processor = None
+model = None
+def get_document_ai_models():
+    """Return the cached (processor, model) pair, loading whichever is missing."""
+    global processor, model
+    checkpoint = "microsoft/layoutlmv2-base-uncased"
+    if processor is None:
+        processor = LayoutLMv2Processor.from_pretrained(checkpoint)
+    if model is None:
+        model = LayoutLMv2ForSequenceClassification.from_pretrained(checkpoint)
+    return processor, model
+def extract_text_with_tesseract(image):
+    """Extract words and their bounding boxes using Tesseract OCR.
+    Args:
+        image: PIL image or numpy array
+    Returns:
+        Tuple (words, word_boxes): the non-blank recognised words and, for each
+        word, its box as [left, top, right, bottom]
+    """
+    if isinstance(image, np.ndarray):
+        pil_image = Image.fromarray(image).convert("RGB")
+    else:
+        pil_image = image.convert("RGB")
+    # One image_to_data pass yields both the text and the geometry; a separate
+    # image_to_string pass would only repeat the OCR for a value nobody reads
+    ocr_data = pytesseract.image_to_data(pil_image, output_type=pytesseract.Output.DICT)
+    words = []
+    word_boxes = []
+    columns = zip(ocr_data['text'], ocr_data['left'], ocr_data['top'],
+                  ocr_data['width'], ocr_data['height'])
+    for word, x, y, w, h in columns:
+        # Entries with blank text carry no word
+        if word.strip() != '':
+            words.append(word)
+            word_boxes.append([x, y, x + w, y + h])
+    return words, word_boxes
+def extract_text_and_layout(image):
+    """
+    Extract words and their bounding boxes from a menu image.
+    The extraction is delegated to extract_text_with_tesseract.
+    Args:
+        image: PIL Image object (numpy arrays are converted first)
+    Returns:
+        Dictionary with 'words', 'boxes' and 'success'; 'success' is False and
+        both lists are empty when no words were found
+    """
+    # Convert numpy array to PIL Image if needed
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image).convert("RGB")
+    words, boxes = extract_text_with_tesseract(image)
+    found_text = bool(words)
+    return {
+        'words': words if found_text else [],
+        'boxes': boxes if found_text else [],
+        'success': found_text
+    }

models/text_processor.py ADDED Viewed

	@@ -0,0 +1,180 @@

+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch
+import json
+# Model ID for a smaller model suitable for Spaces
+MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
+FALLBACK_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
+# Initialize with None - will be loaded on first use
+tokenizer = None
+text_generation_pipeline = None
+def _load_text_pipeline(model_id):
+    """Load model_id and wrap it in a text-generation pipeline.
+    Returns:
+        Tuple (tokenizer, pipeline); any loading error propagates to the caller
+    """
+    loaded_tokenizer = AutoTokenizer.from_pretrained(model_id)
+    # Use 8-bit quantization to reduce memory usage
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        device_map="auto",
+        torch_dtype=torch.float16,
+        load_in_8bit=True
+    )
+    generator = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=loaded_tokenizer,
+        max_new_tokens=1024,
+        do_sample=True,
+        temperature=0.3,
+        top_p=0.95,
+        repetition_penalty=1.15
+    )
+    return loaded_tokenizer, generator
+def get_text_pipeline():
+    """
+    Initialize or return the text generation pipeline.
+    MODEL_ID is tried first and FALLBACK_MODEL_ID second; the first pipeline that
+    loads is cached in the module globals and reused by later calls.
+    Returns:
+        The text-generation pipeline, or None if neither model could be loaded
+    """
+    global tokenizer, text_generation_pipeline
+    if text_generation_pipeline is not None:
+        return text_generation_pipeline
+    try:
+        # Globals are only assigned once the whole load has succeeded
+        tokenizer, text_generation_pipeline = _load_text_pipeline(MODEL_ID)
+    except Exception as e:
+        print(f"Error loading primary model: {str(e)}")
+        print(f"Falling back to {FALLBACK_MODEL_ID}")
+        try:
+            tokenizer, text_generation_pipeline = _load_text_pipeline(FALLBACK_MODEL_ID)
+        except Exception as e2:
+            print(f"Error loading fallback model: {str(e2)}")
+            return None
+    return text_generation_pipeline
+def process_menu_text(raw_text):
+    """
+    Process raw OCR text using LLM to improve structure and readability.
+    Args:
+        raw_text: Raw text extracted from menu image
+    Returns:
+        Dictionary with 'structured_text' and 'success'. On success it also holds
+        'menu_data' (the parsed JSON); on failure 'structured_text' is the
+        unchanged raw text, 'menu_sections' is empty and 'error' explains why
+    """
+    def _fallback(message):
+        # Failure result: hand the raw OCR text back unchanged
+        return {
+            'structured_text': raw_text,
+            'menu_sections': [],
+            'success': False,
+            'error': message
+        }
+    # Named generator so the imported pipeline() factory is not shadowed
+    generator = get_text_pipeline()
+    if generator is None:
+        return _fallback("LLM model not available")
+    # Construct prompt for the LLM
+    prompt = f"""<|system|>
+You are an AI assistant that helps structure menu text from OCR.
+Your task is to clean up the text, correct obvious OCR errors, and structure it properly.
+Identify menu sections, items, and prices.
+Format your response as JSON with menu sections, items, and prices.
+<|user|>
+Here is the raw text extracted from a menu image:
+{raw_text}
+Please clean and structure this menu text. Format your response as JSON with the following structure:
+{{
+    "menu_sections": [
+        {{
+            "section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
+            "items": [
+                {{
+                    "name": "Item name",
+                    "description": "Item description if available",
+                    "price": "Price if available"
+                }}
+            ]
+        }}
+    ]
+}}
+<|assistant|>
+"""
+    try:
+        # Generate response from LLM
+        response_text = generator(prompt, return_full_text=False)[0]['generated_text'].strip()
+        # The JSON payload is taken to span from the first '{' to the last '}'
+        json_start = response_text.find('{')
+        json_end = response_text.rfind('}') + 1
+        if json_start < 0 or json_end <= json_start:
+            return _fallback("Failed to parse LLM response as JSON")
+        menu_data = json.loads(response_text[json_start:json_end])
+        # Reconstruct structured text
+        parts = []
+        for section in menu_data.get('menu_sections', []):
+            # A missing or null section name falls back to a generic heading
+            section_name = str(section.get('section_name') or 'Menu Items')
+            parts.append(f"{section_name}\n")
+            parts.append("-" * len(section_name) + "\n\n")
+            for item in section.get('items', []):
+                parts.append(f"{item.get('name') or ''}")
+                if item.get('price'):
+                    parts.append(f" - {item.get('price')}")
+                parts.append("\n")
+                if item.get('description'):
+                    parts.append(f"  {item.get('description')}\n")
+                parts.append("\n")
+            parts.append("\n")
+        structured_text = "".join(parts)
+        # Valid JSON without any sections must not replace the OCR text with nothing
+        if not structured_text.strip():
+            return _fallback("LLM response contained no menu sections")
+        return {
+            'structured_text': structured_text,
+            'menu_data': menu_data,
+            'success': True
+        }
+    except Exception as e:
+        return _fallback(str(e))

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+gradio>=3.50.0
+pillow>=9.0.0
+numpy>=1.22.0
+torch>=2.0.0
+transformers>=4.30.0
+layoutlmv2>=0.1.0
+pytesseract>=0.3.10
+opencv-python>=4.7.0
+sentence-transformers>=2.2.2
+python-braille>=0.1.0
+reportlab>=3.6.12

scripts/download_model.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import os
+import sys
+import requests
+from tqdm import tqdm
+import huggingface_hub
+# Add parent directory to path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+def download_model():
+    """
+    Fetch the GGUF model file into the project's models directory.
+    Returns:
+        Path of the model file (existing or freshly downloaded), or None when
+        the download failed
+    """
+    repo_id = "TheBloke/Llama-3-8B-Instruct-GGUF"
+    gguf_name = "llama-3-8b-instruct.Q4_K_M.gguf"
+    # The models directory lives in the parent of this script's directory
+    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    models_dir = os.path.join(project_root, "models")
+    os.makedirs(models_dir, exist_ok=True)
+    model_path = os.path.join(models_dir, gguf_name)
+    # Nothing to do when the file is already present
+    if os.path.exists(model_path):
+        print(f"Model already exists at {model_path}")
+        return model_path
+    print(f"Downloading {gguf_name} from {repo_id}...")
+    try:
+        huggingface_hub.hf_hub_download(
+            repo_id=repo_id,
+            filename=gguf_name,
+            local_dir=models_dir,
+            local_dir_use_symlinks=False
+        )
+        print(f"Model downloaded successfully to {model_path}")
+        return model_path
+    except Exception as e:
+        # Explain how to fetch the file by hand instead of failing hard
+        print(f"Error downloading model: {str(e)}")
+        print("\nManual download instructions:")
+        print(f"1. Go to https://huggingface.co/{repo_id}/tree/main")
+        print(f"2. Download the file {gguf_name}")
+        print(f"3. Place it in the models directory at {models_dir}")
+        return None
+if __name__ == "__main__":
+    download_model()

tests/test_braille.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import os
+import sys
+import time
+# Add the parent directory to the path so we can import our modules
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from models.braille_translator import text_to_braille, get_braille_metadata
+def test_braille_translation(text):
+    """
+    Translate one text to Braille and collect timing and size statistics.
+    Args:
+        text: Text to translate to Braille
+    Returns:
+        Dictionary with the test results ('error' is present only on failure)
+    """
+    started = time.time()
+    try:
+        outcome = text_to_braille(text, use_context=True)
+        success = outcome['success']
+        braille_text = outcome.get('formatted_braille', '')
+        error = outcome.get('error', None)
+    except Exception as e:
+        success, braille_text, error = False, '', str(e)
+    # Timing covers the translation only, not the metadata computation
+    elapsed = time.time() - started
+    metadata = get_braille_metadata(text)
+    # Keep the report short: at most 100 Braille characters plus an ellipsis
+    if len(braille_text) > 100:
+        preview = braille_text[:100] + '...'
+    else:
+        preview = braille_text
+    test_results = {
+        'original_text': text,
+        'success': success,
+        'processing_time': elapsed,
+        'braille_text': preview,
+        'word_count': metadata['word_count'],
+        'character_count': metadata['character_count'],
+        'line_count': metadata['line_count']
+    }
+    if not success:
+        test_results['error'] = error
+    return test_results
+def run_braille_tests():
+    """
+    Run tests on sample menu texts.
+    Each sample is passed to test_braille_translation and a short progress
+    report is printed per sample.
+    Returns:
+        List of test results
+    """
+    # Sample menu texts
+    sample_texts = [
+        # Simple menu item
+        "Cheeseburger - $10.99\nServed with fries and a pickle.",
+        # Menu section
+        "APPETIZERS\n-----------\nMozzarella Sticks - $7.99\nLoaded Nachos - $9.99\nBuffalo Wings - $12.99",
+        # Complex menu with formatting
+        # (the continuation lines keep their leading indentation inside the literal)
+        """MAIN COURSE
+        -------------
+        Grilled Salmon - $18.99
+        Fresh Atlantic salmon served with seasonal vegetables and rice pilaf.
+        Filet Mignon - $24.99
+        8oz center-cut filet served with mashed potatoes and asparagus.
+        Vegetable Pasta - $14.99
+        Penne pasta with seasonal vegetables in a creamy garlic sauce."""
+    ]
+    results = []
+    for i, text in enumerate(sample_texts):
+        print(f"\nTesting sample {i+1}...")
+        result = test_braille_translation(text)
+        results.append(result)
+        # Print progress
+        status = "SUCCESS" if result['success'] else "FAILED"
+        print(f"Sample {i+1}: {status}")
+        print(f"Words: {result['word_count']}, Time: {result['processing_time']:.2f}s")
+        # Show at most the first 50 characters of the (already shortened) Braille text
+        print(f"Braille sample: {result['braille_text'][:50]}...")
+    return results
+# Script entry point: run all sample translations and print a summary
+if __name__ == "__main__":
+    print("Testing Braille translation functionality...")
+    results = run_braille_tests()
+    # Print summary
+    success_count = sum(1 for r in results if r['success'])
+    print(f"\nSummary: {success_count}/{len(results)} tests passed")
+    # Guard against dividing by zero when there are no results
+    if results:
+        avg_time = sum(r['processing_time'] for r in results) / len(results)
+        print(f"Average processing time: {avg_time:.2f} seconds")

tests/test_ocr.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import os
+import sys
+import time
+from PIL import Image
+import numpy as np
+# Add the parent directory to the path so we can import our modules
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from utils.image_preprocessing import preprocess_image
+from models.document_ai import extract_text_and_layout
+def test_menu_extraction(image_path):
+    """
+    Run OCR extraction on one menu image and collect timing and size figures.
+    Args:
+        image_path: Path to the menu image
+    Returns:
+        Dictionary with the test results ('error' is present only on failure)
+    """
+    started = time.time()
+    # Loading and preprocessing are not guarded: their errors propagate
+    source_image = Image.open(image_path)
+    preprocessed_img = preprocess_image(source_image)
+    error = None
+    try:
+        ocr_result = extract_text_and_layout(preprocessed_img)
+        extracted_text = ' '.join(ocr_result['words']) if 'words' in ocr_result else ''
+        success = True
+    except Exception as e:
+        extracted_text, success, error = '', False, str(e)
+    elapsed = time.time() - started
+    test_results = {
+        'image_path': image_path,
+        'success': success,
+        'processing_time': elapsed,
+        'extracted_text': extracted_text,
+        'text_length': len(extracted_text),
+        # Splitting an empty string yields no words, so no special case is needed
+        'word_count': len(extracted_text.split())
+    }
+    if not success:
+        test_results['error'] = error
+    return test_results
+def run_batch_test(image_dir):
+    """
+    Run the extraction test on every PNG/JPEG image in a directory.
+    Args:
+        image_dir: Directory containing menu images
+    Returns:
+        List of test results, one per image processed
+    """
+    image_suffixes = ('.png', '.jpg', '.jpeg')
+    results = []
+    for filename in os.listdir(image_dir):
+        # Skip anything that is not an image by (case-insensitive) extension
+        if not filename.lower().endswith(image_suffixes):
+            continue
+        result = test_menu_extraction(os.path.join(image_dir, filename))
+        results.append(result)
+        status = "SUCCESS" if result['success'] else "FAILED"
+        print(f"{filename}: {status} - {result['word_count']} words extracted")
+    return results
+# Script entry point: run the OCR test over the sample images and print a summary
+if __name__ == "__main__":
+    # Test with sample menus in the assets directory
+    sample_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+                             "assets", "sample_menus")
+    if not os.path.exists(sample_dir):
+        print(f"Sample directory not found: {sample_dir}")
+        print("Creating directory and downloading sample images...")
+        os.makedirs(sample_dir, exist_ok=True)
+        # You would add code here to download sample images
+        # For now, just create a note to add sample images manually
+        # (no images are downloaded, despite the message printed above)
+        with open(os.path.join(sample_dir, "README.txt"), "w") as f:
+            f.write("Add sample menu images to this directory for testing.")
+    results = run_batch_test(sample_dir)
+    # Print summary
+    success_count = sum(1 for r in results if r['success'])
+    print(f"\nSummary: {success_count}/{len(results)} tests passed")
+    # Averages are only defined when at least one image was processed
+    if results:
+        avg_words = sum(r['word_count'] for r in results) / len(results)
+        avg_time = sum(r['processing_time'] for r in results) / len(results)
+        print(f"Average words extracted: {avg_words:.1f}")
+        print(f"Average processing time: {avg_time:.2f} seconds")

utils/__init__.py ADDED Viewed

File without changes

utils/braille_display.py ADDED Viewed

	@@ -0,0 +1,115 @@

+def text_to_unicode_braille(braille_text):
+    """
+    Return Braille text as Unicode Braille symbols.
+    The text is passed through unchanged: Unicode Braille patterns and all other
+    characters are copied as they are. No dots-notation parsing is performed.
+    Args:
+        braille_text: Braille text, expected to already use Unicode Braille patterns
+    Returns:
+        The same text as a single string
+    """
+    # Copying every character, whether or not it is a Braille pattern, is
+    # exactly a character-wise join
+    return "".join(braille_text)
+def create_braille_html(braille_text):
+    """
+    Create HTML to display Braille with proper styling.
+    Args:
+        braille_text: Braille text (either in dots or Unicode)
+    Returns:
+        HTML string for displaying Braille
+    """
+    from html import escape
+    # Convert to Unicode Braille if not already
+    unicode_braille = text_to_unicode_braille(braille_text)
+    # Escape markup characters before adding our own tags: the text may contain
+    # non-Braille characters, which must not be interpreted as HTML
+    safe_text = escape(unicode_braille, quote=False)
+    # Replace newlines with <br> tags before using in f-string
+    formatted_text = safe_text.replace('\n', '<br>')
+    # Create HTML with proper styling
+    html = f"""
+    <div style="font-family: 'Courier New', monospace; font-size: 20px; line-height: 1.5;
+                background-color: #f5f5f5; padding: 15px; border-radius: 5px;">
+        {formatted_text}
+    </div>
+    """
+    return html
def create_braille_comparison(text, braille_text):
    """
    Create a side-by-side comparison of text and its Braille representation.

    Line i of the original text is paired with line i of the Braille text;
    when one side has fewer lines, the missing cells are left empty. Cell
    contents are HTML-escaped so '<', '>' and '&' are displayed literally.
    Args:
        text: Original text
        braille_text: Braille translation
    Returns:
        HTML string for displaying the comparison
    """
    # Function-scope imports keep this block self-contained.
    from html import escape
    from itertools import zip_longest
    # Convert to Unicode Braille
    unicode_braille = text_to_unicode_braille(braille_text)
    # Split into lines
    text_lines = text.split('\n')
    braille_lines = unicode_braille.split('\n')
    # Collect the fragments and join once at the end
    parts = ["""
    <style>
        .braille-table {
            width: 100%;
            border-collapse: collapse;
        }
        .braille-table td {
            padding: 8px;
            vertical-align: top;
            border-bottom: 1px solid #ddd;
        }
        .braille-text {
            font-family: 'Courier New', monospace;
            font-size: 20px;
            background-color: #f5f5f5;
        }
        .original-text {
            font-family: Arial, sans-serif;
        }
    </style>
    <table class="braille-table">
        <tr>
            <th>Original Text</th>
            <th>Braille Representation</th>
        </tr>
    """]
    # zip_longest pads the shorter side with empty cells
    for text_line, braille_line in zip_longest(text_lines, braille_lines, fillvalue=''):
        parts.append(f"""
        <tr>
            <td class="original-text">{escape(text_line)}</td>
            <td class="braille-text">{escape(braille_line)}</td>
        </tr>
        """)
    parts.append("</table>")
    return "".join(parts)

utils/image_preprocessing.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import cv2
+import numpy as np
+from PIL import Image
def preprocess_image(image, target_size=(1000, 1000)):
    """
    Preprocess image for document analysis.

    The image is brought to 3-channel RGB, resized to target_size, and its
    contrast is enhanced by applying CLAHE to the L channel in LAB space.
    Args:
        image: PIL Image object, or an image already held as a numpy array
        target_size: Tuple of (width, height) to resize to
    Returns:
        Preprocessed image as numpy array
    """
    if isinstance(image, Image.Image):
        # Normalise the mode before converting to an array: palette, CMYK,
        # LA and 1-bit images would otherwise be misread as grayscale/RGBA
        # data or reach the LAB conversion with an unsupported layout.
        if image.mode != "RGB":
            image = image.convert("RGB")
        img_array = np.array(image)
    else:
        img_array = image
    # Bring array inputs to 3-channel RGB
    if img_array.ndim == 2 or (img_array.ndim == 3 and img_array.shape[2] == 1):
        # Grayscale, with or without an explicit channel axis
        img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
    elif img_array.shape[2] == 4:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)
    # Resize image
    img_array = cv2.resize(img_array, target_size)
    # Enhance contrast on the lightness channel only, leaving a/b untouched
    lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
    lightness, a_channel, b_channel = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    enhanced_lightness = clahe.apply(lightness)
    enhanced_lab = cv2.merge((enhanced_lightness, a_channel, b_channel))
    enhanced_img = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
    return enhanced_img

utils/pdf_generator.py ADDED Viewed

	@@ -0,0 +1,198 @@

+import os
+import tempfile
+from reportlab.lib.pagesizes import letter
+from reportlab.lib import colors
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
+from reportlab.pdfbase import pdfmetrics
+from reportlab.pdfbase.ttfonts import TTFont
+import io
# Try to register a font that supports Braille Unicode characters.
# `font_registered` is read later when the Braille paragraph styles are built.
# Check for common Braille fonts
font_paths = [
    "DejaVuSans.ttf",  # Common on Linux
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/System/Library/Fonts/Arial Unicode.ttf",  # Mac
    "C:\\Windows\\Fonts\\arial.ttf"  # Windows
]
font_registered = False
for font_path in font_paths:
    if not os.path.exists(font_path):
        continue
    try:
        pdfmetrics.registerFont(TTFont('BrailleFont', font_path))
    except Exception as e:
        # A file that exists but cannot be loaded must not end the search;
        # report it and fall through to the next candidate.
        print(f"Error registering font: {str(e)}")
        continue
    font_registered = True
    break
if not font_registered:
    # Use default font if none of the above could be registered
    print("No suitable font found for Braille. Using default font.")
def create_braille_pdf(original_text, braille_text, title="Menu in Braille"):
    """
    Create a PDF file with original text and its Braille translation.

    The document contains the title, an "Original Text" section and a
    "Braille Translation" section. Each non-blank input line becomes its own
    paragraph and each blank line becomes vertical space. Text lines are
    XML-escaped before being handed to Paragraph; the title is passed through
    unchanged.
    Args:
        original_text: Original text content
        braille_text: Braille translation
        title: PDF title
    Returns:
        BytesIO object containing the PDF
    """
    # Paragraph interprets its text as XML-style markup, so literal '&', '<'
    # and '>' from the menu text have to be escaped first.
    from xml.sax.saxutils import escape

    # Create a BytesIO object to store the PDF
    buffer = io.BytesIO()
    # Create the PDF document
    doc = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )
    # Define styles
    styles = getSampleStyleSheet()
    title_style = styles['Title']
    heading_style = styles['Heading2']
    normal_style = styles['Normal']
    # Create a custom style for Braille text
    braille_style = ParagraphStyle(
        'Braille',
        parent=normal_style,
        fontName='BrailleFont' if font_registered else 'Helvetica',
        fontSize=14,
        leading=18,
        spaceAfter=12
    )
    content = []

    def add_lines(text, style):
        # One paragraph per non-blank line; blank lines become spacing.
        for line in text.split('\n'):
            if line.strip():
                content.append(Paragraph(escape(line), style))
            else:
                content.append(Spacer(1, 12))

    # Add title
    content.append(Paragraph(title, title_style))
    content.append(Spacer(1, 12))
    # Add original text section
    content.append(Paragraph("Original Text", heading_style))
    content.append(Spacer(1, 6))
    add_lines(original_text, normal_style)
    content.append(Spacer(1, 24))
    # Add Braille section
    content.append(Paragraph("Braille Translation", heading_style))
    content.append(Spacer(1, 6))
    add_lines(braille_text, braille_style)
    # Build the PDF
    doc.build(content)
    # Reset buffer position to the beginning so callers can read it directly
    buffer.seek(0)
    return buffer
def create_braille_pdf_with_comparison(original_text, braille_text, title="Menu in Braille"):
    """
    Create a PDF file with side-by-side comparison of original text and Braille.

    The document contains the title and a two-column table. Line i of the
    original text is placed next to line i of the Braille text; when one side
    has fewer lines, or a line is blank, the cell holds vertical space
    instead. Text lines are XML-escaped before being handed to Paragraph; the
    title is passed through unchanged.
    Args:
        original_text: Original text content
        braille_text: Braille translation
        title: PDF title
    Returns:
        BytesIO object containing the PDF
    """
    from itertools import zip_longest
    # Paragraph interprets its text as XML-style markup, so literal '&', '<'
    # and '>' from the menu text have to be escaped first.
    from xml.sax.saxutils import escape

    # Create a BytesIO object to store the PDF
    buffer = io.BytesIO()
    # Create the PDF document
    doc = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )
    # Define styles
    styles = getSampleStyleSheet()
    title_style = styles['Title']
    heading_style = styles['Heading2']
    normal_style = styles['Normal']
    # Create a custom style for Braille text
    braille_style = ParagraphStyle(
        'Braille',
        parent=normal_style,
        fontName='BrailleFont' if font_registered else 'Helvetica',
        fontSize=14,
        leading=18
    )

    def cell(line, style):
        # Blank or missing lines become spacing rather than empty paragraphs.
        return Paragraph(escape(line), style) if line.strip() else Spacer(1, 12)

    # Create the content
    content = []
    # Add title
    content.append(Paragraph(title, title_style))
    content.append(Spacer(1, 12))
    # Header row of the side-by-side table
    table_data = [
        [Paragraph("Original Text", heading_style), Paragraph("Braille Translation", heading_style)]
    ]
    # Add each pair of lines as a row; zip_longest pads the shorter side
    line_pairs = zip_longest(original_text.split('\n'), braille_text.split('\n'), fillvalue='')
    for original_line, braille_line in line_pairs:
        table_data.append([cell(original_line, normal_style), cell(braille_line, braille_style)])
    # Create the table
    table = Table(table_data, colWidths=[doc.width/2.0-12, doc.width/2.0-12])
    # Style the table
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('GRID', (0, 0), (-1, 0), 1, colors.black),
        ('BOX', (0, 0), (-1, -1), 1, colors.black),
        ('BACKGROUND', (0, 0), (1, 0), colors.lightgrey)
    ]))
    content.append(table)
    # Build the PDF
    doc.build(content)
    # Reset buffer position to the beginning so callers can read it directly
    buffer.seek(0)
    return buffer