# Source: Hugging Face Space Chamin09/BrailleMenuGenV2 (status: Running)
# File size: 5,664 Bytes
# Revision: 93c4f75

import gradio as gr
import numpy as np
from PIL import Image
import io
import base64

# Import our custom modules
from utils.image_preprocessing import preprocess_image
from models.document_ai import extract_text_and_layout
from models.text_processor import process_menu_text
from models.braille_translator import text_to_braille, get_braille_metadata
from utils.pdf_generator import create_braille_pdf, create_braille_pdf_with_comparison

# Build the Braille PDF in the requested layout (sequential or side-by-side comparison)
def generate_pdf(original_text, braille_text, title, comparison=False):
    """Build the Braille PDF in the requested layout.

    Args:
        original_text: Source text passed through to the PDF builder.
        braille_text: Braille rendering passed through to the PDF builder.
        title: Title passed through to the PDF builder.
        comparison: When truthy, use the comparison builder
            (create_braille_pdf_with_comparison); otherwise use
            create_braille_pdf.

    Returns:
        Whatever the selected builder returns, unchanged.
    """
    # Pick the builder first so the call site is written only once.
    builder = create_braille_pdf_with_comparison if comparison else create_braille_pdf
    return builder(original_text, braille_text, title)

def process_image(image, use_llm, use_context):
    """Run the menu-image -> text -> Braille pipeline for the UI.

    Args:
        image: Uploaded menu image (PIL image or NumPy array), or None when
            nothing has been uploaded yet.
        use_llm: When truthy, the extracted text is passed through
            process_menu_text and its 'structured_text' is used if it
            reports success; otherwise the raw extracted text is used.
        use_context: Forwarded to text_to_braille as ``use_context``.

    Returns:
        A 4-tuple ``(processed_text, braille_text, metadata_text, state)``.
        On success ``state`` is ``(processed_text, braille_text)``; on any
        failure ``state`` is None and one of the text slots carries a
        human-readable message instead.
    """
    if image is None:
        return "Please upload an image first.", "", "", None

    # Everything that touches the image lives inside the try block so that a
    # failure in conversion or preprocessing is reported the same way as a
    # failure in any later stage, instead of escaping the handler.
    try:
        # Convert to PIL Image if needed
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        # Preprocess the image
        preprocessed_img = preprocess_image(image)

        # Extract text using document AI
        result = extract_text_and_layout(preprocessed_img)

        if not result.get('words', []):
            return "No text was extracted from the image.", "", "", None

        raw_text = ' '.join(result['words'])

        # Default to the raw text; only replace it when the LLM step is
        # enabled and reports success.
        processed_text = raw_text
        if use_llm:
            processed_result = process_menu_text(raw_text)
            if processed_result['success']:
                processed_text = processed_result['structured_text']

        # Translate to Braille
        braille_result = text_to_braille(processed_text, use_context=use_context)

        if not braille_result['success']:
            return processed_text, "", "Braille translation failed.", None

        braille_text = braille_result['formatted_braille']

        # Generate metadata
        metadata = get_braille_metadata(processed_text)
        metadata_text = f"Translation contains {metadata['word_count']} words, {metadata['character_count']} characters, {metadata['line_count']} lines."

        # The last element is kept so a later PDF request can reuse both texts.
        return processed_text, braille_text, metadata_text, (processed_text, braille_text)

    except Exception as e:
        # UI boundary: surface the problem in the text box rather than raising.
        return f"Error processing image: {str(e)}", "", "", None

def create_pdf(state, pdf_title, pdf_type):
    """Create a PDF file for download and return something a file output can serve.

    Args:
        state: ``(original_text, braille_text)`` pair saved by a previous
            processing run, or None if nothing has been processed yet.
        pdf_title: Title forwarded to the PDF generator.
        pdf_type: Selected layout label; "Side-by-Side Comparison" selects the
            comparison layout, anything else the sequential one.

    Returns:
        None when ``state`` is missing or not a 2-item sequence. Otherwise a
        filesystem path to the generated PDF: if the generator already
        returned a path it is passed through; if it returned bytes or a
        file-like buffer, the content is written to a temporary ``.pdf`` file
        and that file's path is returned.
    """
    import os
    import tempfile

    if state is None or len(state) != 2:
        return None

    original_text, braille_text = state
    comparison = (pdf_type == "Side-by-Side Comparison")

    pdf_result = generate_pdf(original_text, braille_text, pdf_title, comparison)

    # A path (or nothing) can be handed to the file output as-is.
    if pdf_result is None or isinstance(pdf_result, (str, os.PathLike)):
        return pdf_result

    # Gradio's File component expects a file path, not an in-memory buffer,
    # so pull the raw bytes out of whatever the generator produced.
    if isinstance(pdf_result, (bytes, bytearray, memoryview)):
        payload = bytes(pdf_result)
    elif hasattr(pdf_result, "getvalue"):
        payload = pdf_result.getvalue()
    elif hasattr(pdf_result, "read"):
        if hasattr(pdf_result, "seek"):
            pdf_result.seek(0)  # the writer may have left the cursor at EOF
        payload = pdf_result.read()
    else:
        # Unknown result type: keep the previous behaviour and pass it on.
        return pdf_result

    if not isinstance(payload, (bytes, bytearray, memoryview)):
        # Not binary content; do not guess an encoding.
        return pdf_result

    # delete=False: the file must outlive this call so it can be downloaded.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(payload)
    return tmp.name

# Create the Gradio interface
# Top-level UI definition: components are laid out in creation order, so the
# statement order below is the on-screen order.
with gr.Blocks(title="Menu to Braille Converter") as demo:
    gr.Markdown("# Menu to Braille Converter")
    gr.Markdown("Upload a menu image to convert it to Braille text")
    
    # Two-column layout: inputs on the left (scale=1), results on the right (scale=2).
    with gr.Row():
        with gr.Column(scale=1):
            # Input components
            image_input = gr.Image(type="pil", label="Upload Menu Image")
            
            # Both toggles default to on and are passed to process_image.
            with gr.Row():
                use_llm = gr.Checkbox(label="Use AI for text processing", value=True)
                use_context = gr.Checkbox(label="Use AI for context enhancement", value=True)
            
            process_button = gr.Button("Process Menu")
        
        with gr.Column(scale=2):
            # Output components
            processed_text = gr.Textbox(label="Processed Text", lines=8)
            braille_output = gr.Textbox(label="Braille Translation", lines=10)
            metadata_output = gr.Markdown()
            
            # Hidden state for PDF generation: receives the 4th value returned
            # by process_image and is read back by create_pdf.
            state = gr.State()
            
            # PDF download section
            with gr.Group():
                gr.Markdown("### Download Options")
                pdf_title = gr.Textbox(label="PDF Title", value="Menu in Braille")
                # create_pdf compares the selected label against
                # "Side-by-Side Comparison", so these strings must stay in sync with it.
                pdf_type = gr.Radio(
                    ["Sequential (Text then Braille)", "Side-by-Side Comparison"],
                    label="PDF Format",
                    value="Sequential (Text then Braille)"
                )
                pdf_button = gr.Button("Generate PDF")
                pdf_output = gr.File(label="Download PDF")
    
    # Set up event handlers
    # The outputs list order must match the 4-tuple returned by process_image.
    process_button.click(
        process_image,
        inputs=[image_input, use_llm, use_context],
        outputs=[processed_text, braille_output, metadata_output, state]
    )
    
    # Uses the state saved by the last processing run; create_pdf returns None
    # when there is no usable state.
    pdf_button.click(
        create_pdf,
        inputs=[state, pdf_title, pdf_type],
        outputs=[pdf_output]
    )
    
    # Add examples (paths are relative; the sample images are expected to be
    # present under assets/sample_menus/)
    gr.Examples(
        examples=["assets/sample_menus/menu1.jpg", "assets/sample_menus/menu2.jpg"],
        inputs=image_input
    )
    
    # Add about section (collapsed by default)
    with gr.Accordion("About", open=False):
        gr.Markdown("""
        This application converts menu images to Braille text using AI technologies:
        
        - Document AI for text extraction
        - LLMs for text processing and enhancement
        - Braille translation with formatting
        - PDF generation for download
        
        Created as a demonstration of AI-powered accessibility tools.
        """)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()