# Scraped file-viewer header (not part of the program):
# Chamin09's picture | initial commit | 93c4f75 verified | raw | history blame | 5.66 kB
import gradio as gr
import numpy as np
from PIL import Image
import io
import base64
# Import our custom modules
from utils.image_preprocessing import preprocess_image
from models.document_ai import extract_text_and_layout
from models.text_processor import process_menu_text
from models.braille_translator import text_to_braille, get_braille_metadata
from utils.pdf_generator import create_braille_pdf, create_braille_pdf_with_comparison
# Build the Braille PDF (sequential or side-by-side layout) and return it
def generate_pdf(original_text, braille_text, title, comparison=False):
    """Build a Braille PDF from the original text and its translation.

    Args:
        original_text: Text that was translated.
        braille_text: Braille rendering of ``original_text``.
        title: Title passed through to the PDF builder.
        comparison: When truthy, use ``create_braille_pdf_with_comparison``;
            otherwise use ``create_braille_pdf``.

    Returns:
        Whatever the selected builder returns, unchanged.
    """
    # Pick the builder first, then make a single call with shared arguments.
    builder = create_braille_pdf_with_comparison if comparison else create_braille_pdf
    return builder(original_text, braille_text, title)
def process_image(image, use_llm, use_context):
    """Run the menu-to-Braille pipeline on an uploaded image.

    The image is converted to PIL if it arrives as a NumPy array,
    preprocessed, passed to ``extract_text_and_layout``, optionally
    restructured with ``process_menu_text``, and translated with
    ``text_to_braille``.

    Args:
        image: Uploaded image (PIL image or NumPy array), or ``None`` when
            nothing has been uploaded.
        use_llm: When truthy, run ``process_menu_text`` on the extracted text
            and use its ``structured_text`` if it reports success.
        use_context: Forwarded to ``text_to_braille`` as ``use_context``.

    Returns:
        A 4-tuple ``(processed_text, braille_text, metadata_text, state)``.
        ``state`` is ``(processed_text, braille_text)`` on success and
        ``None`` otherwise; on failure the first or third element carries a
        user-facing message.
    """
    if image is None:
        return "Please upload an image first.", "", "", None

    # Conversion and preprocessing run inside the try as well, so a bad image
    # yields the same user-facing error message as a failure in later stages
    # instead of escaping the handler as an unhandled exception.
    try:
        # Convert to PIL Image if needed
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        preprocessed_img = preprocess_image(image)

        # Extract text using document AI
        result = extract_text_and_layout(preprocessed_img)
        words = result.get('words', [])
        if not words:
            return "No text was extracted from the image.", "", "", None
        raw_text = ' '.join(words)

        # Fall back to the raw text unless the LLM step is enabled and succeeds.
        processed_text = raw_text
        if use_llm:
            processed_result = process_menu_text(raw_text)
            if processed_result['success']:
                processed_text = processed_result['structured_text']

        # Translate to Braille
        braille_result = text_to_braille(processed_text, use_context=use_context)
        if not braille_result['success']:
            return processed_text, "", "Braille translation failed.", None
        braille_text = braille_result['formatted_braille']

        # Summarise the translation for display under the outputs.
        metadata = get_braille_metadata(processed_text)
        metadata_text = (
            f"Translation contains {metadata['word_count']} words, "
            f"{metadata['character_count']} characters, "
            f"{metadata['line_count']} lines."
        )

        # The trailing tuple is kept as UI state for later PDF generation.
        return processed_text, braille_text, metadata_text, (processed_text, braille_text)
    except Exception as e:
        # Top-level UI boundary: report the failure in the first output slot.
        return f"Error processing image: {e}", "", "", None
def create_pdf(state, pdf_title, pdf_type):
    """Create a PDF file for download from the stored translation state.

    Args:
        state: The ``(original_text, braille_text)`` pair saved by the
            processing step, or ``None`` if nothing has been processed yet.
        pdf_title: Title passed to the PDF generator.
        pdf_type: Selected format label; ``"Side-by-Side Comparison"``
            selects the comparison layout, anything else the sequential one.

    Returns:
        ``None`` when ``state`` is missing or not a 2-item sequence.
        Otherwise a path to a temporary ``.pdf`` file when the generator
        returns an in-memory buffer or raw bytes, or the generator's return
        value unchanged for anything else (e.g. an existing file path).
    """
    import tempfile

    if state is None or len(state) != 2:
        return None

    original_text, braille_text = state
    comparison = pdf_type == "Side-by-Side Comparison"
    pdf_result = generate_pdf(original_text, braille_text, pdf_title, comparison)

    # NOTE(review): the generator's result is named ``pdf_buffer`` in this
    # file, which suggests an in-memory buffer - confirm against
    # utils.pdf_generator. A Gradio File output expects a file path, so
    # buffers/bytes are written to disk first.
    if hasattr(pdf_result, "getvalue"):
        pdf_bytes = pdf_result.getvalue()
    elif isinstance(pdf_result, (bytes, bytearray)):
        pdf_bytes = pdf_result
    else:
        # Already something else (such as a path string): pass it through.
        return pdf_result

    if not isinstance(pdf_bytes, (bytes, bytearray)):
        # Unknown buffer payload; leave the original value untouched.
        return pdf_result

    # delete=False keeps the file on disk after the handle closes so the UI
    # can still read it when serving the download.
    with tempfile.NamedTemporaryFile(
        prefix="braille_menu_", suffix=".pdf", delete=False
    ) as tmp:
        tmp.write(pdf_bytes)
    return tmp.name
# Create the Gradio interface
with gr.Blocks(title="Menu to Braille Converter") as demo:
    # Page heading followed by a one-line usage hint.
    gr.Markdown(value="# Menu to Braille Converter")
    gr.Markdown(value="Upload a menu image to convert it to Braille text")

    with gr.Row():
        # Narrow column: image upload, processing switches, and the trigger.
        with gr.Column(scale=1):
            image_input = gr.Image(label="Upload Menu Image", type="pil")
            with gr.Row():
                use_llm = gr.Checkbox(value=True, label="Use AI for text processing")
                use_context = gr.Checkbox(value=True, label="Use AI for context enhancement")
            process_button = gr.Button(value="Process Menu")

        # Wide column: text results, translation metadata, and PDF export.
        with gr.Column(scale=2):
            processed_text = gr.Textbox(lines=8, label="Processed Text")
            braille_output = gr.Textbox(lines=10, label="Braille Translation")
            metadata_output = gr.Markdown()

            # Carries the (text, braille) pair from processing to PDF export.
            state = gr.State()

            # NOTE(review): nesting was reconstructed from a copy of the file
            # that lost its indentation; confirm this group belongs inside
            # the output column rather than at the top level of the page.
            with gr.Group():
                gr.Markdown(value="### Download Options")
                pdf_title = gr.Textbox(value="Menu in Braille", label="PDF Title")
                pdf_formats = [
                    "Sequential (Text then Braille)",
                    "Side-by-Side Comparison",
                ]
                pdf_type = gr.Radio(
                    choices=pdf_formats,
                    value="Sequential (Text then Braille)",
                    label="PDF Format",
                )
                pdf_button = gr.Button(value="Generate PDF")
                pdf_output = gr.File(label="Download PDF")

    # Wire the buttons to their handlers.
    process_button.click(
        fn=process_image,
        inputs=[image_input, use_llm, use_context],
        outputs=[processed_text, braille_output, metadata_output, state],
    )
    pdf_button.click(
        fn=create_pdf,
        inputs=[state, pdf_title, pdf_type],
        outputs=[pdf_output],
    )

    # Clickable sample inputs for the image component.
    sample_menus = [
        "assets/sample_menus/menu1.jpg",
        "assets/sample_menus/menu2.jpg",
    ]
    gr.Examples(examples=sample_menus, inputs=image_input)

    # Collapsed "About" panel describing the pipeline.
    with gr.Accordion(label="About", open=False):
        gr.Markdown(value="""
This application converts menu images to Braille text using AI technologies:
- Document AI for text extraction
- LLMs for text processing and enhancement
- Braille translation with formatting
- PDF generation for download
Created as a demonstration of AI-powered accessibility tools.
""")

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()