import gradio as gr import numpy as np from PIL import Image import io import base64 # Import our custom modules from utils.image_preprocessing import preprocess_image from models.document_ai import extract_text_and_layout from models.text_processor import process_menu_text from models.braille_translator import text_to_braille, get_braille_metadata from utils.pdf_generator import create_braille_pdf, create_braille_pdf_with_comparison def generate_pdf(original_text, braille_text, title, comparison=False): """Generate a PDF file with Braille content.""" try: if comparison: pdf_buffer = create_braille_pdf_with_comparison(original_text, braille_text, title) else: pdf_buffer = create_braille_pdf(original_text, braille_text, title) return pdf_buffer except Exception as e: print(f"Error in generate_pdf: {str(e)}") raise # Function to create a download link for a PDF def generate_pdf1(original_text, braille_text, title, comparison=False): """Generate a PDF file with Braille content.""" if comparison: pdf_buffer = create_braille_pdf_with_comparison(original_text, braille_text, title) else: pdf_buffer = create_braille_pdf(original_text, braille_text, title) return pdf_buffer def process_image_v2(image, use_llm, use_context): """Process the uploaded image and generate results.""" if image is None: return "Please upload an image first.", "", "", None # Convert to PIL Image if needed if isinstance(image, np.ndarray): image = Image.fromarray(image) # Preprocess the image preprocessed_img = preprocess_image(image) # Extract text using document AI try: result = extract_text_and_layout(preprocessed_img) if not result.get('words', []): return "No text was extracted from the image.", "", "", None raw_text = ' '.join(result['words']) # Process text with LLM if enabled if use_llm: processed_result = process_menu_text(raw_text) if processed_result['success']: processed_text = processed_result['structured_text'] else: processed_text = raw_text else: processed_text = raw_text # Translate to Braille braille_result = text_to_braille(processed_text, use_context=use_context) if not braille_result['success']: return processed_text, "", "Braille translation failed.", None braille_text = braille_result['formatted_braille'] # Generate metadata metadata = get_braille_metadata(processed_text) metadata_text = f"Translation contains {metadata['word_count']} words, {metadata['character_count']} characters, {metadata['line_count']} lines." # Store both Unicode and ASCII versions for later use state_data = { 'original_text': processed_text, 'braille_text': braille_text, 'ascii_braille': braille_result.get('formatted_ascii', '') } # Return results return processed_text, braille_text, metadata_text, state_data except Exception as e: return f"Error processing image: {str(e)}", "", "", None def process_image(image, use_llm, use_context): """Process the uploaded image and generate results.""" if image is None: return "Please upload an image first.", "", "", None # Convert to PIL Image if needed if isinstance(image, np.ndarray): image = Image.fromarray(image) # Preprocess the image preprocessed_img = preprocess_image(image) # Extract text using document AI try: result = extract_text_and_layout(preprocessed_img) if not result.get('words', []): return "No text was extracted from the image.", "", "", None raw_text = ' '.join(result['words']) # Process text with LLM if enabled if use_llm: processed_result = process_menu_text(raw_text) if processed_result['success']: processed_text = processed_result['structured_text'] else: processed_text = raw_text else: processed_text = raw_text # Translate to Braille braille_result = text_to_braille(processed_text, use_context=use_context) if not braille_result['success']: return processed_text, "", "Braille translation failed.", None braille_text = braille_result['formatted_braille'] # Generate metadata metadata = get_braille_metadata(processed_text) metadata_text = f"Translation contains {metadata['word_count']} words, {metadata['character_count']} characters, {metadata['line_count']} lines." # Return results return processed_text, braille_text, metadata_text, (processed_text, braille_text) except Exception as e: return f"Error processing image: {str(e)}", "", "", None def create_pdf_v2(state, pdf_title, pdf_type): """Create a PDF file for download.""" if state is None: return None # Extract data from state try: original_text = state['original_text'] ascii_braille = state['ascii_braille'] # If ASCII version is not available, use the Unicode version if not ascii_braille: ascii_braille = state['braille_text'] except: # Fallback for backward compatibility if isinstance(state, tuple) and len(state) == 2: original_text, braille_text = state ascii_braille = braille_text else: return None comparison = (pdf_type == "Side-by-Side Comparison") try: pdf_buffer = generate_pdf(original_text, ascii_braille, pdf_title, comparison) # Create a temporary file to save the PDF temp_file_path = f"/tmp/{pdf_title.replace(' ', '_').lower()}.pdf" # Write the buffer to a file with open(temp_file_path, "wb") as f: f.write(pdf_buffer.getvalue()) return temp_file_path except Exception as e: print(f"Error generating PDF: {str(e)}") return None def create_pdf(state, pdf_title, pdf_type): """Create a PDF file for download.""" if state is None: return None # Extract data from state try: original_text = state['original_text'] braille_text = state['braille_text'] # Use Unicode Braille text except: # Fallback for backward compatibility if isinstance(state, tuple) and len(state) == 2: original_text, braille_text = state else: return None comparison = (pdf_type == "Side-by-Side Comparison") try: pdf_buffer = generate_pdf(original_text, braille_text, pdf_title, comparison) # Create a temporary file to save the PDF temp_file_path = f"/tmp/{pdf_title.replace(' ', '_').lower()}.pdf" # Write the buffer to a file with open(temp_file_path, "wb") as f: f.write(pdf_buffer.getvalue()) return temp_file_path except Exception as e: print(f"Error generating PDF: {str(e)}") return None def create_pdf_v1_working(state, pdf_title, pdf_type): """Create a PDF file for download.""" if state is None or len(state) != 2: return None original_text, braille_text = state # Get ASCII representation for PDF try: braille_result = text_to_braille(original_text, use_context=False) ascii_braille = braille_result.get('formatted_ascii', braille_text) except: ascii_braille = braille_text comparison = (pdf_type == "Side-by-Side Comparison") try: pdf_buffer = generate_pdf(original_text, ascii_braille, pdf_title, comparison) # Create a temporary file to save the PDF temp_file_path = f"/tmp/{pdf_title.replace(' ', '_').lower()}.pdf" # Write the buffer to a file with open(temp_file_path, "wb") as f: f.write(pdf_buffer.getvalue()) return temp_file_path except Exception as e: print(f"Error generating PDF: {str(e)}") return None def create_pdf2(state, pdf_title, pdf_type): """Create a PDF file for download.""" if state is None or len(state) != 2: return None original_text, braille_text = state comparison = (pdf_type == "Side-by-Side Comparison") try: pdf_buffer = generate_pdf(original_text, braille_text, pdf_title, comparison) # Create a temporary file to save the PDF temp_file_path = f"/tmp/{pdf_title.replace(' ', '_').lower()}.pdf" # Write the buffer to a file with open(temp_file_path, "wb") as f: f.write(pdf_buffer.getvalue()) return temp_file_path except Exception as e: print(f"Error generating PDF: {str(e)}") return None def create_pdf1(state, pdf_title, pdf_type): """Create a PDF file for download.""" if state is None or len(state) != 2: return None original_text, braille_text = state comparison = (pdf_type == "Side-by-Side Comparison") pdf_buffer = generate_pdf(original_text, braille_text, pdf_title, comparison) # Return the file for download return pdf_buffer # Create the Gradio interface with gr.Blocks(title="English Menu to Braille Menu Converter") as demo: gr.Markdown("# English Menu to Braille Menu") gr.Markdown("Upload a menu image to convert it to Braille text") with gr.Row(): with gr.Column(scale=1): # Input components image_input = gr.Image(type="pil", label="Upload Menu Image") with gr.Row(): use_llm = gr.Checkbox(label="Use AI for text processing", value=True) use_context = gr.Checkbox(label="Use AI for context enhancement", value=True) process_button = gr.Button("Process Menu") with gr.Column(scale=2): # Output components processed_text = gr.Textbox(label="Processed Text", lines=8) braille_output = gr.Textbox(label="Braille Translation", lines=10) metadata_output = gr.Markdown() # Hidden state for PDF generation state = gr.State() # PDF download section with gr.Group(): gr.Markdown("### Download Options") pdf_title = gr.Textbox(label="PDF Title", value="Menu in Braille") pdf_type = gr.Radio( ["Sequential (Text then Braille)", "Side-by-Side Comparison"], label="PDF Format", value="Sequential (Text then Braille)" ) pdf_button = gr.Button("Generate PDF") pdf_output = gr.File(label="Download PDF") # Set up event handlers process_button.click( process_image, inputs=[image_input, use_llm, use_context], outputs=[processed_text, braille_output, metadata_output, state] ) pdf_button.click( create_pdf, inputs=[state, pdf_title, pdf_type], outputs=[pdf_output] ) # Add examples gr.Examples( examples=["assets/sample_menus/menu1.jpg", "assets/sample_menus/menu2.jpg"], inputs=image_input ) # Add about section with gr.Accordion("About", open=False): gr.Markdown(""" This application converts menu images to Braille text using AI technologies: - Document AI for text extraction - LLMs for text processing and enhancement - Braille translation with formatting - PDF generation for download Created as a demonstration of AI-powered accessibility tools. """) # Launch the app if __name__ == "__main__": demo.launch()