# Streamlit app: upload a PNG/JPG image, preview it, and extract its text
# with Tesseract OCR (via pytesseract).
import io
import os

import pytesseract
import streamlit as st
from PIL import Image

# Use the Tesseract binary at the fixed path below when it exists there;
# otherwise leave the bare command name so it is resolved through PATH.
TESSERACT_PATH = "/usr/bin/tesseract"
if os.path.exists(TESSERACT_PATH):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
else:
    pytesseract.pytesseract.tesseract_cmd = "tesseract"  # fallback

st.set_page_config(page_title="OCR – Image to Text", layout="centered")
st.title("🖼️ OCR – Image to Text")
st.write("Upload a PNG or JPG image to extract text.")

uploaded_file = st.file_uploader(
    "Choose an image file", type=["png", "jpg", "jpeg"]
)

if uploaded_file:
    try:
        # getvalue() returns the complete upload and does not depend on the
        # stream's current read position, unlike read(). The script runs
        # again when the button below is clicked, so the bytes are fetched
        # on every run.
        image = Image.open(io.BytesIO(uploaded_file.getvalue()))
        st.image(image, caption="Preview", use_column_width=True)

        if st.button("Extract Text"):
            with st.spinner("Running OCR..."):
                try:
                    text = pytesseract.image_to_string(image)
                    st.subheader("Extracted Text")
                    st.text_area("", text, height=300)
                except pytesseract.TesseractNotFoundError:
                    # The OCR binary itself is missing: report that
                    # specifically instead of the generic failure below.
                    st.error("⚠️ Tesseract is not installed on the server. Please check your `apt.txt` or switch SDK to Docker.")
    except Exception as e:
        # Top-level UI boundary: any other failure while decoding, previewing
        # or running OCR on the image is shown to the user.
        st.error(f"❌ Failed to process image: {e}")
else:
    st.info("Please upload an image to begin.")