from transformers import pipeline
import gradio as gr

# Build the image-to-text pipeline once at import time so every request
# handled by the app reuses the same loaded model.
MODEL_ID = "jinhybr/OCR-Donut-CORD"
pipe = pipeline(task="image-to-text", model=MODEL_ID)

# Function to process the image and extract text
def extract_text(image) -> str:
    """Run the OCR pipeline on an image and return the generated text.

    Args:
        image: The image to read. The Gradio interface in this file is
            configured with ``type="pil"``, and may hand over ``None`` when
            the form is submitted without an uploaded image.

    Returns:
        The ``generated_text`` field of the first pipeline result, or
        ``"No text detected"`` when no image was supplied or the pipeline
        returned an empty result.
    """
    # Guard against an empty submission: forwarding None to the pipeline
    # would raise instead of producing a readable message for the user.
    if image is None:
        return "No text detected"

    predictions = pipe(image)
    if not predictions:
        return "No text detected"

    # Only the first prediction is reported.
    return predictions[0]["generated_text"]

# Assemble the web UI: a single image input (delivered to the handler as a
# PIL image) wired to extract_text, with the result shown as plain text.
image_input = gr.Image(type="pil")
iface = gr.Interface(
    title="OCR with Donut-CORD Model",
    description="Upload an image to extract text using the OCR Donut-CORD model.",
    fn=extract_text,
    inputs=image_input,
    outputs="text",
)

# Launch the app only when this file is executed as a script. Importing the
# module still builds the pipeline and the interface above, but does not
# call launch().
if __name__ == "__main__":
    iface.launch()