# Hugging Face Space: ZennyKenny/note-to-text (Running on Zero)
# File size: 1,630 Bytes

# Commit hashes captured from the page's per-line revision gutter:
# 44342ba, fd11c5a, 3fa52bd, d4bf218, 817e54c

import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch
import matplotlib.pyplot as plt

# Load the TrOCR processor and model once, at import time, so every request
# reuses the same instances. Both come from the same pretrained checkpoint.
_TROCR_CHECKPOINT = "microsoft/trocr-large-handwritten"

# `processor` prepares images for the model and decodes generated ids to text.
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
# `model` generates token ids from the preprocessed pixel values.
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

def recognize_text(image: "Image.Image | None") -> str:
    """Transcribe the handwritten text in an image with the TrOCR model.

    Args:
        image: The PIL image to transcribe, or ``None`` when no image was
            supplied (e.g. the form was submitted without an upload).

    Returns:
        The decoded text of the first generated sequence. If no image was
        given or any step fails, a string starting with ``"Error: "`` is
        returned instead of raising, so the caller always receives text.
    """
    # Guard the empty submission explicitly; otherwise the user would see a
    # raw "'NoneType' object has no attribute 'convert'" message.
    if image is None:
        return "Error: no image provided."

    try:
        # Convert image to RGB if it's not already
        rgb_image = image.convert("RGB")
        print("Image converted to RGB.")

        # Preprocess the image into the tensor the model expects
        pixel_values = processor(images=rgb_image, return_tensors="pt").pixel_values
        print("Image preprocessed. Pixel values shape:", pixel_values.shape)

        # NOTE: the matplotlib preview that used to live here was removed.
        # Opening a pyplot window from a request handler can block the request
        # (interactive backends) or just warn (headless), and every call kept
        # adding images to the global pyplot figure.

        # Generate text from the image
        with torch.no_grad():  # Disable gradient calculation for inference
            generated_ids = model.generate(pixel_values)
        print("Generated IDs:", generated_ids)

        # Decode the generated IDs to text; a single image yields one sequence
        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        print("Decoded text:", text)

        return text
    except Exception as e:
        # Top-level boundary of the handler: log the failure and report it as
        # text rather than propagating the exception.
        print(f"Error: {e}")
        return f"Error: {e}"

# Gradio UI: a single image input wired to recognize_text, with the
# transcription shown as plain text.
note = gr.Interface(
    fn=recognize_text,
    inputs=gr.Image(type="pil"),  # hand the callback a PIL image
    outputs="text",
    title="Handwritten Note to Digital Text",
    description="Upload an image of handwritten text, and the AI will convert it to digital text.",
)

# Start the web server only when this file is run as a script, so importing
# the module (e.g. to reuse or test recognize_text) does not launch a
# blocking server as a side effect.
if __name__ == "__main__":
    note.launch()