Spaces:

ii5
/

ocr-to-latex

Running

ii5 committed 21 days ago

Commit

7e72b07

verified ·

1 Parent(s): 387731a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import torch
 import torch.nn as nn
 from torchvision import transforms as T
 from PIL import Image
 import re
 import gradio as gr
 import tempfile
@@ -285,6 +286,10 @@ def ocr_image_attn(img: torch.Tensor, model: CRNN_Attn, tokenizer: AttnTokenizer
     return outs[0]
 def ocr_image(image: Image.Image, ckpt_path: Path) -> str:
     model_type, model, tokenizer = load_model_from_ckpt(ckpt_path)
     img = TRANSFORM(image.convert("L")).unsqueeze(0).to(device)
     if model_type == "ctc":
@@ -407,6 +412,7 @@ with gr.Blocks(title="OCR → LaTeX", theme=gr.themes.Base()) as demo:
                 label="Input Image (Upload or Camera)",
                 image_mode="RGB",
                 sources=["upload", "webcam"],
             )
             model_sel = gr.Dropdown(choices=["V2 (Attention)", "V1 (CTC)"], value="V1 (CTC)", label="Model")
             run_btn = gr.Button("Recognize", variant="primary")
@@ -444,6 +450,6 @@ with gr.Blocks(title="OCR → LaTeX", theme=gr.themes.Base()) as demo:
     load_solution_btn.click(_compute_solution_only, inputs=[latex_out], outputs=[solution_md])
 if __name__ == "__main__":
-    demo.launch()

 import torch.nn as nn
 from torchvision import transforms as T
 from PIL import Image
+from PIL import ImageOps
 import re
 import gradio as gr
 import tempfile
     return outs[0]
 def ocr_image(image: Image.Image, ckpt_path: Path) -> str:
+    # Apply EXIF orientation if present so mobile photos/camera captures
+    # are correctly oriented before transforming.
+    image = ImageOps.exif_transpose(image)
     model_type, model, tokenizer = load_model_from_ckpt(ckpt_path)
     img = TRANSFORM(image.convert("L")).unsqueeze(0).to(device)
     if model_type == "ctc":
                 label="Input Image (Upload or Camera)",
                 image_mode="RGB",
                 sources=["upload", "webcam"],
+                webcam_options=gr.WebcamOptions(mirror=False),
             )
             model_sel = gr.Dropdown(choices=["V2 (Attention)", "V1 (CTC)"], value="V1 (CTC)", label="Model")
             run_btn = gr.Button("Recognize", variant="primary")
     load_solution_btn.click(_compute_solution_only, inputs=[latex_out], outputs=[solution_md])
 if __name__ == "__main__":
+    demo.launch(share=True)