Spaces:

Yakova
/

Embedding

Running

App Files Files Community

Mbonea committed on Jan 13

Commit

88b7f59

1 Parent(s): d0a7224

ooh well

Browse files

Files changed (3) hide show

App/OCR/Tesseract.py +103 -0
App/app.py +2 -1
Dockerfile +2 -0

App/OCR/Tesseract.py ADDED Viewed

	@@ -0,0 +1,103 @@

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import tempfile
+import os
+from fastapi.responses import JSONResponse
+import pytesseract
+from pytesseract import Output
+from PIL import Image
+import requests
+from fastapi.routing import APIRouter
+from io import BytesIO
+# Router that groups the OCR endpoints under the "OCR" tag; it is imported and
+# mounted on the main application in App/app.py.
+tessaract_ocr_router = APIRouter(tags=["OCR"])
+# Request body accepted by the POST /highlight endpoint.
+class HighlightRequest(BaseModel):
+    # URL the server downloads the image from before running OCR on it.
+    imageUrl: str
+    # Words or phrases to locate in the OCR output; matching is case-insensitive.
+    searchTerms: list[str]
+@tessaract_ocr_router.post("/highlight")
+async def highlight(request: HighlightRequest):
+    image_url = request.imageUrl
+    search_terms = request.searchTerms
+    if not image_url or not isinstance(search_terms, list) or len(search_terms) == 0:
+        raise HTTPException(
+            status_code=400, detail="imageUrl and searchTerms are required"
+        )
+    try:
+        # Download the image
+        response = requests.get(image_url)
+        if response.status_code != 200:
+            raise HTTPException(status_code=400, detail="Failed to download image")
+        image = Image.open(BytesIO(response.content))
+        # Run OCR
+        ocr_data = pytesseract.image_to_data(image, lang="eng", output_type=Output.DICT)
+        words = [
+            {
+                "text": ocr_data["text"][i],
+                "bbox": {
+                    "x0": ocr_data["left"][i],
+                    "y0": ocr_data["top"][i],
+                    "x1": ocr_data["left"][i] + ocr_data["width"][i],
+                    "y1": ocr_data["top"][i] + ocr_data["height"][i],
+                },
+            }
+            for i in range(len(ocr_data["text"]))
+            if ocr_data["text"][i].strip() != ""
+        ]
+        highlights = []
+        # Search for each term
+        for term in search_terms:
+            term_words = term.lower().split(" ")
+            term_len = len(term_words)
+            word_index = 0
+            for i, word_obj in enumerate(words):
+                word = word_obj["text"].lower()
+                if word == term_words[word_index]:
+                    word_index += 1
+                    # If all words match
+                    if word_index == term_len:
+                        word_index = 0
+                        # Get bounding box
+                        x_start = words[i - term_len + 1]["bbox"]["x0"]
+                        y_start = words[i - term_len + 1]["bbox"]["y0"]
+                        x_end = words[i]["bbox"]["x1"]
+                        y_end = words[i]["bbox"]["y1"]
+                        highlights.append(
+                            {
+                                "text": term,
+                                "bbox": {
+                                    "x0": x_start,
+                                    "y0": y_start,
+                                    "x1": x_end,
+                                    "y1": y_end,
+                                },
+                            }
+                        )
+                else:
+                    word_index = 0  # Reset if match breaks
+        # Respond with highlights
+        return JSONResponse(
+            content={"searchTerms": search_terms, "highlights": highlights}
+        )
+    except Exception as e:
+        return HTTPException(
+            status_code=500,
+            detail=f"An error occurred while processing the image: {str(e)}",
+        )

App/app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from fastapi.middleware.gzip import GZipMiddleware
 from .TTS.TTSRoutes import tts_router
 from .Embedding.EmbeddingRoutes import embeddigs_router
 from fastapi.middleware.cors import CORSMiddleware
@@ -46,5 +46,6 @@ async def landing_page():
 app.include_router(embeddigs_router)
 app.include_router(tts_router)
 # app.include_router(shader_router)

 from .TTS.TTSRoutes import tts_router
 from .Embedding.EmbeddingRoutes import embeddigs_router
+from .OCR.Tesseract import tessaract_ocr_router
 from fastapi.middleware.cors import CORSMiddleware
 app.include_router(embeddigs_router)
+app.include_router(tessaract_ocr_router)
 app.include_router(tts_router)
 # app.include_router(shader_router)

Dockerfile CHANGED Viewed

@@ -33,6 +33,8 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy the application code
 USER admin
 COPY --chown=admin . /srv
 # Command to run the application

 # Copy the application code
 USER admin
+# Installing system packages needs root; switch back to the unprivileged user
+# afterwards. NOTE(review): assumes `admin` is a non-root user - confirm
+# against the earlier part of this Dockerfile.
+USER root
+# apt-get (not the interactive `apt` front end) with a fresh package index in
+# the same layer; the index is removed afterwards to keep the image small.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends tesseract-ocr \
+    && rm -rf /var/lib/apt/lists/*
+USER admin
+RUN pip install --no-cache-dir pytesseract
 COPY --chown=admin . /srv
 # Command to run the application