sitammeur committed on
Commit
7622505
Β·
verified Β·
1 Parent(s): b559743

Upload 3 files

Browse files
src/florence/__init__.py ADDED
File without changes
src/florence/model.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Importing necessary libraries (stdlib)
+ import os
+ import subprocess
+ import sys
+ from typing import Optional
+
+ # Third-party libraries
+ import gradio as gr
+ import spaces
+ from PIL import Image
+ from transformers import AutoProcessor, AutoModelForCausalLM
+
+ # Local imports
+ from src.logger import logging
+ from src.exception import CustomExceptionHandling
+
14
+
15
# Install flash-attn at startup (Hugging Face Spaces pattern); skip the CUDA
# build because prebuilt kernels are resolved at runtime.
# NOTE: the env dict must be merged with os.environ — passing only the one
# variable replaces the child's entire environment, stripping PATH (and the
# active virtualenv) so the `pip` command itself cannot be found.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

# Load model and processor from Hugging Face
model_id = "microsoft/Florence-2-large-ft"
try:
    # Move to GPU and switch to inference mode (no dropout, no grads needed)
    model = (
        AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
        .to("cuda")
        .eval()
    )
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    logging.info("Model and processor loaded successfully.")

# Handle exceptions that may occur during the process
except Exception as e:
    # Wrap in the project's custom exception, preserving the original cause
    raise CustomExceptionHandling(e, sys) from e
37
+
38
+
39
@spaces.GPU
def run_example(
    task_prompt: str, image: Image.Image, text_input: Optional[str] = None
) -> str:
    """
    Run the Florence-2 model on an image for the given task prompt.

    Args:
        - task_prompt (str): The task prompt for the example.
        - image (PIL.Image.Image): The image to be processed.
        - text_input (str, optional): Additional text input appended to the
          task prompt. Defaults to None.

    Returns:
        str: The parsed answer generated by the model, or an empty string
        when no image is provided.
    """
    try:
        # Guard clause: warn the user AND stop. (Previously execution fell
        # through after the warning and crashed inside the processor on a
        # None image.)
        if image is None:
            gr.Warning("Please provide an image.")
            return ""

        # If there is no text input, use the task prompt as the prompt
        prompt = task_prompt if text_input is None else task_prompt + text_input

        # Tokenize the prompt and preprocess the image; move tensors to GPU
        inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")

        # Deterministic beam-search decoding (no sampling)
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            early_stopping=False,
            do_sample=False,
            num_beams=3,
        )

        # skip_special_tokens=False: special tokens are kept — presumably
        # required by post_process_generation to parse task/region markers
        # (TODO confirm against the Florence-2 processor docs).
        generated_text = processor.batch_decode(
            generated_ids, skip_special_tokens=False
        )[0]
        parsed_answer = processor.post_process_generation(
            generated_text, task=task_prompt, image_size=(image.width, image.height)
        )

        # Return the parsed answer
        return parsed_answer

    # Handle exceptions that may occur during the process
    except Exception as e:
        # Wrap in the project's custom exception, preserving the original cause
        raise CustomExceptionHandling(e, sys) from e
src/florence/task.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries
2
+ import sys
3
+ import copy
4
+ from typing import Tuple
5
+ from PIL import Image
6
+ import supervision as sv
7
+
8
+ # Local imports
9
+ from src.utils.processing import clean_text, draw_ocr_bboxes
10
+ from src.florence.model import run_example
11
+ from src.logger import logging
12
+ from src.exception import CustomExceptionHandling
13
+
14
+
15
def ocr_task(image: Image.Image) -> Tuple[Image.Image, str]:
    """
    Perform OCR (Optical Character Recognition) on the given image.

    Args:
        image (PIL.Image.Image): The input image to perform OCR on.

    Returns:
        tuple: A tuple containing the output image with OCR bounding boxes
        drawn and the cleaned OCR text.
    """
    try:
        # Task prompts understood by Florence-2
        ocr_prompt = "<OCR>"
        ocr_with_region_prompt = "<OCR_WITH_REGION>"

        # Get the plain OCR text and strip model artifacts from it
        ocr_results = run_example(ocr_prompt, image)
        cleaned_text = clean_text(ocr_results["<OCR>"])

        # Log the successful extraction and cleaning of OCR text
        logging.info("OCR text extracted and cleaned successfully.")

        # Get OCR results with region bounding boxes
        ocr_with_region_results = run_example(ocr_with_region_prompt, image)
        detections = sv.Detections.from_lmm(
            lmm=sv.LMM.FLORENCE_2,
            result=ocr_with_region_results,
            resolution_wh=image.size,
        )

        # Draw the boxes on a deep copy so the caller's image is never
        # mutated. (The original code made the copy but then drew on the
        # input `image`, leaving the copy unused.)
        output_image = draw_ocr_bboxes(copy.deepcopy(image), detections)

        # Log the successful drawing of OCR bounding boxes
        logging.info("OCR bounding boxes drawn successfully.")

        # Return the output image and cleaned OCR text
        return output_image, cleaned_text

    # Handle exceptions that may occur during the process
    except Exception as e:
        # Wrap in the project's custom exception, preserving the original cause
        raise CustomExceptionHandling(e, sys) from e