senga-dnotes

Running

serenarolloh committed on May 2

Commit

16be0ef

verified ·

1 Parent(s): 1a23ce8

Update routers/donut_inference.py

Files changed (1) hide show

routers/donut_inference.py CHANGED Viewed

@@ -7,16 +7,11 @@ from functools import lru_cache
 import os
 import requests
 @lru_cache(maxsize=1)
 def load_model(model_url: str):
-    """
-    Load the processor and model dynamically based on the model URL.
-    :param model_url: The URL for the model to use.
-    :return: The processor, model, and device.
-    """
-    # Assuming the model URL follows a pattern like "https://huggingface.co/{model_name}"
-    model_name = model_url.split("/")[-1]  # Extract model name from the URL
     processor = DonutProcessor.from_pretrained(model_name)
     model = VisionEncoderDecoderModel.from_pretrained(model_name)
@@ -36,7 +31,7 @@ def process_document_donut(image, model_url: str):
     :return: A tuple of the result and processing time.
     """
     worker_pid = os.getpid()
-    print(f"Handling inference request with worker PID: {worker_pid}")
     start_time = time.time()
@@ -72,6 +67,6 @@ def process_document_donut(image, model_url: str):
     end_time = time.time()
     processing_time = end_time - start_time
-    print(f"Inference done, worker PID: {worker_pid}")
-    return processor.token2json(sequence), processing_time

 import os
 import requests
 @lru_cache(maxsize=1)
 def load_model(model_url: str):
+    model_name = model_url.replace("https://huggingface.co/", "")
+    print(f"[Model Loader] Loading model: {model_name}")
     processor = DonutProcessor.from_pretrained(model_name)
     model = VisionEncoderDecoderModel.from_pretrained(model_name)
     :return: A tuple of the result and processing time.
     """
     worker_pid = os.getpid()
+    print(f"[Inference] Handling request with worker PID: {worker_pid}")
     start_time = time.time()
     end_time = time.time()
     processing_time = end_time - start_time
+    print(f"[Inference] Done. PID: {worker_pid} | Time taken: {processing_time:.2f} sec")
+    return processor.token2json(sequence), processing_time