henok3878 committed on
Commit
44bb151
·
1 Parent(s): 56ecf7d

feature: add quantized model for cpu inference

Browse files
main.py CHANGED
@@ -9,11 +9,13 @@ import logging
9
  import time
10
  from contextlib import asynccontextmanager
11
  from inference_utils import construct_alphabet_list, convert_offsets_to_absolute_coords, encode_text, get_alphabet_map
 
12
 
13
  logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
16
  MODEL_DIR = Path("./packaged_models")
 
17
  SCRIPTED_MODEL_NAME = "model.scripted.pt"
18
  METADATA_MODEL_NAME = "model.pt"
19
 
@@ -62,6 +64,8 @@ async def lifespan(app: FastAPI):
62
 
63
  scripted_model_path = MODEL_DIR / SCRIPTED_MODEL_NAME
64
  metadata_model_path = MODEL_DIR / METADATA_MODEL_NAME
 
 
65
 
66
  if not scripted_model_path.exists():
67
  logger.error(f"Traced model not found at {scripted_model_path}")
 
9
  import time
10
  from contextlib import asynccontextmanager
11
  from inference_utils import construct_alphabet_list, convert_offsets_to_absolute_coords, encode_text, get_alphabet_map
12
+ from ml.src.inference_benchmark import QUANTIZED_MODEL_NAME
13
 
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
17
  MODEL_DIR = Path("./packaged_models")
18
+ QUANTIZED_MODEL_NAME = "model.scripted.quantized.pt"
19
  SCRIPTED_MODEL_NAME = "model.scripted.pt"
20
  METADATA_MODEL_NAME = "model.pt"
21
 
 
64
 
65
  scripted_model_path = MODEL_DIR / SCRIPTED_MODEL_NAME
66
  metadata_model_path = MODEL_DIR / METADATA_MODEL_NAME
67
+ if device.type == "cpu":
68
+ scripted_model_path = MODEL_DIR / QUANTIZED_MODEL_NAME
69
 
70
  if not scripted_model_path.exists():
71
  logger.error(f"Traced model not found at {scripted_model_path}")
packaged_models/model.scripted.quantized.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:793a525a5a8d4f62cc80ddbf0f0ca0fddc13ec202ef2fc6efd9bfaa32c78e306
3
- size 17674936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c88342d00b8eb7d0380da89d9cf9f9744a9e6a7caac2f783f6b912d58c3129ac
3
+ size 4509816