Ivan Shelonik committed on
Commit
9a98ec7
·
1 Parent(s): cc125f7

upd/add: pred_proba & refactored

Browse files
Files changed (1) hide show
  1. api_server.py +24 -23
api_server.py CHANGED
@@ -1,6 +1,12 @@
1
- """
2
- official fastapi HF example https://huggingface.co/docs/hub/spaces-sdks-docker-examples#docker-spaces-examples
3
- """
 
 
 
 
 
 
4
 
5
  import os
6
  import time
@@ -18,26 +24,27 @@ from flask import Flask, jsonify, request, render_template
18
  load_type = 'remote_hub_from_pretrained'
19
  """
20
  local;
21
- remote_hub_download; - /cache error even using TRANSFORMERS_CACHE & cache_dir to local folder
22
- remote_hub_from_pretrained; - /cache error even using TRANSFORMERS_CACHE & cache_dir to local folder
23
  remote_hub_pipeline; - needs config.json and this is not easy to grasp how to do it with custom models
24
  https://discuss.huggingface.co/t/how-to-create-a-config-json-after-saving-a-model/10459/4
25
  """
26
 
27
  REPO_ID = "1vash/mnist_demo_model"
 
28
 
29
  # Load the saved model into memory
30
  if load_type == 'local':
31
- model = keras.models.load_model('artifacts/models/mnist_model.h5')
32
  elif load_type == 'remote_hub_download':
33
  from huggingface_hub import hf_hub_download
34
 
35
  model = keras.models.load_model(hf_hub_download(repo_id=REPO_ID, filename="saved_model.pb"))
36
  elif load_type == 'remote_hub_from_pretrained':
37
  # https://huggingface.co/docs/hub/keras
38
- os.environ['TRANSFORMERS_CACHE'] = str(Path('./artifacts/').absolute())
39
  from huggingface_hub import from_pretrained_keras
40
- model = from_pretrained_keras(REPO_ID, cache_dir='./artifacts/')
41
  elif load_type == 'remote_hub_pipeline':
42
  from transformers import pipeline
43
 
@@ -62,7 +69,8 @@ def predict():
62
 
63
  Response format:
64
  {
65
- "prediction": predicted_label,
 
66
  "ml-latency-ms": latency_in_milliseconds
67
  (Measures time only for ML operations preprocessing with predict)
68
  }
@@ -78,9 +86,6 @@ def predict():
78
  # Get pixels out of file
79
  image_data = Image.open(file)
80
 
81
- # Get the image data from the request
82
- # image_data = request.get_json()['image']
83
-
84
  # Preprocess the image
85
  processed_image = preprocess_image(image_data)
86
 
@@ -89,13 +94,17 @@ def predict():
89
 
90
  # Get the predicted class label
91
  predicted_label = np.argmax(prediction)
 
92
 
93
  # Calculate latency in milliseconds
94
  latency_ms = (time.time() - start_time) * 1000
95
 
96
- # Return the prediction result and latency as JSON response
97
- response = {'prediction': int(predicted_label),
98
- 'ml-latency-ms': round(latency_ms, 4)}
 
 
 
99
 
100
  # dictionary is not a JSON: https://www.quora.com/What-is-the-difference-between-JSON-and-a-dictionary
101
  # flask.jsonify vs json.dumps https://sentry.io/answers/difference-between-json-dumps-and-flask-jsonify/
@@ -149,11 +158,3 @@ def hello_world():
149
  # Start the Flask application
150
  if __name__ == '__main__':
151
  app.run(debug=True)
152
-
153
- ##################
154
- # Flask API usages:
155
- # 1. Just a wrapper over OpenAI API
156
- # 2. You can use Chain calls of OpenAI API
157
- # 3. Using your own ML model in combination with openAPI functionality
158
- # 4. ...
159
- ##################
 
1
+ # official fastapi HF example https://huggingface.co/docs/hub/spaces-sdks-docker-examples#docker-spaces-examples
2
+
3
+ ##################
4
+ # Flask API usages:
5
+ # 1. Just a wrapper over OpenAI API
6
+ # 2. You can use Chain calls of OpenAI API
7
+ # 3. Using your own ML model in combination with OpenAI API functionality
8
+ # 4. ...
9
+ ##################
10
 
11
  import os
12
  import time
 
24
  load_type = 'remote_hub_from_pretrained'
25
  """
26
  local;
27
+ remote_hub_download;
28
+ remote_hub_from_pretrained;
29
  remote_hub_pipeline; - needs config.json and this is not easy to grasp how to do it with custom models
30
  https://discuss.huggingface.co/t/how-to-create-a-config-json-after-saving-a-model/10459/4
31
  """
32
 
33
  REPO_ID = "1vash/mnist_demo_model"
34
+ MODEL_DIR = "./artifacts/models"
35
 
36
  # Load the saved model into memory
37
  if load_type == 'local':
38
+ model = keras.models.load_model(f'{MODEL_DIR}/mnist_model.h5')
39
  elif load_type == 'remote_hub_download':
40
  from huggingface_hub import hf_hub_download
41
 
42
  model = keras.models.load_model(hf_hub_download(repo_id=REPO_ID, filename="saved_model.pb"))
43
  elif load_type == 'remote_hub_from_pretrained':
44
  # https://huggingface.co/docs/hub/keras
45
+ os.environ['TRANSFORMERS_CACHE'] = str(Path(MODEL_DIR).absolute())
46
  from huggingface_hub import from_pretrained_keras
47
+ model = from_pretrained_keras(REPO_ID, cache_dir=MODEL_DIR)
48
  elif load_type == 'remote_hub_pipeline':
49
  from transformers import pipeline
50
 
 
69
 
70
  Response format:
71
  {
72
+ "label": predicted_label,
73
+ "pred_proba": prediction class probability,
74
  "ml-latency-ms": latency_in_milliseconds
75
  (Measures time only for ML operations preprocessing with predict)
76
  }
 
86
  # Get pixels out of file
87
  image_data = Image.open(file)
88
 
 
 
 
89
  # Preprocess the image
90
  processed_image = preprocess_image(image_data)
91
 
 
94
 
95
  # Get the predicted class label
96
  predicted_label = np.argmax(prediction)
97
+ proba = prediction[0][predicted_label]
98
 
99
  # Calculate latency in milliseconds
100
  latency_ms = (time.time() - start_time) * 1000
101
 
102
+ # Return the prediction result and latency as dictionary response
103
+ response = {
104
+ 'label': int(predicted_label),
105
+ 'pred_proba': float(proba),
106
+ 'ml-latency-ms': round(latency_ms, 4)
107
+ }
108
 
109
  # dictionary is not a JSON: https://www.quora.com/What-is-the-difference-between-JSON-and-a-dictionary
110
  # flask.jsonify vs json.dumps https://sentry.io/answers/difference-between-json-dumps-and-flask-jsonify/
 
158
  # Start the Flask application
159
  if __name__ == '__main__':
160
  app.run(debug=True)