Lyon28 committed
Commit d2346a3 · verified · 1 Parent(s): 90260b6

Update app.py

Files changed (1)
  1. app.py +147 -89
app.py CHANGED
@@ -1,20 +1,21 @@
-# app.py for Hugging Face Spaces
-from flask import Flask, jsonify, request, render_template_string
+# app.py
+from flask import Flask, jsonify, request
 from flask_cors import CORS
 from transformers import pipeline
 import logging
-import gc
 import torch
+import os  # to read environment variables, e.g. on Hugging Face Spaces
 
 app = Flask(__name__)
-CORS(app)
+CORS(app)  # enable CORS so requests from your frontend are allowed
 
-# Setup logging
-logging.basicConfig(level=logging.INFO)
+# --- Logging setup ---
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
-# Global models storage
-models = {}
+# --- Model configuration ---
+# Add 'hf_model_name' when the model's name on Hugging Face differs from the ID you want to expose.
+# If the names match, 'hf_model_name' is not needed.
 model_info = {
     "Albert-Base-V2": {"task": "fill-mask", "description": "BERT-based model for masked language modeling"},
     "GPT-2": {"task": "text-generation", "description": "GPT-2 model for text generation"},
@@ -24,117 +25,174 @@ model_info = {
     "Bert-Tinny": {"task": "fill-mask", "description": "Tiny BERT model"},
     "Distilbert-Base-Uncased": {"task": "fill-mask", "description": "Distilled BERT model"},
     "Pythia": {"task": "text-generation", "description": "Pythia language model"},
-    "T5-Small": {"task": "text2text-generation", "description": "Small T5 model"},
+    "T5-Small": {"task": "text2text-generation", "description": "Small T5 model", "hf_model_name": "t5-small"},
     "GPT-Neo": {"task": "text-generation", "description": "GPT-Neo model"},
     "Distil-GPT-2": {"task": "text-generation", "description": "Distilled GPT-2 model"}
 }
 
-def load_models():
-    """Load all models at startup"""
-    logger.info("Loading all models...")
-
-    for model_name, info in model_info.items():
+# --- Global model store (for lazy loading) ---
+models = {}
+
+# --- Utility function for lazy model loading ---
+def get_model_pipeline(model_name):
+    """
+    Load a model only if it has not been loaded yet (lazy loading).
+    Return the requested model pipeline.
+    """
+    if model_name not in models:
+        logger.info(f"Model '{model_name}' not loaded yet. Loading now...")
+        if model_name not in model_info:
+            logger.error(f"No configuration for model '{model_name}' in model_info.")
+            raise ValueError(f"Unknown model: '{model_name}'.")
+
+        info = model_info[model_name]
         try:
-            logger.info(f"Loading {model_name}...")
-            model_path = f"Lyon28/{model_name}"
-
-            # Load model with appropriate task
+            # Use 'hf_model_name' if provided; otherwise use model_name with the 'Lyon28/' prefix
+            hf_model_path = info.get("hf_model_name", f"Lyon28/{model_name}")
+
+            # Explicitly set device to "cpu" for CPU-only environments
             models[model_name] = pipeline(
-                info["task"],
-                model=model_path,
-                device=-1,  # CPU only
-                torch_dtype=torch.float32
+                info["task"],
+                model=hf_model_path,
+                device="cpu",  # important: keep this "cpu" if no GPU is available
+                torch_dtype=torch.float32  # keep float32 for the best CPU performance
             )
-            logger.info(f"✅ {model_name} loaded successfully")
-
+            logger.info(f"✅ Model '{model_name}' (path: {hf_model_path}) loaded successfully.")
         except Exception as e:
-            logger.error(f"❌ Failed to load {model_name}: {str(e)}")
-
-    logger.info(f"Loaded {len(models)}/{len(model_info)} models")
+            logger.error(f"❌ Failed to load model '{model_name}' (path: {hf_model_path}): {str(e)}", exc_info=True)
+            raise RuntimeError(f"Failed to load model: {model_name}. Details: {str(e)}") from e
+    return models[model_name]
 
-# Load models on startup
-load_models()
+# --- API routes ---
 
 @app.route('/')
 def home():
-    """Simple API status - no HTML interface"""
+    """Root endpoint reporting API status."""
     return jsonify({
-        "message": "Lyon28's AI Models API",
+        "message": "Flask API for Hugging Face models",
         "status": "online",
-        "total_models": len(model_info),
-        "loaded_models": len(models),
-        "endpoints": {
-            "models_list": "/api/models",
-            "health_check": "/health",
-            "prediction": "/api/{model_name}"
-        },
-        "available_models": list(models.keys())
+        "loaded_models_count": len(models),
+        "available_model_configs": list(model_info.keys()),
+        "info": "Use /api/models for the list of available models."
     })
 
 @app.route('/api/models', methods=['GET'])
-def list_models():
-    """Get list of available models"""
-    available_models = []
-    for name, info in model_info.items():
-        available_models.append({
-            "name": name,
+def list_available_models():
+    """Return every configured model, including its load status."""
+    available_models_data = [
+        {
+            "id": name,
+            "name": info["description"],
             "task": info["task"],
-            "description": info["description"],
-            "status": "ready" if name in models else "failed",
+            "status": "loaded" if name in models else "not_loaded",  # reflects lazy loading
             "endpoint": f"/api/{name}"
-        })
-
+        }
+        for name, info in model_info.items()
+    ]
     return jsonify({
-        "total": len(model_info),
-        "loaded": len(models),
-        "models": available_models
+        "total_configured_models": len(model_info),
+        "currently_loaded_models": len(models),
+        "models": available_models_data
     })
 
-@app.route('/api/<model_name>', methods=['POST'])
-def predict(model_name):
-    """Main prediction endpoint"""
-    if model_name not in models:
-        return jsonify({
-            "error": f"Model '{model_name}' not available. Available models: {list(models.keys())}"
-        }), 404
-
+@app.route('/api/<model_id>', methods=['POST'])
+def predict_with_model(model_id):
+    """
+    Main prediction endpoint.
+    Accepts 'inputs' (text) and an optional 'parameters' dictionary.
+    """
+    logger.info(f"Received request for model: {model_id}")
+    if model_id not in model_info:
+        logger.warning(f"Request for unknown model: {model_id}")
+        return jsonify({"error": f"Unknown model '{model_id}'. See /api/models for the available models."}), 404
+
     try:
+        model_pipeline = get_model_pipeline(model_id)  # loads the model if not yet loaded
+        model_task = model_info[model_id]["task"]
+
         data = request.json
         inputs = data.get('inputs', '')
-        parameters = data.get('parameters', {})
-
+        parameters = data.get('parameters', {})  # default to an empty dict when absent
+
         if not inputs:
-            return jsonify({"error": "No inputs provided"}), 400
-
-        # Get model
-        model = models[model_name]
-
-        # Generate prediction
-        if parameters:
-            result = model(inputs, **parameters)
+            return jsonify({"error": "'inputs' must not be empty."}), 400
+
+        logger.info(f"Inference: model='{model_id}', task='{model_task}', input='{inputs[:100]}...', params='{parameters}'")
+
+        result = []
+        # --- Parameter handling and inference by task type ---
+        if model_task == "text-generation":
+            # Default parameters for text generation
+            gen_params = {
+                "max_new_tokens": parameters.get("max_new_tokens", 150),  # more tokens for roleplay
+                "temperature": parameters.get("temperature", 0.7),
+                "do_sample": parameters.get("do_sample", True),
+                "return_full_text": parameters.get("return_full_text", False),  # essential for chatbots
+                "num_return_sequences": parameters.get("num_return_sequences", 1),
+                "top_k": parameters.get("top_k", 50),
+                "top_p": parameters.get("top_p", 0.95),
+                "repetition_penalty": parameters.get("repetition_penalty", 1.2),  # discourages repetition
+            }
+            result = model_pipeline(inputs, **gen_params)
+
+        elif model_task == "fill-mask":
+            mask_params = {
+                "top_k": parameters.get("top_k", 5)
+            }
+            result = model_pipeline(inputs, **mask_params)
+
+        elif model_task == "text2text-generation":  # e.g. T5
+            t2t_params = {
+                "max_new_tokens": parameters.get("max_new_tokens", 150),
+                "temperature": parameters.get("temperature", 0.7),
+                "do_sample": parameters.get("do_sample", True),
+            }
+            result = model_pipeline(inputs, **t2t_params)
+
+        else:
+            # Fallback for other tasks, or when no task-specific parameters apply
+            result = model_pipeline(inputs, **parameters)
+
+        # --- Consistent output format ---
+        response_output = {}
+        if model_task == "text-generation" or model_task == "text2text-generation":
+            if result and len(result) > 0 and 'generated_text' in result[0]:
+                response_output['text'] = result[0]['generated_text'].strip()
+            else:
+                response_output['text'] = "[No text was generated, or the output format was unexpected.]"
+        elif model_task == "fill-mask":
+            response_output['predictions'] = [
+                {"sequence": p.get('sequence', ''), "score": p.get('score', 0.0), "token_str": p.get('token_str', '')}
+                for p in result
+            ]
         else:
-            result = model(inputs)
-
-        return jsonify({
-            "model": model_name,
-            "inputs": inputs,
-            "outputs": result,
-            "parameters": parameters
-        })
-
+            # For other task types, return the raw result
+            response_output = result
+
+        logger.info(f"Inference succeeded for '{model_id}'. Output preview: '{str(response_output)[:200]}'")
+        return jsonify({"model": model_id, "inputs": inputs, "outputs": response_output})
+
+    except ValueError as ve:
+        # Errors raised by get_model_pipeline or input validation
+        logger.error(f"Validation or configuration error for model '{model_id}': {str(ve)}")
+        return jsonify({"error": str(ve), "message": "Model configuration or input error."}), 400
+    except RuntimeError as re:
+        # Errors while loading the model
+        logger.error(f"Runtime error while loading model '{model_id}': {str(re)}")
+        return jsonify({"error": str(re), "message": "Model failed to load."}), 503  # Service Unavailable
     except Exception as e:
-        logger.error(f"Error with model {model_name}: {str(e)}")
-        return jsonify({"error": str(e)}), 500
+        # Catch all other unexpected errors during prediction
+        logger.error(f"Unexpected error while predicting with model '{model_id}': {str(e)}", exc_info=True)
+        return jsonify({"error": str(e), "message": "Internal server error."}), 500
 
 @app.route('/health', methods=['GET'])
 def health_check():
-    """Health check endpoint"""
-    return jsonify({
-        "status": "healthy",
-        "models_loaded": len(models),
-        "models_total": len(model_info),
-        "memory_usage": "CPU only"
-    })
+    """Health check endpoint."""
+    return jsonify({"status": "healthy", "loaded_models_count": len(models), "message": "API is operating normally."})
 
+# --- Run the application ---
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860, debug=False)
+    # On Hugging Face Spaces the port is usually 7860.
+    # Use HOST from the environment variable when available, defaulting to 0.0.0.0.
+    # debug=False for production.
+    app.run(host=os.getenv('HOST', '0.0.0.0'), port=int(os.getenv('PORT', 7860)), debug=False)
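
For reference, the endpoints introduced in this version of app.py can be exercised with a short client script. This is a minimal sketch, not part of the commit: the base URL, model choices, and prompts are illustrative assumptions, while the request and response fields follow the handlers above.

# client_example.py (illustrative; assumes the server above is reachable at localhost:7860)
import requests

BASE_URL = "http://localhost:7860"  # assumption: adjust to your Space's URL

# Text generation: 'parameters' is optional; missing keys fall back to the server defaults
resp = requests.post(
    f"{BASE_URL}/api/GPT-2",
    json={
        "inputs": "Once upon a time",
        "parameters": {"max_new_tokens": 50, "temperature": 0.8},
    },
)
resp.raise_for_status()
print(resp.json()["outputs"]["text"])  # with return_full_text=False the prompt is stripped

# Fill-mask: the input must contain the model's mask token ([MASK] for BERT-style models)
resp = requests.post(
    f"{BASE_URL}/api/Distilbert-Base-Uncased",
    json={"inputs": "Paris is the [MASK] of France."},
)
resp.raise_for_status()
for p in resp.json()["outputs"]["predictions"]:
    print(f"{p['token_str']!r}: {p['score']:.3f}")

Because models are now loaded lazily, the first request to each model pays the load cost; GET /api/models reports which models have been loaded so far, and GET /health gives a quick liveness check.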