Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException
|
2 |
+
from pydantic import BaseModel
|
3 |
+
from transformers import pipeline
|
4 |
+
|
5 |
+
# 1. Define the application's "brain" (identical to the Gradio version).
# Maps each supported model ID to the pipeline task it is loaded with and a
# human-readable display name. Only the IDs listed here are accepted by the
# /inference endpoint.
MODEL_CONFIG = {
    # Causal text-generation models.
    "Lyon28/GPT-2-Tinny": {"task": "text-generation", "display_name": "GPT-2 (Tiny)"},
    "Lyon28/GPT-2": {"task": "text-generation", "display_name": "GPT-2"},
    "Lyon28/Distil_GPT-2": {"task": "text-generation", "display_name": "DistilGPT-2"},
    "Lyon28/GPT-Neo": {"task": "text-generation", "display_name": "GPT-Neo"},
    "Lyon28/Pythia": {"task": "text-generation", "display_name": "Pythia"},
    "Lyon28/Tinny-Llama": {"task": "text-generation", "display_name": "Tinny-Llama"},
    # Masked-language (fill-mask) models.
    "Lyon28/Bert-Tinny": {"task": "fill-mask", "display_name": "BERT (Tiny)"},
    "Lyon28/Distilbert-Base-Uncased": {"task": "fill-mask", "display_name": "DistilBERT"},
    "Lyon28/Albert-Base-V2": {"task": "fill-mask", "display_name": "Albert v2"},
    "Lyon28/Electra-Small": {"task": "fill-mask", "display_name": "Electra (Small)"},
    # Sequence-to-sequence model.
    "Lyon28/T5-Small": {"task": "text2text-generation", "display_name": "T5 (Small)"},
}
|
20 |
+
|
21 |
+
# 2. Create the model "warehouse" (identical to the Gradio version).
# In-memory cache of already-loaded pipelines, keyed by model ID, so a model
# is only loaded the first time it is requested.
loaded_pipelines = {}
|
23 |
+
|
24 |
+
# 3. Define the format of the request the API accepts.
class InferenceRequest(BaseModel):
    """Request body accepted by the ``/inference`` endpoint."""

    # ID of the model to run; must be one of the keys of MODEL_CONFIG.
    # NOTE(review): some pydantic v2 releases warn about field names starting
    # with "model_" (protected namespace) -- confirm against the pinned
    # pydantic version before renaming or silencing.
    model_id: str
    # Input text handed to the selected pipeline.
    prompt: str
|
28 |
+
|
29 |
+
# FastAPI application instance; the @app.get / @app.post decorators in this
# file register their route handlers on it.
app = FastAPI()
|
30 |
+
|
31 |
+
@app.get("/")
def read_root():
    """Return a static status message for the root path.

    The message tells clients the API is running and points them at the
    ``/inference`` endpoint.
    """
    return {"message": "Smart Inference API is running. Use the /inference endpoint."}
|
34 |
+
|
35 |
+
@app.post("/inference")
def smart_inference(request: InferenceRequest):
    """Run ``request.prompt`` through the model named by ``request.model_id``.

    The pipeline for a model is created on first use and stored in
    ``loaded_pipelines`` so later requests for the same model reuse it.

    Args:
        request: Body carrying the ``model_id`` to use and the ``prompt`` text.

    Returns:
        A dict with the keys ``model_used``, ``task``, ``input_prompt`` and
        ``output`` (the raw value returned by the pipeline call).

    Raises:
        HTTPException: 400 if ``model_id`` is not a key of ``MODEL_CONFIG``;
            500 if loading the model or running inference raises.
    """
    model_id = request.model_id

    # Validation: only models listed in MODEL_CONFIG are accepted.
    if model_id not in MODEL_CONFIG:
        raise HTTPException(status_code=400, detail=f"Model '{model_id}' tidak valid atau tidak didukung.")

    task = MODEL_CONFIG[model_id]["task"]

    # Check the "warehouse" (cache) first; load the pipeline only on a miss.
    pipe = loaded_pipelines.get(model_id)
    if pipe is None:
        print(f"Memuat model: {model_id} untuk task: {task}...")
        try:
            # device=-1 is the transformers convention for running on CPU.
            pipe = pipeline(task, model=model_id, device=-1)
        except Exception as e:
            # Surface load failures as a 500 while keeping the original cause.
            raise HTTPException(status_code=500, detail=f"Gagal memuat model: {e}") from e
        loaded_pipelines[model_id] = pipe
        print("Model berhasil dimuat.")

    # Run inference; any pipeline error is reported to the client as a 500.
    try:
        result = pipe(request.prompt)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Gagal melakukan inference: {e}") from e

    return {"model_used": model_id, "task": task, "input_prompt": request.prompt, "output": result}
|