Lyon28 committed (verified) · Commit 876efd8 · 1 Parent(s): e0aaa0a

Create app.py

Files changed (1)
  1. app.py +62 -0
app.py ADDED
@@ -0,0 +1,62 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from transformers import pipeline
+
+ # 1. Define the application's "brain" (exactly the same as in the Gradio version)
+ MODEL_CONFIG = {
+     # (Copy the complete MODEL_CONFIG from above into here)
+     "Lyon28/GPT-2-Tinny": {"task": "text-generation", "display_name": "GPT-2 (Tiny)"},
+     "Lyon28/GPT-2": {"task": "text-generation", "display_name": "GPT-2"},
+     "Lyon28/Distil_GPT-2": {"task": "text-generation", "display_name": "DistilGPT-2"},
+     "Lyon28/GPT-Neo": {"task": "text-generation", "display_name": "GPT-Neo"},
+     "Lyon28/Pythia": {"task": "text-generation", "display_name": "Pythia"},
+     "Lyon28/Tinny-Llama": {"task": "text-generation", "display_name": "Tinny-Llama"},
+     "Lyon28/Bert-Tinny": {"task": "fill-mask", "display_name": "BERT (Tiny)"},
+     "Lyon28/Distilbert-Base-Uncased": {"task": "fill-mask", "display_name": "DistilBERT"},
+     "Lyon28/Albert-Base-V2": {"task": "fill-mask", "display_name": "Albert v2"},
+     "Lyon28/Electra-Small": {"task": "fill-mask", "display_name": "Electra (Small)"},
+     "Lyon28/T5-Small": {"task": "text2text-generation", "display_name": "T5 (Small)"},
+ }
+
+ # 2. Create the model "warehouse" (the same in-memory cache)
+ loaded_pipelines = {}
+
+ # 3. Define the accepted request format
+ class InferenceRequest(BaseModel):
+     model_id: str
+     prompt: str
+
+ app = FastAPI()
+
+ @app.get("/")
+ def read_root():
+     return {"message": "Smart Inference API is running. Use the /inference endpoint."}
+
+ @app.post("/inference")
+ def smart_inference(request: InferenceRequest):
+     model_id = request.model_id
+
+     # Validation: check that model_id exists in our config
+     if model_id not in MODEL_CONFIG:
+         raise HTTPException(status_code=400, detail=f"Model '{model_id}' is not valid or not supported.")
+
+     task = MODEL_CONFIG[model_id]["task"]
+
+     # Check the "warehouse" (same caching logic as before)
+     if model_id not in loaded_pipelines:
+         print(f"Loading model: {model_id} for task: {task}...")
+         try:
+             pipe = pipeline(task, model=model_id, device=-1)
+             loaded_pipelines[model_id] = pipe
+             print("Model loaded successfully.")
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=f"Failed to load model: {str(e)}")
+
+     pipe = loaded_pipelines[model_id]
+
+     # Run inference
+     try:
+         result = pipe(request.prompt)
+         return {"model_used": model_id, "task": task, "input_prompt": request.prompt, "output": result}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Failed to run inference: {str(e)}")