Lyon28 committed
Commit f4764ff · verified · 1 Parent(s): 055b42e

Update Dockerfile

Files changed (1)
  1. Dockerfile +5 -150
Dockerfile CHANGED
@@ -1,153 +1,8 @@
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- from transformers import pipeline
- import torch
- from fastapi.middleware.cors import CORSMiddleware
- from typing import Dict, Any
-
- # Initialize the FastAPI application
- app = FastAPI(
-     title="Lyon28 Model Inference API",
-     description="API for accessing 11 machine learning models",
-     version="1.0.0"
- )
-
- # CORS configuration for external frontends
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # Model configuration
- MODEL_MAP = {
-     "tinny-llama": "Lyon28/Tinny-Llama",
-     "pythia": "Lyon28/Pythia",
-     "bert-tinny": "Lyon28/Bert-Tinny",
-     "albert-base-v2": "Lyon28/Albert-Base-V2",
-     "t5-small": "Lyon28/T5-Small",
-     "gpt-2": "Lyon28/GPT-2",
-     "gpt-neo": "Lyon28/GPT-Neo",
-     "distilbert-base-uncased": "Lyon28/Distilbert-Base-Uncased",
-     "distil-gpt-2": "Lyon28/Distil_GPT-2",
-     "gpt-2-tinny": "Lyon28/GPT-2-Tinny",
-     "electra-small": "Lyon28/Electra-Small"
- }
-
- TASK_MAP = {
-     "text-generation": ["gpt-2", "gpt-neo", "distil-gpt-2", "gpt-2-tinny", "tinny-llama", "pythia"],
-     "text-classification": ["bert-tinny", "albert-base-v2", "distilbert-base-uncased", "electra-small"],
-     "text2text-generation": ["t5-small"]
- }
-
- class InferenceRequest(BaseModel):
-     text: str
-     max_length: int = 100
-     temperature: float = 0.9
-     top_p: float = 0.95
-
- # Helper functions
- def get_task(model_id: str) -> str:
-     for task, models in TASK_MAP.items():
-         if model_id in models:
-             return task
-     return "text-generation"
-
- # Startup event for model initialization
- @app.on_event("startup")
- async def load_models():
-     app.state.pipelines = {}
-     print("🟢 All models ready to use!")
-
- # Root endpoint
- @app.get("/")
- async def root():
-     return {
-         "message": "Welcome to the Lyon28 Model API",
-         "endpoints": {
-             "documentation": "/docs",
-             "model_list": "/models",
-             "health_check": "/health",
-             "inference": "/inference/{model_id}"
-         },
-         "total_models": len(MODEL_MAP)
-     }
-
- # Model list endpoint
- @app.get("/models")
- async def list_models():
-     return {
-         "available_models": list(MODEL_MAP.keys()),
-         "total_models": len(MODEL_MAP)
-     }
-
- # Health-check endpoint
- @app.get("/health")
- async def health_check():
-     return {
-         "status": "healthy",
-         "gpu_available": torch.cuda.is_available(),
-         "gpu_type": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU-only"
-     }
-
- # Main inference endpoint
- @app.post("/inference/{model_id}")
- async def model_inference(model_id: str, request: InferenceRequest):
-     try:
-         # Validate the model ID
-         if model_id not in MODEL_MAP:
-             raise HTTPException(
-                 status_code=404,
-                 detail=f"Model {model_id} not found. Check /models for the list of available models."
-             )
-
-         # Get the matching task
-         task = get_task(model_id)
-
-         # Load the model if it is not yet in memory
-         if model_id not in app.state.pipelines:
-             app.state.pipelines[model_id] = pipeline(
-                 task=task,
-                 model=MODEL_MAP[model_id],
-                 device=0 if torch.cuda.is_available() else -1,
-                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-             )
-             print(f"✅ Model {model_id} loaded successfully!")
-
-         pipe = app.state.pipelines[model_id]
-
-         # Dispatch on task type
-         if task == "text-generation":
-             result = pipe(
-                 request.text,
-                 max_length=request.max_length,
-                 temperature=request.temperature,
-                 top_p=request.top_p
-             )[0]['generated_text']
-
-         elif task == "text-classification":
-             output = pipe(request.text)[0]
-             result = {
-                 "label": output['label'],
-                 "confidence": round(output['score'], 4)
-             }
-
-         elif task == "text2text-generation":
-             result = pipe(
-                 request.text,
-                 max_length=request.max_length
-             )[0]['generated_text']
-
-         return {"result": result}
-
-     except Exception as e:
-         raise HTTPException(
-             status_code=500,
-             detail=f"Error processing request: {str(e)}"
-         )
-
- if __name__ == "__main__":
-     import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=7860)
+ FROM python:3.9-slim
+
+ WORKDIR /app
+ COPY . .
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]