from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import pipeline
import torch
# Initialize the FastAPI application
app = FastAPI(
    title="Lyon28 Model Inference API",
    description="API for accessing 11 machine learning models",
    version="1.0.0"
)
# CORS configuration for external frontends
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
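# Note: wildcard origins combined with allow_credentials=True are convenient for
# a demo Space but very permissive; consider restricting allow_origins in production.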
# Model configuration
MODEL_MAP = {
    "tinny-llama": "Lyon28/Tinny-Llama",
    "pythia": "Lyon28/Pythia",
    "bert-tinny": "Lyon28/Bert-Tinny",
    "albert-base-v2": "Lyon28/Albert-Base-V2",
    "t5-small": "Lyon28/T5-Small",
    "gpt-2": "Lyon28/GPT-2",
    "gpt-neo": "Lyon28/GPT-Neo",
    "distilbert-base-uncased": "Lyon28/Distilbert-Base-Uncased",
    "distil-gpt-2": "Lyon28/Distil_GPT-2",
    "gpt-2-tinny": "Lyon28/GPT-2-Tinny",
    "electra-small": "Lyon28/Electra-Small"
}
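# The keys above are the {model_id} values accepted by /inference/{model_id};
# the values are the corresponding Hugging Face Hub repository names.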
TASK_MAP = {
    "text-generation": ["gpt-2", "gpt-neo", "distil-gpt-2", "gpt-2-tinny", "tinny-llama", "pythia"],
    "text-classification": ["bert-tinny", "albert-base-v2", "distilbert-base-uncased", "electra-small"],
    "text2text-generation": ["t5-small"]
}
class InferenceRequest(BaseModel):
    text: str
    max_length: int = 100
    temperature: float = 0.9
    top_p: float = 0.95
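# Example request body (fields other than "text" fall back to the defaults above):
#   {"text": "Hello world", "max_length": 80, "temperature": 0.7, "top_p": 0.9}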
# Helper: resolve the pipeline task for a given model ID
def get_task(model_id: str) -> str:
    for task, models in TASK_MAP.items():
        if model_id in models:
            return task
    # Defensive fallback for models missing from TASK_MAP
    return "text-generation"
# Startup event: initialize the pipeline cache
@app.on_event("startup")
async def load_models():
    # Models are loaded lazily on first request and cached here
    app.state.pipelines = {}
    print("🟢 All models ready to use!")
# Root endpoint
@app.get("/")
async def root():
    return {
        "message": "Welcome to the Lyon28 Model API",
        "endpoints": {
            "documentation": "/docs",
            "model_list": "/models",
            "health_check": "/health",
            "inference": "/inference/{model_id}"
        },
        "total_models": len(MODEL_MAP)
    }
# Endpoint listing the available models
@app.get("/models")
async def list_models():
    return {
        "available_models": list(MODEL_MAP.keys()),
        "total_models": len(MODEL_MAP)
    }
# Health check endpoint
@app.get("/health")
async def health_check():
    return {
        "status": "healthy",
        "gpu_available": torch.cuda.is_available(),
        "gpu_type": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU-only"
    }
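# Example response on a CPU-only machine:
#   {"status": "healthy", "gpu_available": false, "gpu_type": "CPU-only"}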
# Main inference endpoint
@app.post("/inference/{model_id}")
async def model_inference(model_id: str, request: InferenceRequest):
    # Validate the model ID outside the try/except below, so the 404 is not
    # caught by the generic handler and re-raised as a 500
    if model_id not in MODEL_MAP:
        raise HTTPException(
            status_code=404,
            detail=f"Model {model_id} not found. See /models for the list of available models."
        )
    # Resolve the matching task
    task = get_task(model_id)
    try:
        # Lazily load the model if it is not in memory yet
        if model_id not in app.state.pipelines:
            app.state.pipelines[model_id] = pipeline(
                task=task,
                model=MODEL_MAP[model_id],
                device=0 if torch.cuda.is_available() else -1,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
            )
            print(f"✅ Model {model_id} loaded successfully!")
        pipe = app.state.pipelines[model_id]
        # Dispatch on the task type
        if task == "text-generation":
            result = pipe(
                request.text,
                max_length=request.max_length,
                temperature=request.temperature,
                top_p=request.top_p,
                do_sample=True  # sampling must be enabled for temperature/top_p to take effect
            )[0]['generated_text']
        elif task == "text-classification":
            output = pipe(request.text)[0]
            result = {
                "label": output['label'],
                "confidence": round(output['score'], 4)
            }
        elif task == "text2text-generation":
            result = pipe(
                request.text,
                max_length=request.max_length
            )[0]['generated_text']
        return {"result": result}
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error processing request: {str(e)}"
        )
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
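# Example client call (a sketch; assumes the server is reachable at
# http://localhost:7860 and that the `requests` package is installed):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/inference/gpt-2",
#       json={"text": "Once upon a time", "max_length": 50},
#   )
#   print(resp.json())  # -> {"result": "Once upon a time ..."}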