Update app.py

app.py CHANGED
@@ -3,7 +3,7 @@ from pydantic import BaseModel
 from transformers import pipeline
 import torch
 from fastapi.middleware.cors import CORSMiddleware
-from typing import Dict, Any
+from typing import Dict, Any, Optional
 import os  # Import os module
 
 # Initialize the FastAPI application
@@ -45,6 +45,7 @@ TASK_MAP = {
 
 class InferenceRequest(BaseModel):
     text: str
+    model_id: Optional[str] = "gpt-2"  # Default model
     max_length: int = 100
     temperature: float = 0.9
     top_p: float = 0.95
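A side note on the new field, not part of this commit: under Pydantic v2, field names starting with `model_` collide with Pydantic's protected `model_` namespace and trigger a warning at import time. A minimal sketch of how the schema could silence it, assuming the Space runs Pydantic v2 (`ConfigDict` does not exist in v1):

```python
from typing import Optional
from pydantic import BaseModel, ConfigDict

class InferenceRequest(BaseModel):
    # Pydantic v2 warns that "model_id" shadows its protected "model_" namespace;
    # emptying protected_namespaces suppresses the warning (v2 only).
    model_config = ConfigDict(protected_namespaces=())

    text: str
    model_id: Optional[str] = "gpt-2"  # default model, as in the diff above
    max_length: int = 100
    temperature: float = 0.9
    top_p: float = 0.95
```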
@@ -66,7 +67,6 @@ async def load_models():
     os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
     os.makedirs(os.environ['HF_HOME'], exist_ok=True)
 
-
 # Main endpoint
 @app.get("/")
 async def root():
@@ -76,9 +76,14 @@ async def root():
             "documentation": "/docs",
             "model_list": "/models",
             "health_check": "/health",
-            "
+            "inference_with_model": "/inference/{model_id}",
+            "inference_general": "/inference"
         },
-        "total_models": len(MODEL_MAP)
+        "total_models": len(MODEL_MAP),
+        "usage_examples": {
+            "specific_model": "POST /inference/gpt-2 with JSON body",
+            "general_inference": "POST /inference with model_id in JSON body"
+        }
     }
 
 # Endpoint for listing models
@@ -98,18 +103,34 @@ async def health_check():
         "gpu_type": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU-only"
     }
 
-#
+# NEW: General inference endpoint (handles POST /inference)
+@app.post("/inference")
+async def general_inference(request: InferenceRequest):
+    """
+    General inference endpoint that accepts model_id in the request body
+    """
+    return await process_inference(request.model_id, request)
+
+# Inference endpoint with model_id in the path
 @app.post("/inference/{model_id}")
 async def model_inference(model_id: str, request: InferenceRequest):
+    """
+    Specific model inference endpoint with model_id in path
+    """
+    return await process_inference(model_id, request)
+
+# Shared inference processing function
+async def process_inference(model_id: str, request: InferenceRequest):
     try:
         # Make sure model_id is lowercase so it matches MODEL_MAP
         model_id = model_id.lower()
 
         # Validate the model ID
         if model_id not in MODEL_MAP:
+            available_models = ", ".join(MODEL_MAP.keys())
             raise HTTPException(
                 status_code=404,
-                detail=f"Model '{model_id}' not found."
+                detail=f"Model '{model_id}' not found. Available models: {available_models}"
             )
 
         # Get the matching task
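With both routes delegating to process_inference, a client can pick the model either in the path or in the body. A usage sketch; the base URL and port are assumptions (7860 is the usual Hugging Face Spaces port), and "gpt-2" is the default key from the diff:

```python
import requests

BASE = "http://localhost:7860"  # assumed host/port; adjust to the deployed Space

# General route: the model is chosen via the new model_id body field.
r = requests.post(f"{BASE}/inference", json={"text": "Once upon a time"})
print(r.json())  # falls back to the "gpt-2" default from InferenceRequest

# Path route: the model is chosen in the URL; the body's model_id is ignored here.
r = requests.post(f"{BASE}/inference/gpt-2", json={"text": "Once upon a time"})
print(r.json())
```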
@@ -124,13 +145,20 @@ async def model_inference(model_id: str, request: InferenceRequest):
             # Choose the dtype based on the device
             dtype_to_use = torch.float16 if torch.cuda.is_available() else torch.float32
 
-
-
-
-
-
-
-
+            try:
+                app.state.pipelines[model_id] = pipeline(
+                    task=task,
+                    model=MODEL_MAP[model_id],
+                    device=device_to_use,
+                    torch_dtype=dtype_to_use
+                )
+                print(f"✅ Model {model_id} loaded successfully!")
+            except Exception as load_error:
+                print(f"❌ Failed to load model {model_id}: {load_error}")
+                raise HTTPException(
+                    status_code=503,
+                    detail=f"Failed to load model {model_id}. Try again later."
+                )
 
         pipe = app.state.pipelines[model_id]
 
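The seven deleted lines in this hunk were not captured by the diff viewer; from context they most likely held the previous pipeline construction without error handling. A sketch of how the new block plausibly sits inside process_inference, where the `if model_id not in app.state.pipelines:` cache guard and the `device_to_use` definition are assumptions (both fall outside the visible hunk):

```python
# Hypothetical surrounding context -- guard and device selection are assumed.
if model_id not in app.state.pipelines:  # load each model at most once per process
    device_to_use = 0 if torch.cuda.is_available() else -1  # transformers device index
    dtype_to_use = torch.float16 if torch.cuda.is_available() else torch.float32
    try:
        app.state.pipelines[model_id] = pipeline(
            task=task,
            model=MODEL_MAP[model_id],
            device=device_to_use,
            torch_dtype=dtype_to_use
        )
    except Exception as load_error:
        raise HTTPException(status_code=503,
                            detail=f"Failed to load model {model_id}. Try again later.")

pipe = app.state.pipelines[model_id]  # cache hit or freshly loaded
```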
@@ -140,7 +168,8 @@ async def model_inference(model_id: str, request: InferenceRequest):
                 request.text,
                 max_length=request.max_length,
                 temperature=request.temperature,
-                top_p=request.top_p
+                top_p=request.top_p,
+                do_sample=True
             )[0]['generated_text']
 
         elif task == "text-classification":
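The added do_sample=True is what makes temperature and top_p take effect: with the default greedy decoding, recent transformers releases ignore both parameters and log a warning instead. A minimal illustration:

```python
from transformers import pipeline

gen = pipeline("text-generation", model="gpt2")

# Without do_sample=True the call decodes greedily and temperature/top_p are ignored.
out = gen("Once upon a time", max_length=30,
          temperature=0.9, top_p=0.95, do_sample=True)
print(out[0]["generated_text"])
```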
@@ -165,8 +194,16 @@ async def model_inference(model_id: str, request: InferenceRequest):
                 detail=f"Task ({task}) for model {model_id} is not supported or not recognized."
             )
 
-        return {
+        return {
+            "result": result,
+            "model_used": model_id,
+            "task": task,
+            "status": "success"
+        }
 
+    except HTTPException as he:
+        # Re-raise HTTP exceptions
+        raise he
     except Exception as e:
         # Log the error in more detail for debugging
         print(f"‼️ Error while processing model {model_id}: {e}")
@@ -178,7 +215,17 @@ async def model_inference(model_id: str, request: InferenceRequest):
             detail=f"Error processing request: {str(e)}. Check the server logs for details."
         )
 
-#
-
-
-
+# Error handler for 404
+@app.exception_handler(404)
+async def not_found_handler(request, exc):
+    return {
+        "error": "Endpoint not found",
+        "available_endpoints": [
+            "GET /",
+            "GET /models",
+            "GET /health",
+            "POST /inference",
+            "POST /inference/{model_id}"
+        ],
+        "tip": "Use /docs for the full documentation"
+    }
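One caveat on the new 404 handler, worth flagging even though the commit ships it as-is: FastAPI (via Starlette) expects an exception handler to return a Response object, so returning a bare dict fails at request time, and a status-code handler also intercepts the HTTPException(404) raised above for unknown models. A corrected sketch using JSONResponse:

```python
from fastapi.responses import JSONResponse

@app.exception_handler(404)
async def not_found_handler(request, exc):
    # Exception handlers bypass FastAPI's automatic dict-to-JSON conversion,
    # so the payload must be wrapped in a Response explicitly.
    return JSONResponse(
        status_code=404,
        content={
            "error": "Endpoint not found",
            "detail": getattr(exc, "detail", None),  # keep e.g. the model-not-found message
            "available_endpoints": [
                "GET /", "GET /models", "GET /health",
                "POST /inference", "POST /inference/{model_id}",
            ],
            "tip": "Use /docs for the full documentation",
        },
    )
```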