Spaces:

xuanzang
/

nggox-fastapi

Sleeping

App Files Files Community

xuanzang committed 24 days ago

Commit

d1a05cc

1 Parent(s): adead6c

Enhance FastAPI application with model training and prediction features, including detailed response models and health check endpoint.

Browse files

Files changed (1) hide show

app.py +158 -71

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from fastapi import FastAPI, UploadFile, Form, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 import pandas as pd
@@ -9,10 +9,14 @@ from sklearn.model_selection import train_test_split
 from sklearn.metrics import confusion_matrix
 import json
 import io
-from typing import Dict, List, Optional
-from pydantic import BaseModel
-app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
@@ -22,117 +26,200 @@ app.add_middleware(
     allow_headers=["*"],
 )
-model = None
-feature_encoders: Dict[str, LabelEncoder] = {}
-target_encoder: Optional[LabelEncoder] = None
 class TrainOptions(BaseModel):
-    target_column: str
-    feature_columns: List[str]
 class PredictionFeatures(BaseModel):
-    features: Dict[str, str]
-@app.get("/api/health")
 async def health_check():
     return {"status": "healthy"}
-@app.post("/api/upload")
-async def upload_csv(file: UploadFile):
     if not file.filename.endswith('.csv'):
         raise HTTPException(status_code=400, detail="Only CSV files are allowed")
     try:
         contents = await file.read()
         df = pd.read_csv(io.StringIO(contents.decode()))
         columns = df.columns.tolist()
         column_types = {col: str(df[col].dtype) for col in columns}
         unique_values = {col: df[col].unique().tolist() for col in columns}
         for col, values in unique_values.items():
             unique_values[col] = [v.item() if isinstance(v, np.generic) else v for v in values]
-        return {
-            "message": "File uploaded successfully",
-            "columns": columns,
-            "column_types": column_types,
-            "unique_values": unique_values,
-            "row_count": len(df)
-        }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
-@app.post("/api/train")
-async def train_model(file: UploadFile, options: str = Form(...)):
-    global model, feature_encoders, target_encoder
     try:
         train_options = json.loads(options)
         target_column = train_options["target_column"]
         feature_columns = train_options["feature_columns"]
         contents = await file.read()
         df = pd.read_csv(io.StringIO(contents.decode()))
         X = pd.DataFrame()
-        feature_encoders = {}
         for column in feature_columns:
             encoder = LabelEncoder()
             X[column] = encoder.fit_transform(df[column])
-            feature_encoders[column] = encoder
-        target_encoder = LabelEncoder()
-        y = target_encoder.fit_transform(df[target_column])
         X_train, X_test, y_train, y_test = train_test_split(
             X, y, test_size=0.2, random_state=42
         )
-        model = CategoricalNB()
-        model.fit(X_train, y_train)
-        accuracy = float(model.score(X_test, y_test))
-        return {
-            "message": "Model trained successfully",
-            "accuracy": accuracy,
-            "target_classes": target_encoder.classes_.tolist()
-        }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
-@app.post("/api/predict")
-async def predict(features: PredictionFeatures):
-    global model, feature_encoders, target_encoder
-    if model is None:
         raise HTTPException(status_code=400, detail="Model not trained yet")
     try:
         encoded_features = {}
         for column, value in features.features.items():
-            if column in feature_encoders:
-                encoded_features[column] = feature_encoders[column].transform([value])[0]
         X = pd.DataFrame([encoded_features])
-        prediction = model.predict(X)
-        prediction_proba = model.predict_proba(X)
-        predicted_class = target_encoder.inverse_transform(prediction)[0]
         class_probabilities = {
-            target_encoder.inverse_transform([i])[0]: float(prob)
             for i, prob in enumerate(prediction_proba[0])
         }
-        return {
-            "prediction": predicted_class,
-            "probabilities": class_probabilities
-        }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

+from fastapi import FastAPI, UploadFile, Form, HTTPException, Depends, status
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 import pandas as pd
 from sklearn.metrics import confusion_matrix
 import json
 import io
+from typing import Dict, List, Optional, Any
+from pydantic import BaseModel, Field
# Application instance. The title, description and version strings are passed
# straight to the FastAPI constructor as API metadata.
app = FastAPI(
    title="Categorical Naive Bayes API",
    description=(
        "API for uploading CSVs, training a Categorical Naive Bayes model, "
        "and making predictions."
    ),
    version="1.0.0",
)
 app.add_middleware(
     CORSMiddleware,
     allow_headers=["*"],
 )
 class TrainOptions(BaseModel):
+    target_column: str = Field(..., description="The name of the target column.")
+    feature_columns: List[str] = Field(..., description="List of feature column names.")
 class PredictionFeatures(BaseModel):
+    features: Dict[str, str] = Field(..., description="Dictionary of feature values for prediction.")
# Response body of the upload endpoint: metadata describing the parsed CSV.
class UploadResponse(BaseModel):
    # Human-readable status message.
    message: str
    # Column names in file order.
    columns: List[str]
    # Column name -> dtype rendered as a string.
    column_types: Dict[str, str]
    # Column name -> distinct values found in that column.
    unique_values: Dict[str, List[Any]]
    # Number of rows in the parsed frame.
    row_count: int
# Response body of the training endpoint.
class TrainResponse(BaseModel):
    # Human-readable status message.
    message: str
    # Score of the fitted model on the held-out split.
    accuracy: float
    # Labels known to the target encoder.
    target_classes: List[str]
# Response body of the prediction endpoint.
class PredictResponse(BaseModel):
    # Predicted class label.
    prediction: str
    # Class label -> predicted probability.
    probabilities: Dict[str, float]
class ModelState:
    """Mutable container for the trained model and its companion objects.

    A fresh instance holds no model: every attribute starts as ``None``,
    except ``feature_encoders`` which starts as an empty dict.
    """

    def __init__(self):
        # Fitted classifier, or None until one has been stored.
        self.model: Optional[CategoricalNB] = None

        # Feature column name -> encoder fitted on that column.
        self.feature_encoders: Dict[str, LabelEncoder] = {}

        # Encoder fitted on the target column.
        self.target_encoder: Optional[LabelEncoder] = None

        # Held-out split, kept for later use.
        self.X_test: Optional[pd.DataFrame] = None
        self.y_test: Optional[np.ndarray] = None
# One module-level instance; get_model_state() always hands back this object.
model_state = ModelState()


def get_model_state():
    """Return the module-level ``model_state`` instance."""
    return model_state
# Liveness probe: always answers with a fixed payload.
@app.get(
    "/api/health",
    tags=["Health"],
    summary="Health Check",
    response_model=Dict[str, str],
)
async def health_check():
    """Check API health."""
    payload = {"status": "healthy"}
    return payload
def _to_json_value(value):
    """Convert one cell value into something a JSON response can carry."""
    if isinstance(value, np.generic):
        value = value.item()
    # NaN is not valid JSON; report missing cells as null instead.
    return None if pd.isna(value) else value


# POST /api/upload
#
# Parameters:
#   file: the uploaded file; its name must end in ".csv" (any letter case).
# Returns:
#   UploadResponse with the column names, per-column dtype strings,
#   per-column distinct values (missing values reported as null) and the
#   row count.
# Raises:
#   HTTPException 400: wrong or missing file name, or content that cannot be
#                      decoded or parsed as CSV.
#   HTTPException 500: any other failure while reading or summarising.
#
# The function docstring is left as a one-liner because FastAPI publishes it
# as the operation description.
@app.post(
    "/api/upload",
    tags=["Data"],
    summary="Upload CSV File",
    response_model=UploadResponse,
    status_code=status.HTTP_200_OK,
)
async def upload_csv(file: UploadFile) -> UploadResponse:
    """Upload a CSV file and get metadata about its columns."""
    # filename can be None for some multipart uploads; treat that as "not CSV"
    # instead of crashing with AttributeError.
    if not (file.filename or "").lower().endswith(".csv"):
        raise HTTPException(status_code=400, detail="Only CSV files are allowed")

    try:
        contents = await file.read()
        df = pd.read_csv(io.StringIO(contents.decode()))
    except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
        # Bad input is the client's problem, not a server fault.
        raise HTTPException(status_code=400, detail=f"Invalid CSV file: {e}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    try:
        columns = df.columns.tolist()
        column_types = {col: str(df[col].dtype) for col in columns}
        unique_values = {
            col: [_to_json_value(v) for v in df[col].unique().tolist()]
            for col in columns
        }
        return UploadResponse(
            message="File uploaded successfully",
            columns=columns,
            column_types=column_types,
            unique_values=unique_values,
            row_count=len(df),
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# POST /api/train
#
# Parameters:
#   file:    uploaded CSV holding the training data.
#   options: form field containing a JSON object with "target_column" (str)
#            and "feature_columns" (non-empty list of column names).
#   state:   shared ModelState that receives the fitted artefacts.
# Returns:
#   TrainResponse with the held-out accuracy and the target labels as strings.
# Raises:
#   HTTPException 400: malformed options, or columns absent from the CSV.
#   HTTPException 500: any other failure while reading or fitting.
#
# The function docstring is left as a one-liner because FastAPI publishes it
# as the operation description.
@app.post(
    "/api/train",
    tags=["Model"],
    summary="Train Model",
    response_model=TrainResponse,
    status_code=status.HTTP_200_OK,
)
async def train_model(
    file: UploadFile,
    options: str = Form(...),
    state: ModelState = Depends(get_model_state)
) -> TrainResponse:
    """Train a Categorical Naive Bayes model on the uploaded CSV."""
    try:
        train_options = json.loads(options)
        target_column = train_options["target_column"]
        feature_columns = train_options["feature_columns"]
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        raise HTTPException(
            status_code=400, detail=f"Invalid training options: {e}"
        ) from e
    if (
        not isinstance(target_column, str)
        or not isinstance(feature_columns, list)
        or not feature_columns
    ):
        raise HTTPException(
            status_code=400,
            detail="target_column must be a string and feature_columns a non-empty list",
        )

    try:
        contents = await file.read()
        df = pd.read_csv(io.StringIO(contents.decode()))

        missing = [c for c in [target_column, *feature_columns] if c not in df.columns]
        if missing:
            raise HTTPException(
                status_code=400, detail=f"Columns not found in CSV: {missing}"
            )

        # Everything is built in locals first and only copied onto `state`
        # after training succeeded, so a failed request cannot leave new
        # encoders paired with a previously trained model.
        feature_encoders = {}
        X = pd.DataFrame()
        for column in feature_columns:
            encoder = LabelEncoder()
            X[column] = encoder.fit_transform(df[column])
            feature_encoders[column] = encoder

        target_encoder = LabelEncoder()
        y = target_encoder.fit_transform(df[target_column])

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Tell the model how many categories each feature really has; without
        # this a category that only occurs in the held-out rows is out of
        # range when scoring or predicting.
        model = CategoricalNB(
            min_categories=[len(feature_encoders[c].classes_) for c in X.columns]
        )
        model.fit(X_train, y_train)
        accuracy = float(model.score(X_test, y_test))

        state.model = model
        state.feature_encoders = feature_encoders
        state.target_encoder = target_encoder
        state.X_test = X_test
        state.y_test = y_test

        return TrainResponse(
            message="Model trained successfully",
            accuracy=accuracy,
            # Labels may be numbers; the response model declares strings.
            target_classes=[str(label) for label in target_encoder.classes_],
        )
    except HTTPException:
        # Keep the deliberate 400 above from being rewrapped as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# POST /api/predict
#
# Parameters:
#   features: request body with one string value per feature name. Names the
#             model was not trained on are ignored.
#   state:    shared ModelState holding the fitted model and encoders.
# Returns:
#   PredictResponse with the predicted label and a label -> probability map.
# Raises:
#   HTTPException 400: no model trained yet, a trained feature is missing
#                      from the request, or a value the encoder cannot map.
#   HTTPException 500: any other failure during prediction.
#
# The function docstring is left as a one-liner because FastAPI publishes it
# as the operation description.
@app.post(
    "/api/predict",
    tags=["Model"],
    summary="Predict",
    response_model=PredictResponse,
    status_code=status.HTTP_200_OK,
)
async def predict(
    features: PredictionFeatures,
    state: ModelState = Depends(get_model_state)
) -> PredictResponse:
    """Predict the target class for given features using the trained model."""
    if state.model is None:
        raise HTTPException(status_code=400, detail="Model not trained yet")

    supplied = features.features
    missing = [name for name in state.feature_encoders if name not in supplied]
    if missing:
        raise HTTPException(
            status_code=400, detail=f"Missing feature values for: {missing}"
        )

    # Iterate the stored encoders rather than the request so the columns come
    # out in the order the encoders were stored, whatever order the client
    # used.
    encoded_features = {}
    for column, encoder in state.feature_encoders.items():
        try:
            encoded_features[column] = encoder.transform([supplied[column]])[0]
        except (ValueError, TypeError) as e:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown value {supplied[column]!r} for feature {column!r}",
            ) from e

    try:
        X = pd.DataFrame([encoded_features])
        prediction = state.model.predict(X)
        prediction_proba = state.model.predict_proba(X)
        predicted_class = state.target_encoder.inverse_transform(prediction)[0]

        # predict_proba has one column per entry of model.classes_, which can
        # be fewer than the encoder's labels, so map through classes_ instead
        # of assuming column i is label i.
        class_labels = state.target_encoder.inverse_transform(state.model.classes_)
        class_probabilities = {
            str(label): float(prob)
            for label, prob in zip(class_labels, prediction_proba[0])
        }
        return PredictResponse(
            # Labels may be numbers; the response model declares strings.
            prediction=str(predicted_class),
            probabilities=class_probabilities,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
+from fastapi.responses import StreamingResponse
+import matplotlib.pyplot as plt
# GET /api/plot/confusion-matrix
#
# Parameters:
#   state: shared ModelState holding the fitted model, the target encoder and
#          the stored test split.
# Returns:
#   StreamingResponse carrying a PNG of the confusion matrix.
# Raises:
#   HTTPException 400: no model or no stored test split.
#   HTTPException 500: any failure while predicting or drawing.
#
# The function docstring is left as a one-liner because FastAPI publishes it
# as the operation description.
@app.get("/api/plot/confusion-matrix", tags=["Model"], summary="Confusion Matrix Plot")
async def plot_confusion_matrix(state: ModelState = Depends(get_model_state)):
    """Return a PNG image of the confusion matrix for the last test set."""
    if state.model is None or state.X_test is None or state.y_test is None:
        raise HTTPException(status_code=400, detail="Model not trained or no test data available.")

    classes = state.target_encoder.classes_ if state.target_encoder is not None else []

    fig = None
    try:
        y_pred = state.model.predict(state.X_test)
        # Pin the matrix to one row/column per known class so the tick labels
        # below line up even when the test split does not contain every class.
        label_ids = np.arange(len(classes)) if len(classes) else None
        cm = confusion_matrix(state.y_test, y_pred, labels=label_ids)

        # Draw through the figure/axes objects instead of pyplot's implicit
        # "current figure" so nothing depends on global plotting state.
        fig, ax = plt.subplots(figsize=(5, 4))
        cax = ax.matshow(cm, cmap=plt.cm.Blues)
        ax.set_title('Confusion Matrix')
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        fig.colorbar(cax)
        ax.set_xticks(np.arange(len(classes)))
        ax.set_yticks(np.arange(len(classes)))
        ax.set_xticklabels(classes, rotation=45, ha="left")
        ax.set_yticklabels(classes)
        for (i, j), z in np.ndenumerate(cm):
            ax.text(j, i, str(z), ha='center', va='center', color='red')
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Release the figure on failure too; pyplot keeps figures alive until
        # they are closed.
        if fig is not None:
            plt.close(fig)

    buf.seek(0)
    return StreamingResponse(buf, media_type="image/png")
# GET /api/plot/feature-log-prob
#
# Parameters:
#   state: shared ModelState holding the fitted model and the encoders.
# Returns:
#   StreamingResponse carrying a PNG with one heatmap per feature
#   (rows = classes, columns = feature values).
# Raises:
#   HTTPException 400: no model trained, or the model has no features.
#   HTTPException 500: any failure while drawing.
#
# The function docstring is left as a one-liner because FastAPI publishes it
# as the operation description.
@app.get("/api/plot/feature-log-prob", tags=["Model"], summary="Feature Log Probability Heatmap")
async def plot_feature_log_prob(state: ModelState = Depends(get_model_state)):
    """Return a PNG heatmap of feature log probabilities for each class."""
    if state.model is None or state.target_encoder is None:
        raise HTTPException(status_code=400, detail="Model not trained.")

    feature_names = list(state.feature_encoders.keys())
    if not feature_names:
        # plt.subplots cannot build a grid with zero rows.
        raise HTTPException(status_code=400, detail="Model has no features to plot.")

    fig = None
    try:
        import matplotlib.pyplot as plt
        import seaborn as sns

        # Rows of every log-prob table follow model.classes_ (the encoded
        # labels the model was fitted on), so translate those back to names
        # rather than assuming every encoder label is present.
        class_names = [
            str(label)
            for label in state.target_encoder.inverse_transform(state.model.classes_)
        ]

        # squeeze=False always yields a 2-D array of axes, so a single
        # feature needs no special case.
        fig, axes = plt.subplots(
            len(feature_names), 1,
            figsize=(8, 4 * len(feature_names)),
            squeeze=False,
        )
        for idx, feature in enumerate(feature_names):
            # CategoricalNB.feature_log_prob_ is a list with one array per
            # feature, each of shape (n_classes, n_categories). It is not a
            # single 3-D array, so index the list first.
            # NOTE(review): assumes the encoder dict order matches the column
            # order used at fit time - confirm against the training code.
            data = np.asarray(state.model.feature_log_prob_[idx])

            # Match the tick labels to the table width; fall back to the raw
            # category index when the encoder has no label for a column.
            categories = list(state.feature_encoders[feature].classes_)
            value_labels = [
                str(categories[k]) if k < len(categories) else str(k)
                for k in range(data.shape[1])
            ]

            ax = axes[idx][0]
            sns.heatmap(data, annot=True, fmt=".2f", cmap="Blues", xticklabels=value_labels, yticklabels=class_names, ax=ax)
            ax.set_title(f'Log Probabilities for Feature: {feature}')
            ax.set_xlabel('Feature Value')
            ax.set_ylabel('Class')
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
        buf.seek(0)
        return StreamingResponse(buf, media_type="image/png")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Release the figure on failure too; pyplot keeps figures alive until
        # they are closed.
        if fig is not None:
            plt.close(fig)