xuanzang committed
Commit adead6c · 1 Parent(s): aed535a

Add initial Dockerfile, FastAPI application, and requirements

Files changed (3)
  1. Dockerfile +16 -0
  2. app.py +141 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,141 @@
+ from fastapi import FastAPI, UploadFile, Form, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import JSONResponse
+ import pandas as pd
+ import numpy as np
+ from sklearn.naive_bayes import CategoricalNB
+ from sklearn.preprocessing import LabelEncoder
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import confusion_matrix
+ import json
+ import io
+ from typing import Dict, List, Optional
+ from pydantic import BaseModel
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ model = None
+ feature_encoders: Dict[str, LabelEncoder] = {}
+ target_encoder: Optional[LabelEncoder] = None
+
+ class TrainOptions(BaseModel):
+     target_column: str
+     feature_columns: List[str]
+
+ class PredictionFeatures(BaseModel):
+     features: Dict[str, str]
+
+ @app.get("/api/health")
+ async def health_check():
+     return {"status": "healthy"}
+
+ @app.post("/api/upload")
+ async def upload_csv(file: UploadFile):
+     if not file.filename.endswith('.csv'):
+         raise HTTPException(status_code=400, detail="Only CSV files are allowed")
+
+     try:
+         contents = await file.read()
+         df = pd.read_csv(io.StringIO(contents.decode()))
+
+         columns = df.columns.tolist()
+         column_types = {col: str(df[col].dtype) for col in columns}
+
+         unique_values = {col: df[col].unique().tolist() for col in columns}
+
+         for col, values in unique_values.items():
+             unique_values[col] = [v.item() if isinstance(v, np.generic) else v for v in values]
+
+         return {
+             "message": "File uploaded successfully",
+             "columns": columns,
+             "column_types": column_types,
+             "unique_values": unique_values,
+             "row_count": len(df)
+         }
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/api/train")
+ async def train_model(file: UploadFile, options: str = Form(...)):
+     global model, feature_encoders, target_encoder
+
+     try:
+         train_options = json.loads(options)
+         target_column = train_options["target_column"]
+         feature_columns = train_options["feature_columns"]
+
+         contents = await file.read()
+         df = pd.read_csv(io.StringIO(contents.decode()))
+
+         X = pd.DataFrame()
+         feature_encoders = {}
+         for column in feature_columns:
+             encoder = LabelEncoder()
+             X[column] = encoder.fit_transform(df[column])
+             feature_encoders[column] = encoder
+
+         target_encoder = LabelEncoder()
+         y = target_encoder.fit_transform(df[target_column])
+
+         X_train, X_test, y_train, y_test = train_test_split(
+             X, y, test_size=0.2, random_state=42
+         )
+
+         model = CategoricalNB()
+         model.fit(X_train, y_train)
+
+         accuracy = float(model.score(X_test, y_test))
+
+         return {
+             "message": "Model trained successfully",
+             "accuracy": accuracy,
+             "target_classes": target_encoder.classes_.tolist()
+         }
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/api/predict")
+ async def predict(features: PredictionFeatures):
+     global model, feature_encoders, target_encoder
+
+     if model is None:
+         raise HTTPException(status_code=400, detail="Model not trained yet")
+
+     try:
+         encoded_features = {}
+         for column, value in features.features.items():
+             if column in feature_encoders:
+                 encoded_features[column] = feature_encoders[column].transform([value])[0]
+
+         X = pd.DataFrame([encoded_features])
+
+         prediction = model.predict(X)
+         prediction_proba = model.predict_proba(X)
+
+         predicted_class = target_encoder.inverse_transform(prediction)[0]
+
+         class_probabilities = {
+             target_encoder.inverse_transform([i])[0]: float(prob)
+             for i, prob in enumerate(prediction_proba[0])
+         }
+
+         return {
+             "prediction": predicted_class,
+             "probabilities": class_probabilities
+         }
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)
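The three POST routes above form a simple upload / train / predict flow. The sketch below walks that flow with the third-party requests library (client-side only, not part of this commit's requirements.txt); the base URL, CSV path, and column names are illustrative assumptions, not values from this commit. The prediction features are sent in the same order as the training feature_columns so the DataFrame column order matches what the model saw during fit.

import json
import requests  # client-side dependency only; not in this commit's requirements.txt

BASE = "http://localhost:7860"  # assumed local URL for the running Space
CSV_PATH = "weather.csv"        # hypothetical CSV with categorical columns

# 1. Inspect the CSV: /api/upload returns columns, dtypes, unique values, row count.
with open(CSV_PATH, "rb") as f:
    info = requests.post(
        f"{BASE}/api/upload",
        files={"file": ("weather.csv", f, "text/csv")},
    ).json()
print(info["columns"])

# 2. Train: the file is re-sent together with a JSON "options" form field.
options = json.dumps({"target_column": "play", "feature_columns": ["outlook", "temperature"]})
with open(CSV_PATH, "rb") as f:
    result = requests.post(
        f"{BASE}/api/train",
        files={"file": ("weather.csv", f, "text/csv")},
        data={"options": options},
    ).json()
print(result["accuracy"], result["target_classes"])

# 3. Predict: feature values are sent as strings, keyed by column name,
#    in the same order as feature_columns above.
pred = requests.post(
    f"{BASE}/api/predict",
    json={"features": {"outlook": "sunny", "temperature": "hot"}},
).json()
print(pred["prediction"], pred["probabilities"])

Note that /api/train reads the CSV itself, so the earlier /api/upload call is only needed to discover the columns and unique values used to build the options payload.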
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ uvicorn
+ python-multipart
+ pandas
+ scikit-learn
+ numpy
+ matplotlib
+ gunicorn
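The scikit-learn and pandas entries back the training pipeline in app.py: each categorical feature is integer-encoded with a LabelEncoder and the codes are fed to a CategoricalNB classifier. Below is a standalone sketch of that encode/train/decode cycle on hypothetical toy data (column names and values are illustrative only, not from this commit).

import pandas as pd
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder

# Hypothetical toy data mirroring the kind of categorical CSV app.py expects.
df = pd.DataFrame({
    "outlook": ["sunny", "rain", "overcast", "sunny", "rain", "overcast"],
    "temperature": ["hot", "mild", "hot", "mild", "cool", "cool"],
    "play": ["no", "yes", "yes", "no", "yes", "yes"],
})

feature_columns = ["outlook", "temperature"]
target_column = "play"

# Encode each categorical feature to integer codes, as /api/train does.
feature_encoders = {}
X = pd.DataFrame()
for column in feature_columns:
    encoder = LabelEncoder()
    X[column] = encoder.fit_transform(df[column])
    feature_encoders[column] = encoder

target_encoder = LabelEncoder()
y = target_encoder.fit_transform(df[target_column])

# CategoricalNB expects non-negative integer category codes per feature.
model = CategoricalNB()
model.fit(X, y)

# Encode a new row with the stored encoders, then decode the prediction.
row = {"outlook": "sunny", "temperature": "cool"}
encoded = {c: feature_encoders[c].transform([row[c]])[0] for c in feature_columns}
prediction = model.predict(pd.DataFrame([encoded]))
print(target_encoder.inverse_transform(prediction)[0])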