Spaces: Running
Merge pull request #9 from YakobusIP/production
Browse files
- cloudbuild-endpoint.yaml +8 -19
- core-model-prediction/Dockerfile +9 -2
- core-model-prediction/hypothesis.py +12 -6
- core-model-prediction/models/random_forest.joblib +0 -0
- core-model-prediction/random_forest_model.py +2 -1
- core-model-prediction/requirements.txt +3 -3
- core-model-prediction/scalers/rf_scaler.joblib +0 -0
- core-model-prediction/scalers/{torch-scaler-normalized-text-length.joblib → scaler-normalized-text-length.joblib} +0 -0
- core-model-prediction/scalers/{torch-scaler-not-normalized.joblib → scaler-not-normalized.joblib} +0 -0
cloudbuild-endpoint.yaml
CHANGED
@@ -2,7 +2,6 @@ steps:
|
|
2 |
- name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
|
3 |
entrypoint: "bash"
|
4 |
id: upload-model
|
5 |
-
timeout: 600s
|
6 |
args:
|
7 |
- "-c"
|
8 |
- |
|
@@ -14,39 +13,29 @@ steps:
|
|
14 |
--container-health-route="/health" \
|
15 |
--display-name="interview-ai-detector-model"
|
16 |
|
17 |
-
- name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
|
18 |
-
entrypoint: "bash"
|
19 |
-
id: fetch-model
|
20 |
-
waitFor: upload-model
|
21 |
-
timeout: 600s
|
22 |
-
args:
|
23 |
-
- "-c"
|
24 |
-
- |
|
25 |
-
MODEL_ID=$(gcloud ai models list --region=us-central1 --format="value(name)" | head -n 1)
|
26 |
-
echo MODEL_ID > /workspace/model_id.txt
|
27 |
-
|
28 |
- name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
|
29 |
entrypoint: "bash"
|
30 |
id: create-endpoint
|
31 |
-
waitFor:
|
32 |
args:
|
33 |
- "-c"
|
34 |
- |
|
35 |
-
|
36 |
--region="us-central1" \
|
37 |
--display-name="interview-ai-detector-endpoint" \
|
38 |
-
--format="value(name)"
|
39 |
-
echo ENDPOINT_ID > /workspace/endpoint_id.txt
|
40 |
|
41 |
- name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
|
42 |
entrypoint: "bash"
|
43 |
-
waitFor: create-endpoint
|
44 |
args:
|
45 |
- "-c"
|
46 |
- |
|
47 |
-
gcloud ai
|
|
|
|
|
48 |
--region="us-central1" \
|
49 |
-
--model
|
50 |
--display-name="interview-ai-detector-deployment" \
|
51 |
--machine-type="n1-standard-4" \
|
52 |
--accelerator="count=1,type=nvidia-tesla-t4" \
|
|
|
2 |
- name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
|
3 |
entrypoint: "bash"
|
4 |
id: upload-model
|
|
|
5 |
args:
|
6 |
- "-c"
|
7 |
- |
|
|
|
13 |
--container-health-route="/health" \
|
14 |
--display-name="interview-ai-detector-model"
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
- name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
|
17 |
entrypoint: "bash"
|
18 |
id: create-endpoint
|
19 |
+
waitFor: ["upload-model"]
|
20 |
args:
|
21 |
- "-c"
|
22 |
- |
|
23 |
+
gcloud ai endpoints create \
|
24 |
--region="us-central1" \
|
25 |
--display-name="interview-ai-detector-endpoint" \
|
26 |
+
--format="value(name)"
|
|
|
27 |
|
28 |
- name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
|
29 |
entrypoint: "bash"
|
30 |
+
waitFor: ["create-endpoint"]
|
31 |
args:
|
32 |
- "-c"
|
33 |
- |
|
34 |
+
_MODEL_ID=$(gcloud ai models list --region=us-central1 --format="value(name)" | head -n 1) \
|
35 |
+
_ENDPOINT_ID=$(gcloud ai endpoints list --region=us-central1 --format="value(name)" | head -n 1) \
|
36 |
+
gcloud ai endpoints deploy-model $_ENDPOINT_ID \
|
37 |
--region="us-central1" \
|
38 |
+
--model=$_MODEL_ID \
|
39 |
--display-name="interview-ai-detector-deployment" \
|
40 |
--machine-type="n1-standard-4" \
|
41 |
--accelerator="count=1,type=nvidia-tesla-t4" \
|
core-model-prediction/Dockerfile
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
# Use an official Python runtime as a base image
|
2 |
-
FROM pytorch/pytorch:2.
|
3 |
|
4 |
# Set the working directory in the container
|
5 |
WORKDIR /app
|
@@ -8,7 +8,14 @@ WORKDIR /app
|
|
8 |
COPY . /app
|
9 |
|
10 |
# Install any needed packages specified in requirements.txt
|
11 |
-
RUN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# Make port 8080 available to the world outside this container
|
14 |
EXPOSE 8080
|
|
|
1 |
# Use an official Python runtime as a base image
|
2 |
+
FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime
|
3 |
|
4 |
# Set the working directory in the container
|
5 |
WORKDIR /app
|
|
|
8 |
COPY . /app
|
9 |
|
10 |
# Install any needed packages specified in requirements.txt
|
11 |
+
RUN apt-get update && apt-get install -y unzip \
|
12 |
+
&& pip install --no-cache-dir -r requirements.txt
|
13 |
+
|
14 |
+
# Download NLTK data
|
15 |
+
RUN python -m nltk.downloader punkt wordnet averaged_perceptron_tagger
|
16 |
+
|
17 |
+
# Unzip wordnet
|
18 |
+
RUN unzip /root/nltk_data/corpora/wordnet.zip -d /root/nltk_data/corpora/
|
19 |
|
20 |
# Make port 8080 available to the world outside this container
|
21 |
EXPOSE 8080
|
core-model-prediction/hypothesis.py
CHANGED
@@ -8,14 +8,12 @@ from collections import defaultdict
|
|
8 |
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
9 |
from gemma2b_dependencies import Gemma2BDependencies
|
10 |
from string import punctuation
|
|
|
|
|
11 |
|
12 |
|
13 |
class BaseModelHypothesis:
|
14 |
def __init__(self):
|
15 |
-
nltk.download('punkt')
|
16 |
-
nltk.download('wordnet')
|
17 |
-
nltk.download('averaged_perceptron_tagger')
|
18 |
-
|
19 |
self.analyzer = SentimentIntensityAnalyzer()
|
20 |
self.lexicon_df = pd.read_csv(
|
21 |
"https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
|
@@ -64,7 +62,15 @@ class BaseModelHypothesis:
|
|
64 |
not_normalized_features = self.calculate_not_normalized_features(text)
|
65 |
all_features = normalized_text_length_features + not_normalized_features
|
66 |
features_df = pd.DataFrame(
|
67 |
-
[all_features], columns=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
# Scaling features
|
70 |
features_df[self.features_normalized_text_length] = self.scaler_normalized_text_length.transform(
|
@@ -84,7 +90,7 @@ class BaseModelHypothesis:
|
|
84 |
return features
|
85 |
|
86 |
def calculate_not_normalized_features(self, text: str) -> List[float]:
|
87 |
-
sentiment_intensity = self.measure_sentiment_intensity(text)
|
88 |
readability_scores = self.measure_readability(text)
|
89 |
perplexity = [self.gemma2bdependencies.calculate_perplexity(text)]
|
90 |
burstiness = [self.gemma2bdependencies.calculate_burstiness(text)]
|
|
|
8 |
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
9 |
from gemma2b_dependencies import Gemma2BDependencies
|
10 |
from string import punctuation
|
11 |
+
import os
|
12 |
+
import zipfile
|
13 |
|
14 |
|
15 |
class BaseModelHypothesis:
|
16 |
def __init__(self):
|
|
|
|
|
|
|
|
|
17 |
self.analyzer = SentimentIntensityAnalyzer()
|
18 |
self.lexicon_df = pd.read_csv(
|
19 |
"https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
|
|
|
62 |
not_normalized_features = self.calculate_not_normalized_features(text)
|
63 |
all_features = normalized_text_length_features + not_normalized_features
|
64 |
features_df = pd.DataFrame(
|
65 |
+
[all_features], columns=[
|
66 |
+
"nn_ratio", "nns_ratio", "jj_ratio", "in_ratio", "dt_ratio", "vb_ratio", "prp_ratio", "rb_ratio",
|
67 |
+
"negative_emotion_proportions", "positive_emotion_proportions", "fear_emotion_proportions",
|
68 |
+
"anger_emotion_proportions", "trust_emotion_proportions", "sadness_emotion_proportions",
|
69 |
+
"disgust_emotion_proportions", "anticipation_emotion_proportions", "joy_emotion_proportions",
|
70 |
+
"surprise_emotion_proportions", "unique_words_ratio",
|
71 |
+
"compound_score", "gunning_fog", "smog_index", "dale_chall_score",
|
72 |
+
"perplexity", "burstiness"
|
73 |
+
])
|
74 |
|
75 |
# Scaling features
|
76 |
features_df[self.features_normalized_text_length] = self.scaler_normalized_text_length.transform(
|
|
|
90 |
return features
|
91 |
|
92 |
def calculate_not_normalized_features(self, text: str) -> List[float]:
|
93 |
+
sentiment_intensity = [self.measure_sentiment_intensity(text)]
|
94 |
readability_scores = self.measure_readability(text)
|
95 |
perplexity = [self.gemma2bdependencies.calculate_perplexity(text)]
|
96 |
burstiness = [self.gemma2bdependencies.calculate_burstiness(text)]
|
core-model-prediction/models/random_forest.joblib
CHANGED
Binary files a/core-model-prediction/models/random_forest.joblib and b/core-model-prediction/models/random_forest.joblib differ
|
|
core-model-prediction/random_forest_model.py
CHANGED
@@ -15,7 +15,8 @@ class RandomForestModel:
|
|
15 |
def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
|
16 |
features_df = pd.DataFrame([secondary_model_features], columns=[
|
17 |
self.secondary_model_features])
|
18 |
-
features_df = self.scaler.transform(
|
|
|
19 |
return features_df.values.astype(np.float32).reshape(1, -1)
|
20 |
|
21 |
def predict(self, secondary_model_features: List[float]):
|
|
|
15 |
def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
|
16 |
features_df = pd.DataFrame([secondary_model_features], columns=[
|
17 |
self.secondary_model_features])
|
18 |
+
features_df[self.secondary_model_features] = self.scaler.transform(
|
19 |
+
features_df[self.secondary_model_features])
|
20 |
return features_df.values.astype(np.float32).reshape(1, -1)
|
21 |
|
22 |
def predict(self, secondary_model_features: List[float]):
|
core-model-prediction/requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
-
nltk
|
2 |
-
vaderSentiment
|
3 |
pandas
|
4 |
-
textstat
|
5 |
scikit-learn==1.2.2
|
6 |
transformers==4.38.2
|
7 |
fastapi
|
|
|
1 |
+
nltk==3.2.4
|
2 |
+
vaderSentiment==3.3.2
|
3 |
pandas
|
4 |
+
textstat==0.7.3
|
5 |
scikit-learn==1.2.2
|
6 |
transformers==4.38.2
|
7 |
fastapi
|
core-model-prediction/scalers/rf_scaler.joblib
CHANGED
Binary files a/core-model-prediction/scalers/rf_scaler.joblib and b/core-model-prediction/scalers/rf_scaler.joblib differ
|
|
core-model-prediction/scalers/{torch-scaler-normalized-text-length.joblib → scaler-normalized-text-length.joblib}
RENAMED
File without changes
|
core-model-prediction/scalers/{torch-scaler-not-normalized.joblib → scaler-not-normalized.joblib}
RENAMED
File without changes
|