Yakobus Iryanto Prasethio committed
Commit 68a5136 · unverified · 2 Parent(s): 1c1651d df00cec

Merge pull request #9 from YakobusIP/production

cloudbuild-endpoint.yaml CHANGED
@@ -2,7 +2,6 @@ steps:
   - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
     entrypoint: "bash"
     id: upload-model
-    timeout: 600s
     args:
       - "-c"
       - |
@@ -14,39 +13,29 @@ steps:
         --container-health-route="/health" \
         --display-name="interview-ai-detector-model"
 
-  - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
-    entrypoint: "bash"
-    id: fetch-model
-    waitFor: upload-model
-    timeout: 600s
-    args:
-      - "-c"
-      - |
-        MODEL_ID=$(gcloud ai models list --region=us-central1 --format="value(name)" | head -n 1)
-        echo MODEL_ID > /workspace/model_id.txt
-
   - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
     entrypoint: "bash"
     id: create-endpoint
-    waitFor: fetch-model
+    waitFor: ["upload-model"]
     args:
       - "-c"
       - |
-        ENDPOINT_ID=$(gcloud ai endpoints create \
+        gcloud ai endpoints create \
           --region="us-central1" \
           --display-name="interview-ai-detector-endpoint" \
-          --format="value(name)")
-        echo ENDPOINT_ID > /workspace/endpoint_id.txt
+          --format="value(name)"
 
   - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
     entrypoint: "bash"
-    waitFor: create-endpoint
+    waitFor: ["create-endpoint"]
    args:
       - "-c"
       - |
-        gcloud ai endpoints deploy-model "${$(cat /workspace/endpoint_id.txt)}" \
+        _MODEL_ID=$(gcloud ai models list --region=us-central1 --format="value(name)" | head -n 1) \
+        _ENDPOINT_ID=$(gcloud ai endpoints list --region=us-central1 --format="value(name)" | head -n 1) \
+        gcloud ai endpoints deploy-model $_ENDPOINT_ID \
           --region="us-central1" \
-          --model="${$(cat /workspace/model_id.txt)}" \
+          --model=$_MODEL_ID \
          --display-name="interview-ai-detector-deployment" \
           --machine-type="n1-standard-4" \
           --accelerator="count=1,type=nvidia-tesla-t4" \
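A note on what changed here: the removed fetch-model step persisted IDs to /workspace files and read them back via "${$(cat ...)}", which is not valid bash substitution, while the new version re-queries `gcloud ai models list` / `gcloud ai endpoints list` and takes the newest entry, assuming the region holds exactly one model and one endpoint. (Chaining the `_MODEL_ID=` and `_ENDPOINT_ID=` assignments onto the `gcloud` command with trailing backslashes also makes them environment prefixes, so the `$_ENDPOINT_ID` and `$_MODEL_ID` in the command words expand before the assignments take effect.) For reference, a minimal sketch of the same create-and-deploy flow using the Vertex AI Python SDK; the google-cloud-aiplatform dependency and the PROJECT_ID placeholder are assumptions, not part of this commit:

# Sketch only: rough SDK equivalent of the create-endpoint and
# deploy-model steps. PROJECT_ID is a placeholder.
from google.cloud import aiplatform

aiplatform.init(project="PROJECT_ID", location="us-central1")

# Newest uploaded model, mirroring `gcloud ai models list ... | head -n 1`
model = aiplatform.Model.list(order_by="create_time desc")[0]

endpoint = aiplatform.Endpoint.create(
    display_name="interview-ai-detector-endpoint")

endpoint.deploy(
    model=model,
    deployed_model_display_name="interview-ai-detector-deployment",
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
)

Listing and taking the first entry is race-prone if the project ever holds more than one model or endpoint; filtering on display_name would be the safer lookup.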
core-model-prediction/Dockerfile CHANGED
@@ -1,5 +1,5 @@
 # Use an official Python runtime as a base image
-FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-devel
+FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime
 
 # Set the working directory in the container
 WORKDIR /app
@@ -8,7 +8,14 @@ WORKDIR /app
 COPY . /app
 
 # Install any needed packages specified in requirements.txt
-RUN pip install --no-cache-dir -r requirements.txt
+RUN apt-get update && apt-get install -y unzip \
+    && pip install --no-cache-dir -r requirements.txt
+
+# Download NLTK data
+RUN python -m nltk.downloader punkt wordnet averaged_perceptron_tagger
+
+# Unzip wordnet
+RUN unzip /root/nltk_data/corpora/wordnet.zip -d /root/nltk_data/corpora/
 
 # Make port 8080 available to the world outside this container
 EXPOSE 8080
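Two effects worth noting: the base image moves from the heavier `-devel` build of PyTorch 2.2.2 to the smaller 2.1.2 `-runtime` image, and the NLTK corpora are now baked in at build time (the matching runtime `nltk.download` calls are removed from hypothesis.py below), so containers start without a network fetch. The explicit unzip step presumably works around loaders that cannot read the zipped wordnet corpus. An illustrative startup check, not part of the commit:

# Illustrative only: nltk.data.find raises LookupError if a resource
# baked into the image is missing.
import nltk

for resource in ("tokenizers/punkt", "corpora/wordnet",
                 "taggers/averaged_perceptron_tagger"):
    nltk.data.find(resource)
print("all NLTK resources present")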
core-model-prediction/hypothesis.py CHANGED
@@ -8,14 +8,12 @@ from collections import defaultdict
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 from gemma2b_dependencies import Gemma2BDependencies
 from string import punctuation
+import os
+import zipfile
 
 
 class BaseModelHypothesis:
     def __init__(self):
-        nltk.download('punkt')
-        nltk.download('wordnet')
-        nltk.download('averaged_perceptron_tagger')
-
         self.analyzer = SentimentIntensityAnalyzer()
         self.lexicon_df = pd.read_csv(
             "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
@@ -64,7 +62,15 @@ class BaseModelHypothesis:
         not_normalized_features = self.calculate_not_normalized_features(text)
         all_features = normalized_text_length_features + not_normalized_features
         features_df = pd.DataFrame(
-            [all_features], columns=self.additional_feature_columns)
+            [all_features], columns=[
+                "nn_ratio", "nns_ratio", "jj_ratio", "in_ratio", "dt_ratio", "vb_ratio", "prp_ratio", "rb_ratio",
+                "negative_emotion_proportions", "positive_emotion_proportions", "fear_emotion_proportions",
+                "anger_emotion_proportions", "trust_emotion_proportions", "sadness_emotion_proportions",
+                "disgust_emotion_proportions", "anticipation_emotion_proportions", "joy_emotion_proportions",
+                "surprise_emotion_proportions", "unique_words_ratio",
+                "compound_score", "gunning_fog", "smog_index", "dale_chall_score",
+                "perplexity", "burstiness"
+            ])
 
         # Scaling features
         features_df[self.features_normalized_text_length] = self.scaler_normalized_text_length.transform(
@@ -84,7 +90,7 @@ class BaseModelHypothesis:
         return features
 
     def calculate_not_normalized_features(self, text: str) -> List[float]:
-        sentiment_intensity = self.measure_sentiment_intensity(text)
+        sentiment_intensity = [self.measure_sentiment_intensity(text)]
         readability_scores = self.measure_readability(text)
         perplexity = [self.gemma2bdependencies.calculate_perplexity(text)]
         burstiness = [self.gemma2bdependencies.calculate_burstiness(text)]
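Besides moving the `nltk.download` calls into the Dockerfile, this diff inlines the 25 feature names in place of `self.additional_feature_columns` and fixes `calculate_not_normalized_features`: `measure_sentiment_intensity` evidently returns a single compound score, so it must be wrapped in a list before being concatenated with the other per-feature lists. A standalone illustration with made-up values:

# Made-up values; shows why the [ ... ] wrapper is needed before
# concatenating with the other feature lists.
compound_score = 0.42                  # one float from the analyzer
readability_scores = [10.3, 7.1, 8.9]  # already a list

features = [compound_score] + readability_scores  # [0.42, 10.3, 7.1, 8.9]
# compound_score + readability_scores would raise
# TypeError: unsupported operand type(s) for +: 'float' and 'list'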
core-model-prediction/models/random_forest.joblib CHANGED
Binary files a/core-model-prediction/models/random_forest.joblib and b/core-model-prediction/models/random_forest.joblib differ
 
core-model-prediction/random_forest_model.py CHANGED
@@ -15,7 +15,8 @@ class RandomForestModel:
     def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
         features_df = pd.DataFrame([secondary_model_features], columns=[
             self.secondary_model_features])
-        features_df = self.scaler.transform(features_df)
+        features_df[self.secondary_model_features] = self.scaler.transform(
+            features_df[self.secondary_model_features])
         return features_df.values.astype(np.float32).reshape(1, -1)
 
     def predict(self, secondary_model_features: List[float]):
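The fix matters because `scaler.transform` returns a plain NumPy array: rebinding `features_df` to it made the following `features_df.values` call fail, since ndarrays have no `.values` attribute. Assigning the transformed block back into the DataFrame's columns keeps the frame, and its column names, intact. A self-contained sketch with toy data, not the repo's scaler or feature names:

# Toy data; demonstrates the DataFrame-preserving assignment.
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

cols = ["f1", "f2"]
scaler = StandardScaler().fit(
    pd.DataFrame([[0.0, 1.0], [4.0, 5.0]], columns=cols))

df = pd.DataFrame([[1.0, 2.0]], columns=cols)
df[cols] = scaler.transform(df[cols])            # df stays a DataFrame
x = df.values.astype(np.float32).reshape(1, -1)  # .values now works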
core-model-prediction/requirements.txt CHANGED
@@ -1,7 +1,7 @@
-nltk
-vaderSentiment
+nltk==3.2.4
+vaderSentiment==3.3.2
 pandas
-textstat
+textstat==0.7.3
 scikit-learn==1.2.2
 transformers==4.38.2
 fastapi
core-model-prediction/scalers/rf_scaler.joblib CHANGED
Binary files a/core-model-prediction/scalers/rf_scaler.joblib and b/core-model-prediction/scalers/rf_scaler.joblib differ
 
core-model-prediction/scalers/{torch-scaler-normalized-text-length.joblib → scaler-normalized-text-length.joblib} RENAMED
File without changes
core-model-prediction/scalers/{torch-scaler-not-normalized.joblib → scaler-not-normalized.joblib} RENAMED
File without changes