Spaces:

panduwana
/

interview-ai-detector

Running

App Files Files Community

Yakobus Iryanto Prasethio committed on May 3, 2024

Commit

68a5136

unverified ·

2 Parent(s): 1c1651d df00cec

Merge pull request #9 from YakobusIP/production

Browse files

Files changed (9) hide show

cloudbuild-endpoint.yaml +8 -19
core-model-prediction/Dockerfile +9 -2
core-model-prediction/hypothesis.py +12 -6
core-model-prediction/models/random_forest.joblib +0 -0
core-model-prediction/random_forest_model.py +2 -1
core-model-prediction/requirements.txt +3 -3
core-model-prediction/scalers/rf_scaler.joblib +0 -0
core-model-prediction/scalers/{torch-scaler-normalized-text-length.joblib → scaler-normalized-text-length.joblib} +0 -0
core-model-prediction/scalers/{torch-scaler-not-normalized.joblib → scaler-not-normalized.joblib} +0 -0

cloudbuild-endpoint.yaml CHANGED Viewed

@@ -2,7 +2,6 @@ steps:
   - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
     entrypoint: "bash"
     id: upload-model
-    timeout: 600s
     args:
       - "-c"
       - |
@@ -14,39 +13,29 @@ steps:
           --container-health-route="/health" \
           --display-name="interview-ai-detector-model"
-  - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
-    entrypoint: "bash"
-    id: fetch-model
-    waitFor: upload-model
-    timeout: 600s
-    args:
-      - "-c"
-      - |
-        MODEL_ID=$(gcloud ai models list --region=us-central1 --format="value(name)" | head -n 1)
-        echo MODEL_ID > /workspace/model_id.txt
   - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
     entrypoint: "bash"
     id: create-endpoint
-    waitFor: fetch-model
     args:
       - "-c"
       - |
-        ENDPOINT_ID=$(gcloud ai endpoints create \
           --region="us-central1" \
           --display-name="interview-ai-detector-endpoint" \
-          --format="value(name)")
-        echo ENDPOINT_ID > /workspace/endpoint_id.txt
   - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
     entrypoint: "bash"
-    waitFor: create-endpoint
     args:
       - "-c"
       - |
-        gcloud ai endpoints deploy-model "${$(cat /workspace/endpoint_id.txt)}" \
           --region="us-central1" \
-          --model="${$(cat /workspace/model_id.txt)}" \
           --display-name="interview-ai-detector-deployment" \
           --machine-type="n1-standard-4" \
           --accelerator="count=1,type=nvidia-tesla-t4" \

   - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
     entrypoint: "bash"
     id: upload-model
     args:
       - "-c"
       - |
           --container-health-route="/health" \
           --display-name="interview-ai-detector-model"
   - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
     entrypoint: "bash"
     id: create-endpoint
+    waitFor: ["upload-model"]
     args:
       - "-c"
       - |
+        gcloud ai endpoints create \
           --region="us-central1" \
           --display-name="interview-ai-detector-endpoint" \
+          --format="value(name)"
   - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
     entrypoint: "bash"
+    waitFor: ["create-endpoint"]
     args:
       - "-c"
       - |
+        # Resolve the model and endpoint resource names as separate statements.
+        # Ending the assignments with a line-continuation would turn them into
+        # environment prefixes of the gcloud command below, and the variables
+        # would be expanded (empty) before the assignments took effect.
+        _MODEL_ID=$(gcloud ai models list --region=us-central1 --format="value(name)" | head -n 1)
+        _ENDPOINT_ID=$(gcloud ai endpoints list --region=us-central1 --format="value(name)" | head -n 1)
+        # "$$" escapes the dollar sign so Cloud Build hands the variable to bash
+        # instead of treating "_NAME" as a user-defined substitution.
+        gcloud ai endpoints deploy-model "$$_ENDPOINT_ID" \
           --region="us-central1" \
+          --model="$$_MODEL_ID" \
           --display-name="interview-ai-detector-deployment" \
           --machine-type="n1-standard-4" \
           --accelerator="count=1,type=nvidia-tesla-t4" \

core-model-prediction/Dockerfile CHANGED Viewed

@@ -1,5 +1,5 @@
 # Use an official Python runtime as a base image
-FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-devel
 # Set the working directory in the container
 WORKDIR /app
@@ -8,7 +8,14 @@ WORKDIR /app
 COPY . /app
 # Install any needed packages specified in requirements.txt
-RUN pip install --no-cache-dir -r requirements.txt
 # Make port 8080 available to the world outside this container
 EXPOSE 8080

 # Use an official Python runtime as a base image
+FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime
 # Set the working directory in the container
 WORKDIR /app
 COPY . /app
 # Install any needed packages specified in requirements.txt
+RUN apt-get update && apt-get install -y unzip \
+    && pip install --no-cache-dir -r requirements.txt
+# Download NLTK data
+RUN python -m nltk.downloader punkt wordnet averaged_perceptron_tagger
+# Unzip wordnet
+RUN unzip /root/nltk_data/corpora/wordnet.zip -d /root/nltk_data/corpora/
 # Make port 8080 available to the world outside this container
 EXPOSE 8080

core-model-prediction/hypothesis.py CHANGED Viewed

@@ -8,14 +8,12 @@ from collections import defaultdict
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 from gemma2b_dependencies import Gemma2BDependencies
 from string import punctuation
 class BaseModelHypothesis:
     def __init__(self):
-        nltk.download('punkt')
-        nltk.download('wordnet')
-        nltk.download('averaged_perceptron_tagger')
         self.analyzer = SentimentIntensityAnalyzer()
         self.lexicon_df = pd.read_csv(
             "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
@@ -64,7 +62,15 @@ class BaseModelHypothesis:
         not_normalized_features = self.calculate_not_normalized_features(text)
         all_features = normalized_text_length_features + not_normalized_features
         features_df = pd.DataFrame(
-            [all_features], columns=self.additional_feature_columns)
         # Scaling features
         features_df[self.features_normalized_text_length] = self.scaler_normalized_text_length.transform(
@@ -84,7 +90,7 @@ class BaseModelHypothesis:
         return features
     def calculate_not_normalized_features(self, text: str) -> List[float]:
-        sentiment_intensity = self.measure_sentiment_intensity(text)
         readability_scores = self.measure_readability(text)
         perplexity = [self.gemma2bdependencies.calculate_perplexity(text)]
         burstiness = [self.gemma2bdependencies.calculate_burstiness(text)]

 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 from gemma2b_dependencies import Gemma2BDependencies
 from string import punctuation
+import os
+import zipfile
 class BaseModelHypothesis:
     def __init__(self):
         self.analyzer = SentimentIntensityAnalyzer()
         self.lexicon_df = pd.read_csv(
             "https://storage.googleapis.com/interview-ai-detector/higher-accuracy-final-model/NRC-Emotion-Lexicon.csv")
         not_normalized_features = self.calculate_not_normalized_features(text)
         all_features = normalized_text_length_features + not_normalized_features
         features_df = pd.DataFrame(
+            [all_features], columns=[
+                "nn_ratio", "nns_ratio", "jj_ratio", "in_ratio", "dt_ratio", "vb_ratio", "prp_ratio", "rb_ratio",
+                "negative_emotion_proportions", "positive_emotion_proportions", "fear_emotion_proportions",
+                "anger_emotion_proportions", "trust_emotion_proportions", "sadness_emotion_proportions",
+                "disgust_emotion_proportions", "anticipation_emotion_proportions", "joy_emotion_proportions",
+                "surprise_emotion_proportions", "unique_words_ratio",
+                "compound_score", "gunning_fog", "smog_index", "dale_chall_score",
+                "perplexity", "burstiness"
+            ])
         # Scaling features
         features_df[self.features_normalized_text_length] = self.scaler_normalized_text_length.transform(
         return features
     def calculate_not_normalized_features(self, text: str) -> List[float]:
+        sentiment_intensity = [self.measure_sentiment_intensity(text)]
         readability_scores = self.measure_readability(text)
         perplexity = [self.gemma2bdependencies.calculate_perplexity(text)]
         burstiness = [self.gemma2bdependencies.calculate_burstiness(text)]

core-model-prediction/models/random_forest.joblib CHANGED Viewed

Binary files a/core-model-prediction/models/random_forest.joblib and b/core-model-prediction/models/random_forest.joblib differ

core-model-prediction/random_forest_model.py CHANGED Viewed

@@ -15,7 +15,8 @@ class RandomForestModel:
     def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
         features_df = pd.DataFrame([secondary_model_features], columns=[
                                    self.secondary_model_features])
-        features_df = self.scaler.transform(features_df)
         return features_df.values.astype(np.float32).reshape(1, -1)
     def predict(self, secondary_model_features: List[float]):

     def preprocess_input(self, secondary_model_features: List[float]) -> np.ndarray:
         features_df = pd.DataFrame([secondary_model_features], columns=[
                                    self.secondary_model_features])
+        features_df[self.secondary_model_features] = self.scaler.transform(
+            features_df[self.secondary_model_features])
         return features_df.values.astype(np.float32).reshape(1, -1)
     def predict(self, secondary_model_features: List[float]):

core-model-prediction/requirements.txt CHANGED Viewed

@@ -1,7 +1,7 @@
-nltk
-vaderSentiment
 pandas
-textstat
 scikit-learn==1.2.2
 transformers==4.38.2
 fastapi

+nltk==3.2.4
+vaderSentiment==3.3.2
 pandas
+textstat==0.7.3
 scikit-learn==1.2.2
 transformers==4.38.2
 fastapi

core-model-prediction/scalers/rf_scaler.joblib CHANGED Viewed

Binary files a/core-model-prediction/scalers/rf_scaler.joblib and b/core-model-prediction/scalers/rf_scaler.joblib differ

core-model-prediction/scalers/{torch-scaler-normalized-text-length.joblib → scaler-normalized-text-length.joblib} RENAMED Viewed

File without changes

core-model-prediction/scalers/{torch-scaler-not-normalized.joblib → scaler-not-normalized.joblib} RENAMED Viewed

File without changes