Commit 7a92e6c by bearking58 (1 parent: cacd064)

feat: switch to 4 features on 2nd model
core-model-prediction/models/secondary_weights.joblib CHANGED
Binary files a/core-model-prediction/models/secondary_weights.joblib and b/core-model-prediction/models/secondary_weights.joblib differ
 
core-model-prediction/prediction.py CHANGED
@@ -56,8 +56,8 @@ def process_instance(data: PredictRequest):
     # Data preparation for 2nd model
     secondary_model_dependencies = SecondaryModelDependencies()
     secondary_model_features = secondary_model_dependencies.calculate_features(
-        answer, main_model_probability, backspace_count, typing_duration,
-        letter_click_counts, gpt35_answer, gpt4o_answer)
+        answer, main_model_probability, backspace_count,
+        letter_click_counts, gpt4o_answer)
 
     # 2nd model prediction
     secondary_model = SecondaryModel()
@@ -67,13 +67,13 @@ def process_instance(data: PredictRequest):
     return {
         "predicted_class": "AI" if secondary_model_probability > 0.57 else "HUMAN",
         "main_model_probability": str(main_model_probability),
-        "secondary_model_probability": secondary_model_probability,
+        "secondary_model_probability": str(secondary_model_probability),
         "confidence": get_confidence(main_model_probability, secondary_model_probability)
     }
 
 
 def get_confidence(main_model_output: float, secondary_model_output: int):
-    threshold = 0.57
+    threshold = 0.54
     if (main_model_output >= 0.8 and secondary_model_output >= threshold) or (main_model_output <= 0.2 and secondary_model_output <= 1 - threshold):
         return 'High Confidence'
     elif (0.5 < main_model_output < 0.8 and secondary_model_output >= threshold) or (0.2 < main_model_output <= 0.5 and secondary_model_output < threshold):
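
The branch touched by the new threshold can be exercised directly; a minimal sketch with invented probabilities, covering only the 'High Confidence' branch that is visible in the hunk above (the later branches are outside this diff):

    # Illustrative only: inputs that satisfy the first condition shown above,
    # using the new threshold = 0.54. Both probabilities are invented.
    assert get_confidence(0.85, 0.70) == 'High Confidence'  # 0.85 >= 0.8 and 0.70 >= 0.54
    assert get_confidence(0.15, 0.40) == 'High Confidence'  # 0.15 <= 0.2 and 0.40 <= 1 - 0.54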
core-model-prediction/scalers/secondary_scaler.joblib CHANGED
Binary files a/core-model-prediction/scalers/secondary_scaler.joblib and b/core-model-prediction/scalers/secondary_scaler.joblib differ
 
core-model-prediction/secondary_model.py CHANGED
@@ -9,8 +9,8 @@ class SecondaryModel:
         self.scaler = joblib.load("scalers/secondary_scaler.joblib")
         self.model = joblib.load("models/secondary_weights.joblib")
         self.secondary_model_features = [
-            "machine_probability", "backspace_count_normalized", "typing_duration_normalized",
-            "letter_discrepancy_normalized", "cosine_sim_gpt35", "cosine_sim_gpt4o"
+            "machine_probability", "backspace_count_normalized",
+            "letter_discrepancy_normalized", "cosine_sim_gpt4o"
         ]
 
     def preprocess_input(self, secondary_model_features: List[float]) -> pd.DataFrame:
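
The body of preprocess_input is not part of this commit; the following is a hedged sketch of how the trimmed four-column feature list would typically flow through the stored scaler, assuming a scikit-learn style scaler object (everything beyond the column names is an assumption):

    import joblib
    import pandas as pd

    # Hedged sketch, not the repository's preprocess_input: build a one-row frame
    # over the four remaining feature columns and scale it with the saved scaler.
    scaler = joblib.load("scalers/secondary_scaler.joblib")
    columns = ["machine_probability", "backspace_count_normalized",
               "letter_discrepancy_normalized", "cosine_sim_gpt4o"]

    features = [0.91, 0.02, 0.10, 0.83]  # order must match `columns`
    frame = pd.DataFrame([features], columns=columns)
    scaled = pd.DataFrame(scaler.transform(frame), columns=columns)
    print(scaled)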
core-model-prediction/secondary_model_dependencies.py CHANGED
@@ -7,21 +7,16 @@ class SecondaryModelDependencies:
         self.text_similarity_model = SentenceTransformer(
             'sentence-transformers/all-mpnet-base-v2')
 
-    def calculate_features(self, answer: str, probability: float, backspace_count: int, typing_duration: int,
-                           letter_click_counts: dict[str, int], gpt35_answer: str, gpt4o_answer: str):
+    def calculate_features(self, answer: str, probability: float, backspace_count: int,
+                           letter_click_counts: dict[str, int], gpt4o_answer: str):
         backspace_count_normalized = backspace_count / len(answer)
-        typing_duration_normalized = typing_duration / len(answer)
         letter_discrepancy = self.calculate_letter_discrepancy(
             answer, letter_click_counts)
-
-        cosine_sim_gpt35 = self.calculate_similarity_gpt35(
-            answer, gpt35_answer)
         cosine_sim_gpt4o = self.calculate_similarity_gpt4o(
             answer, gpt4o_answer)
 
         return [
-            probability, backspace_count_normalized, typing_duration_normalized,
-            letter_discrepancy, cosine_sim_gpt35, cosine_sim_gpt4o
+            probability, backspace_count_normalized, letter_discrepancy, cosine_sim_gpt4o
         ]
 
     def calculate_letter_discrepancy(self, text: str, letter_click_counts: dict[str, int]):
@@ -39,14 +34,6 @@ class SecondaryModelDependencies:
 
         return discrepancy_ratio_normalized
 
-    def calculate_similarity_gpt35(self, answer: str, gpt35_answer: str) -> float:
-        embedding1 = self.text_similarity_model.encode(
-            [answer], convert_to_tensor=True)
-        embedding2 = self.text_similarity_model.encode(
-            [gpt35_answer], convert_to_tensor=True)
-        cosine_scores = util.cos_sim(embedding1, embedding2)
-        return cosine_scores.item()
-
     def calculate_similarity_gpt4o(self, answer: str, gpt4o_answer: str) -> float:
         embedding1 = self.text_similarity_model.encode(
             [answer], convert_to_tensor=True)
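
End to end, the dependency helper now takes five arguments and returns the four features in the order the secondary model expects. A small usage sketch; the answer text, click counts, and GPT-4o answer are invented for illustration:

    # Hedged usage sketch of the new calculate_features signature; values are invented.
    deps = SecondaryModelDependencies()
    features = deps.calculate_features(
        answer="The mitochondria is the powerhouse of the cell.",
        probability=0.91,  # main model probability
        backspace_count=3,
        letter_click_counts={"t": 6, "h": 4, "e": 7},
        gpt4o_answer="Mitochondria generate most of a cell's ATP.",
    )
    # -> [probability, backspace_count_normalized, letter_discrepancy, cosine_sim_gpt4o]
    print(features)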