hw01558 committed on
Commit
1fd311d
·
verified ·
1 Parent(s): 5d7b3b3

Add new pipelines and adjust labels

Browse files

Added new pipelines in PIPELINES
Made sure torch loads onto the CPU (unsure if this is needed)
Changed requestResults to match the new way of handling labels during model training
Tried to add logs back in

Files changed (1) hide show
  1. app.py +62 -52
app.py CHANGED
@@ -2,48 +2,64 @@ from flask import Flask, render_template, request, redirect, url_for
2
  from joblib import load
3
  import pandas as pd
4
  import re
5
- from customFunctions import *
6
  import json
7
  import datetime
 
8
 
9
  pd.set_option('display.max_colwidth', 1000)
10
 
 
 
 
 
 
 
 
 
 
 
11
  PIPELINES = [
12
  {
13
  'id': 1,
14
  'name': 'Baseline',
15
- 'pipeline': load("pipeline_ex1_s1.joblib")
16
  },
17
  {
18
  'id': 2,
19
  'name': 'Trained on a FeedForward NN',
20
- 'pipeline': load("pipeline_ex1_s2.joblib")
21
  },
22
  {
23
  'id': 3,
24
  'name': 'Trained on a CRF',
25
- 'pipeline': load("pipeline_ex1_s3.joblib")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
- #{
28
- # 'id': 4,
29
- # 'name': 'Trained on a small dataset',
30
- # 'pipeline': load("pipeline_ex2_s1.joblib")
31
- #},
32
- #{
33
- # 'id': 5,
34
- # 'name': 'Trained on a large dataset',
35
- # 'pipeline': load("pipeline_ex2_s2.joblib")
36
- #},
37
- #{
38
- # 'id': 6,
39
- # 'name': 'Embedded using TFIDF',
40
- # 'pipeline': load("pipeline_ex3_s1.joblib")
41
- #},
42
- #{
43
- # 'id': 7,
44
- # 'name': 'Embedded using ?',
45
- # 'pipeline': load("pipeline_ex3_s2.joblib")
46
- #},
47
 
48
  ]
49
 
@@ -59,30 +75,29 @@ def get_name_by_id(pipelines, pipeline_id):
59
 
60
  def requestResults(text, pipeline):
61
  labels = pipeline.predict(text)
62
- print(labels.ndim)
63
- if labels.ndim != 1:
64
- flattened_predictions = []
65
- for sentence in labels:
66
- for tag in sentence:
67
- flattened_predictions.append(tag)
68
- labels = flattened_predictions
69
- print(labels)
70
- labels = [int(label) for label in labels]
71
- tag_encoder = LabelEncoder()
72
- tag_encoder.fit(['B-AC', 'O', 'B-LF', 'I-LF'])
73
- decoded_labels = tag_encoder.inverse_transform(labels)
74
- return decoded_labels
75
-
76
- LOG_FILE = "usage_log.jsonl" # Each line is a JSON object
77
 
78
- def log_interaction(user_input, model_name, predictions):
79
- print("====== Interaction Log ======")
80
- print("Timestamp:", datetime.datetime.utcnow().isoformat())
81
- print("Model:", model_name)
82
- print("Input:", user_input)
83
- print("Predictions:", predictions)
84
- print("=============================")
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  app = Flask(__name__)
@@ -108,10 +123,7 @@ def get_data():
108
  labels = requestResults(tokens_fomatted, pipeline)
109
  results = dict(zip(tokens, labels))
110
 
111
- print(f"[INFO] Model: {name}")
112
- print(f"[INFO] Input: {text}")
113
- print(f"[INFO] Output: {results}")
114
-
115
 
116
  return render_template('index.html', results=results, name=name, pipelines= pipeline_metadata)
117
 
@@ -119,5 +131,3 @@ def get_data():
119
  if __name__ == '__main__':
120
  app.run(host="0.0.0.0", port=7860)
121
 
122
- #if __name__ == '__main__':
123
- #app.run(host="0.0.0.0", port=7860)
 
2
  from joblib import load
3
  import pandas as pd
4
  import re
5
+ from customFunctions2 import *
6
  import json
7
  import datetime
8
+ import numpy as np
9
 
10
  pd.set_option('display.max_colwidth', 1000)
11
 
12
+ import torch
13
+
14
+ # Patch torch.load to always load on CPU
15
+ original_torch_load = torch.load
16
+ def cpu_load(*args, **kwargs):
17
+ return original_torch_load(*args, map_location=torch.device('cpu'), **kwargs)
18
+
19
+ torch.load = cpu_load
20
+
21
+
22
  PIPELINES = [
23
  {
24
  'id': 1,
25
  'name': 'Baseline',
26
+ 'pipeline': load("pipelines/pipeline_ex1_s1.joblib")
27
  },
28
  {
29
  'id': 2,
30
  'name': 'Trained on a FeedForward NN',
31
+ 'pipeline': load("pipelines/pipeline_ex1_s2.joblib")
32
  },
33
  {
34
  'id': 3,
35
  'name': 'Trained on a CRF',
36
+ 'pipeline': load("pipelines/pipeline_ex1_s3.joblib")
37
+ },
38
+ {
39
+ 'id': 4,
40
+ 'name': 'Trained on a small dataset',
41
+ 'pipeline': load("pipelines/pipeline_ex2_s3.joblib")
42
+ },
43
+ {
44
+ 'id': 5,
45
+ 'name': 'Trained on a large dataset',
46
+ 'pipeline': load("pipelines/pipeline_ex2_s2.joblib")
47
+ },
48
+ {
49
+ 'id': 6,
50
+ 'name': 'Embedded using TFIDF',
51
+ 'pipeline': load("pipelines/pipeline_ex3_s2.joblib")
52
+ },
53
+ {
54
+ 'id': 7,
55
+ 'name': 'Embedded using GloVe',
56
+ 'pipeline': load("pipelines/pipeline_ex3_s3.joblib")
57
+ },
58
+ {
59
+ 'id': 8,
60
+ 'name': 'Embedded using Bio2Vec',
61
+ 'pipeline': load("pipelines/pipeline_ex3_s4.joblib")
62
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  ]
65
 
 
75
 
76
  def requestResults(text, pipeline):
77
  labels = pipeline.predict(text)
78
+ if isinstance(labels, np.ndarray):
79
+ labels = labels.tolist()
80
+ return labels[0]
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ import os
83
+
84
+ LOG_FILE = "/tmp/usage_log.jsonl" # Use temporary file path for Hugging Face Spaces
 
 
 
 
85
 
86
+ def log_interaction(user_input, model_name, predictions):
87
+ log_entry = {
88
+ "timestamp": datetime.datetime.utcnow().isoformat(),
89
+ "model": model_name,
90
+ "user_input": user_input,
91
+ "predictions": predictions
92
+ }
93
+
94
+ try:
95
+ os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True) # Ensure the directory exists
96
+ with open(LOG_FILE, "a") as log_file:
97
+ log_file.write(json.dumps(log_entry) + "\n")
98
+ except Exception as e:
99
+ print(f"Error writing to log: {e}")
100
+ # You could also return a response with the error, or raise an error to stop the process
101
 
102
 
103
  app = Flask(__name__)
 
123
  labels = requestResults(tokens_fomatted, pipeline)
124
  results = dict(zip(tokens, labels))
125
 
126
+ log_interaction(text, name, results)
 
 
 
127
 
128
  return render_template('index.html', results=results, name=name, pipelines= pipeline_metadata)
129
 
 
131
  if __name__ == '__main__':
132
  app.run(host="0.0.0.0", port=7860)
133