Add new pipelines and adjust labels

#3
by hw01558 - opened
Files changed (1) hide show
  1. app.py +62 -52
app.py CHANGED
@@ -2,48 +2,64 @@ from flask import Flask, render_template, request, redirect, url_for
2
  from joblib import load
3
  import pandas as pd
4
  import re
5
- from customFunctions import *
6
  import json
7
  import datetime
 
8
 
9
  pd.set_option('display.max_colwidth', 1000)
10
 
 
 
 
 
 
 
 
 
 
 
11
  PIPELINES = [
12
  {
13
  'id': 1,
14
  'name': 'Baseline',
15
- 'pipeline': load("pipeline_ex1_s1.joblib")
16
  },
17
  {
18
  'id': 2,
19
  'name': 'Trained on a FeedForward NN',
20
- 'pipeline': load("pipeline_ex1_s2.joblib")
21
  },
22
  {
23
  'id': 3,
24
  'name': 'Trained on a CRF',
25
- 'pipeline': load("pipeline_ex1_s3.joblib")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
- #{
28
- # 'id': 4,
29
- # 'name': 'Trained on a small dataset',
30
- # 'pipeline': load("pipeline_ex2_s1.joblib")
31
- #},
32
- #{
33
- # 'id': 5,
34
- # 'name': 'Trained on a large dataset',
35
- # 'pipeline': load("pipeline_ex2_s2.joblib")
36
- #},
37
- #{
38
- # 'id': 6,
39
- # 'name': 'Embedded using TFIDF',
40
- # 'pipeline': load("pipeline_ex3_s1.joblib")
41
- #},
42
- #{
43
- # 'id': 7,
44
- # 'name': 'Embedded using ?',
45
- # 'pipeline': load("pipeline_ex3_s2.joblib")
46
- #},
47
 
48
  ]
49
 
@@ -59,30 +75,29 @@ def get_name_by_id(pipelines, pipeline_id):
59
 
60
  def requestResults(text, pipeline):
61
  labels = pipeline.predict(text)
62
- print(labels.ndim)
63
- if labels.ndim != 1:
64
- flattened_predictions = []
65
- for sentence in labels:
66
- for tag in sentence:
67
- flattened_predictions.append(tag)
68
- labels = flattened_predictions
69
- print(labels)
70
- labels = [int(label) for label in labels]
71
- tag_encoder = LabelEncoder()
72
- tag_encoder.fit(['B-AC', 'O', 'B-LF', 'I-LF'])
73
- decoded_labels = tag_encoder.inverse_transform(labels)
74
- return decoded_labels
75
-
76
- LOG_FILE = "usage_log.jsonl" # Each line is a JSON object
77
 
78
- def log_interaction(user_input, model_name, predictions):
79
- print("====== Interaction Log ======")
80
- print("Timestamp:", datetime.datetime.utcnow().isoformat())
81
- print("Model:", model_name)
82
- print("Input:", user_input)
83
- print("Predictions:", predictions)
84
- print("=============================")
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  app = Flask(__name__)
@@ -108,10 +123,7 @@ def get_data():
108
  labels = requestResults(tokens_fomatted, pipeline)
109
  results = dict(zip(tokens, labels))
110
 
111
- print(f"[INFO] Model: {name}")
112
- print(f"[INFO] Input: {text}")
113
- print(f"[INFO] Output: {results}")
114
-
115
 
116
  return render_template('index.html', results=results, name=name, pipelines= pipeline_metadata)
117
 
@@ -119,5 +131,3 @@ def get_data():
119
  if __name__ == '__main__':
120
  app.run(host="0.0.0.0", port=7860)
121
 
122
- #if __name__ == '__main__':
123
- #app.run(host="0.0.0.0", port=7860)
 
2
  from joblib import load
3
  import pandas as pd
4
  import re
5
+ from customFunctions2 import *
6
  import json
7
  import datetime
8
+ import numpy as np
9
 
10
  pd.set_option('display.max_colwidth', 1000)
11
 
12
import torch

# Patch torch.load so checkpoints pickled on a GPU machine can be
# unpickled on CPU-only hosts (e.g. Hugging Face Spaces free tier).
original_torch_load = torch.load


def cpu_load(*args, **kwargs):
    """Wrapper around ``torch.load`` that defaults ``map_location`` to CPU.

    Unlike a blind override, this respects an explicit ``map_location``
    supplied by the caller (keyword or second positional argument), which
    would otherwise raise ``TypeError: got multiple values for argument``.
    """
    if 'map_location' not in kwargs and len(args) < 2:
        kwargs['map_location'] = torch.device('cpu')
    return original_torch_load(*args, **kwargs)


torch.load = cpu_load
20
+
21
+
22
# (id, display name, joblib filename) for every pipeline the UI offers.
# Kept as flat triples so adding a model is a one-line change.
_PIPELINE_SPECS = [
    (1, 'Baseline', 'pipeline_ex1_s1.joblib'),
    (2, 'Trained on a FeedForward NN', 'pipeline_ex1_s2.joblib'),
    (3, 'Trained on a CRF', 'pipeline_ex1_s3.joblib'),
    (4, 'Trained on a small dataset', 'pipeline_ex2_s3.joblib'),
    (5, 'Trained on a large dataset', 'pipeline_ex2_s2.joblib'),
    (6, 'Embedded using TFIDF', 'pipeline_ex3_s2.joblib'),
    (7, 'Embedded using GloVe', 'pipeline_ex3_s3.joblib'),
    (8, 'Embedded using Bio2Vec', 'pipeline_ex3_s4.joblib'),
]

# Eagerly deserialize each fitted pipeline at import time, same as before:
# a missing/corrupt joblib file still fails fast on startup.
PIPELINES = [
    {'id': pid, 'name': label, 'pipeline': load("pipelines/" + fname)}
    for pid, label, fname in _PIPELINE_SPECS
]
65
 
 
75
 
76
def requestResults(text, pipeline):
    """Predict tags for one formatted sentence and return its label list.

    Args:
        text: input in whatever format ``pipeline.predict`` expects —
            assumes a sequence holding a single formatted sentence, so
            predictions come back as [sentence_labels] (TODO confirm
            against the caller's ``tokens_fomatted``).
        pipeline: a fitted pipeline exposing ``.predict``.

    Returns:
        The list of predicted labels for the first sentence, or an empty
        list when the pipeline produced no predictions.
    """
    predictions = pipeline.predict(text)
    # Some pipelines return numpy arrays, others plain lists; normalise.
    if isinstance(predictions, np.ndarray):
        predictions = predictions.tolist()
    # Guard the empty case so we return [] instead of raising IndexError.
    return predictions[0] if predictions else []
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
import os

LOG_FILE = "/tmp/usage_log.jsonl"  # Use temporary file path for Hugging Face Spaces


def log_interaction(user_input, model_name, predictions):
    """Append one JSON line describing a prediction request to LOG_FILE.

    Args:
        user_input: the raw text the user submitted.
        model_name: display name of the pipeline that produced the labels.
        predictions: JSON-serialisable result (token -> label mapping).

    Logging is best-effort: any I/O failure is printed and swallowed so
    it can never break the request that triggered it.
    """
    log_entry = {
        # Timezone-aware UTC; datetime.utcnow() is deprecated since 3.12.
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "model": model_name,
        "user_input": user_input,
        "predictions": predictions,
    }

    try:
        os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)  # Ensure the directory exists
        with open(LOG_FILE, "a") as log_file:
            log_file.write(json.dumps(log_entry) + "\n")
    except Exception as e:
        # Deliberate best-effort swallow: report and continue.
        print(f"Error writing to log: {e}")
101
 
102
 
103
  app = Flask(__name__)
 
123
  labels = requestResults(tokens_fomatted, pipeline)
124
  results = dict(zip(tokens, labels))
125
 
126
+ log_interaction(text, name, results)
 
 
 
127
 
128
  return render_template('index.html', results=results, name=name, pipelines= pipeline_metadata)
129
 
 
131
if __name__ == '__main__':
    # Bind to all interfaces; 7860 is the port Hugging Face Spaces expects.
    app.run(host="0.0.0.0", port=7860)
133