Spaces:
Sleeping
Sleeping
Add new pipelines and adjust labels (#3)
Browse files- Add new pipelines and adjust labels (1fd311db24e931dfd0c020ed6cd7adaa22f764bd)
Co-authored-by: H Wilks <[email protected]>
app.py
CHANGED
@@ -2,48 +2,64 @@ from flask import Flask, render_template, request, redirect, url_for
|
|
2 |
from joblib import load
|
3 |
import pandas as pd
|
4 |
import re
|
5 |
-
from
|
6 |
import json
|
7 |
import datetime
|
|
|
8 |
|
9 |
pd.set_option('display.max_colwidth', 1000)
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
PIPELINES = [
|
12 |
{
|
13 |
'id': 1,
|
14 |
'name': 'Baseline',
|
15 |
-
'pipeline': load("pipeline_ex1_s1.joblib")
|
16 |
},
|
17 |
{
|
18 |
'id': 2,
|
19 |
'name': 'Trained on a FeedForward NN',
|
20 |
-
'pipeline': load("pipeline_ex1_s2.joblib")
|
21 |
},
|
22 |
{
|
23 |
'id': 3,
|
24 |
'name': 'Trained on a CRF',
|
25 |
-
'pipeline': load("pipeline_ex1_s3.joblib")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
},
|
27 |
-
#{
|
28 |
-
# 'id': 4,
|
29 |
-
# 'name': 'Trained on a small dataset',
|
30 |
-
# 'pipeline': load("pipeline_ex2_s1.joblib")
|
31 |
-
#},
|
32 |
-
#{
|
33 |
-
# 'id': 5,
|
34 |
-
# 'name': 'Trained on a large dataset',
|
35 |
-
# 'pipeline': load("pipeline_ex2_s2.joblib")
|
36 |
-
#},
|
37 |
-
#{
|
38 |
-
# 'id': 6,
|
39 |
-
# 'name': 'Embedded using TFIDF',
|
40 |
-
# 'pipeline': load("pipeline_ex3_s1.joblib")
|
41 |
-
#},
|
42 |
-
#{
|
43 |
-
# 'id': 7,
|
44 |
-
# 'name': 'Embedded using ?',
|
45 |
-
# 'pipeline': load("pipeline_ex3_s2.joblib")
|
46 |
-
#},
|
47 |
|
48 |
]
|
49 |
|
@@ -59,30 +75,29 @@ def get_name_by_id(pipelines, pipeline_id):
|
|
59 |
|
60 |
def requestResults(text, pipeline):
|
61 |
labels = pipeline.predict(text)
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
for sentence in labels:
|
66 |
-
for tag in sentence:
|
67 |
-
flattened_predictions.append(tag)
|
68 |
-
labels = flattened_predictions
|
69 |
-
print(labels)
|
70 |
-
labels = [int(label) for label in labels]
|
71 |
-
tag_encoder = LabelEncoder()
|
72 |
-
tag_encoder.fit(['B-AC', 'O', 'B-LF', 'I-LF'])
|
73 |
-
decoded_labels = tag_encoder.inverse_transform(labels)
|
74 |
-
return decoded_labels
|
75 |
-
|
76 |
-
LOG_FILE = "usage_log.jsonl" # Each line is a JSON object
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
print("Model:", model_name)
|
82 |
-
print("Input:", user_input)
|
83 |
-
print("Predictions:", predictions)
|
84 |
-
print("=============================")
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
|
88 |
app = Flask(__name__)
|
@@ -108,10 +123,7 @@ def get_data():
|
|
108 |
labels = requestResults(tokens_fomatted, pipeline)
|
109 |
results = dict(zip(tokens, labels))
|
110 |
|
111 |
-
|
112 |
-
print(f"[INFO] Input: {text}")
|
113 |
-
print(f"[INFO] Output: {results}")
|
114 |
-
|
115 |
|
116 |
return render_template('index.html', results=results, name=name, pipelines= pipeline_metadata)
|
117 |
|
@@ -119,5 +131,3 @@ def get_data():
|
|
119 |
if __name__ == '__main__':
|
120 |
app.run(host="0.0.0.0", port=7860)
|
121 |
|
122 |
-
#if __name__ == '__main__':
|
123 |
-
#app.run(host="0.0.0.0", port=7860)
|
|
|
2 |
from joblib import load
|
3 |
import pandas as pd
|
4 |
import re
|
5 |
+
from customFunctions2 import *
|
6 |
import json
|
7 |
import datetime
|
8 |
+
import numpy as np
|
9 |
|
10 |
pd.set_option('display.max_colwidth', 1000)
|
11 |
|
12 |
+
import torch
|
13 |
+
|
14 |
+
# Patch torch.load to always load on CPU
|
15 |
+
# Keep a handle on the unpatched loader so the wrapper can delegate to it.
original_torch_load = torch.load


def cpu_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``map_location`` forced to CPU.

    Any ``map_location`` supplied by the caller (as a keyword or as the
    second positional argument) is overridden rather than duplicated, so the
    wrapper never raises "got multiple values for keyword argument".

    Args:
        *args: Positional arguments forwarded to the original ``torch.load``.
        **kwargs: Keyword arguments forwarded to the original ``torch.load``.

    Returns:
        Whatever the original ``torch.load`` returns.
    """
    cpu = torch.device('cpu')
    if len(args) >= 2:
        # map_location is torch.load's second positional parameter.
        args = (args[0], cpu) + tuple(args[2:])
    else:
        kwargs['map_location'] = cpu
    return original_torch_load(*args, **kwargs)


# Patch torch.load globally so every later load lands on CPU
# (presumably because the serialized pipelines were saved from a GPU
# machine -- TODO confirm).
torch.load = cpu_load
|
20 |
+
|
21 |
+
|
22 |
# (display name, joblib file inside the "pipelines/" directory), in the order
# they are exposed; ids are assigned sequentially starting at 1.
_PIPELINE_SOURCES = (
    ('Baseline', 'pipeline_ex1_s1.joblib'),
    ('Trained on a FeedForward NN', 'pipeline_ex1_s2.joblib'),
    ('Trained on a CRF', 'pipeline_ex1_s3.joblib'),
    ('Trained on a small dataset', 'pipeline_ex2_s3.joblib'),
    ('Trained on a large dataset', 'pipeline_ex2_s2.joblib'),
    ('Embedded using TFIDF', 'pipeline_ex3_s2.joblib'),
    ('Embedded using GloVe', 'pipeline_ex3_s3.joblib'),
    ('Embedded using Bio2Vec', 'pipeline_ex3_s4.joblib'),
)

# Registry of selectable pipelines. Every pipeline is deserialized eagerly
# when this module is imported.
PIPELINES = [
    {
        'id': pipeline_id,
        'name': label,
        'pipeline': load(f"pipelines/{filename}"),
    }
    for pipeline_id, (label, filename) in enumerate(_PIPELINE_SOURCES, start=1)
]
|
65 |
|
|
|
75 |
|
76 |
def requestResults(text, pipeline):
    """Run ``pipeline.predict`` on ``text`` and return the first prediction.

    Args:
        text: Input handed unchanged to ``pipeline.predict``.
        pipeline: Object exposing a ``predict`` method.

    Returns:
        The first element of the prediction, with NumPy arrays converted to
        plain Python lists. An empty list is returned when the pipeline
        produces no predictions at all.
    """
    labels = pipeline.predict(text)
    if isinstance(labels, np.ndarray):
        labels = labels.tolist()
    # Guard against an empty prediction instead of raising IndexError.
    if len(labels) == 0:
        return []
    first = labels[0]
    # A list of arrays would otherwise leak NumPy values to callers that
    # serialize the result as JSON.
    if isinstance(first, np.ndarray):
        first = first.tolist()
    return first
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
+
import os
|
83 |
+
|
84 |
+
LOG_FILE = "/tmp/usage_log.jsonl" # Use temporary file path for Hugging Face Spaces
|
|
|
|
|
|
|
|
|
85 |
|
86 |
+
def log_interaction(user_input, model_name, predictions, log_file=None):
    """Append one interaction to the usage log as a single JSON line.

    Logging is best-effort: any failure is printed and swallowed so that a
    logging problem never breaks the request being served.

    Args:
        user_input: The raw text submitted by the user.
        model_name: Display name of the pipeline that produced the output.
        predictions: JSON-serializable prediction payload.
        log_file: Optional path of the log file; defaults to ``LOG_FILE``.
    """
    target = LOG_FILE if log_file is None else log_file
    log_entry = {
        # Timezone-aware UTC timestamp (utcnow() is deprecated and naive);
        # the ISO string therefore carries a "+00:00" offset.
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "model": model_name,
        "user_input": user_input,
        "predictions": predictions
    }

    try:
        # Serialize first so a non-serializable payload fails before the
        # file is touched.
        line = json.dumps(log_entry)
        parent = os.path.dirname(target)
        # dirname is "" for a bare filename, and os.makedirs("") raises.
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(target, "a", encoding="utf-8") as log_handle:
            log_handle.write(line + "\n")
    except Exception as e:
        # Deliberately broad: logging must never take the request down.
        print(f"Error writing to log: {e}")
|
101 |
|
102 |
|
103 |
app = Flask(__name__)
|
|
|
123 |
labels = requestResults(tokens_fomatted, pipeline)
|
124 |
results = dict(zip(tokens, labels))
|
125 |
|
126 |
+
log_interaction(text, name, results)
|
|
|
|
|
|
|
127 |
|
128 |
return render_template('index.html', results=results, name=name, pipelines= pipeline_metadata)
|
129 |
|
|
|
131 |
if __name__ == '__main__':
|
132 |
app.run(host="0.0.0.0", port=7860)
|
133 |
|
|
|
|