Spaces:
Sleeping
Sleeping
Add new pipelines and adjust labels
Browse files
Added new pipelines in PIPELINES
Made sure torch loads models onto the CPU (unsure if this is needed)
Changed requestResults to match the new way labels are handled when training the models
Tried to add logs back in
app.py
CHANGED
@@ -2,48 +2,64 @@ from flask import Flask, render_template, request, redirect, url_for
|
|
2 |
from joblib import load
|
3 |
import pandas as pd
|
4 |
import re
|
5 |
-
from
|
6 |
import json
|
7 |
import datetime
|
|
|
8 |
|
9 |
pd.set_option('display.max_colwidth', 1000)
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
PIPELINES = [
|
12 |
{
|
13 |
'id': 1,
|
14 |
'name': 'Baseline',
|
15 |
-
'pipeline': load("pipeline_ex1_s1.joblib")
|
16 |
},
|
17 |
{
|
18 |
'id': 2,
|
19 |
'name': 'Trained on a FeedForward NN',
|
20 |
-
'pipeline': load("pipeline_ex1_s2.joblib")
|
21 |
},
|
22 |
{
|
23 |
'id': 3,
|
24 |
'name': 'Trained on a CRF',
|
25 |
-
'pipeline': load("pipeline_ex1_s3.joblib")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
},
|
27 |
-
#{
|
28 |
-
# 'id': 4,
|
29 |
-
# 'name': 'Trained on a small dataset',
|
30 |
-
# 'pipeline': load("pipeline_ex2_s1.joblib")
|
31 |
-
#},
|
32 |
-
#{
|
33 |
-
# 'id': 5,
|
34 |
-
# 'name': 'Trained on a large dataset',
|
35 |
-
# 'pipeline': load("pipeline_ex2_s2.joblib")
|
36 |
-
#},
|
37 |
-
#{
|
38 |
-
# 'id': 6,
|
39 |
-
# 'name': 'Embedded using TFIDF',
|
40 |
-
# 'pipeline': load("pipeline_ex3_s1.joblib")
|
41 |
-
#},
|
42 |
-
#{
|
43 |
-
# 'id': 7,
|
44 |
-
# 'name': 'Embedded using ?',
|
45 |
-
# 'pipeline': load("pipeline_ex3_s2.joblib")
|
46 |
-
#},
|
47 |
|
48 |
]
|
49 |
|
@@ -59,30 +75,29 @@ def get_name_by_id(pipelines, pipeline_id):
|
|
59 |
|
60 |
def requestResults(text, pipeline):
|
61 |
labels = pipeline.predict(text)
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
for sentence in labels:
|
66 |
-
for tag in sentence:
|
67 |
-
flattened_predictions.append(tag)
|
68 |
-
labels = flattened_predictions
|
69 |
-
print(labels)
|
70 |
-
labels = [int(label) for label in labels]
|
71 |
-
tag_encoder = LabelEncoder()
|
72 |
-
tag_encoder.fit(['B-AC', 'O', 'B-LF', 'I-LF'])
|
73 |
-
decoded_labels = tag_encoder.inverse_transform(labels)
|
74 |
-
return decoded_labels
|
75 |
-
|
76 |
-
LOG_FILE = "usage_log.jsonl" # Each line is a JSON object
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
print("Model:", model_name)
|
82 |
-
print("Input:", user_input)
|
83 |
-
print("Predictions:", predictions)
|
84 |
-
print("=============================")
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
|
88 |
app = Flask(__name__)
|
@@ -108,10 +123,7 @@ def get_data():
|
|
108 |
labels = requestResults(tokens_fomatted, pipeline)
|
109 |
results = dict(zip(tokens, labels))
|
110 |
|
111 |
-
|
112 |
-
print(f"[INFO] Input: {text}")
|
113 |
-
print(f"[INFO] Output: {results}")
|
114 |
-
|
115 |
|
116 |
return render_template('index.html', results=results, name=name, pipelines= pipeline_metadata)
|
117 |
|
@@ -119,5 +131,3 @@ def get_data():
|
|
119 |
if __name__ == '__main__':
|
120 |
app.run(host="0.0.0.0", port=7860)
|
121 |
|
122 |
-
#if __name__ == '__main__':
|
123 |
-
#app.run(host="0.0.0.0", port=7860)
|
|
|
2 |
from joblib import load
|
3 |
import pandas as pd
|
4 |
import re
|
5 |
+
from customFunctions2 import *
|
6 |
import json
|
7 |
import datetime
|
8 |
+
import numpy as np
|
9 |
|
10 |
pd.set_option('display.max_colwidth', 1000)
|
11 |
|
12 |
+
import torch
|
13 |
+
|
14 |
+
# Patch torch.load to always load on CPU
|
15 |
+
original_torch_load = torch.load


def cpu_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``map_location`` forced to the CPU.

    Any ``map_location`` supplied by the caller (by keyword or as the second
    positional argument) is overridden rather than passed alongside the forced
    value, which previously raised ``TypeError`` for duplicate arguments.

    Args:
        *args: Positional arguments forwarded to the original ``torch.load``.
        **kwargs: Keyword arguments forwarded to the original ``torch.load``.

    Returns:
        Whatever the original ``torch.load`` returns.
    """
    cpu_device = torch.device('cpu')
    if len(args) >= 2:
        # map_location is the second positional parameter of torch.load;
        # replace it in place so it is not passed twice.
        args = (args[0], cpu_device, *args[2:])
    else:
        # Overwrite (not duplicate) any caller-supplied keyword value.
        kwargs['map_location'] = cpu_device
    return original_torch_load(*args, **kwargs)


# Module-level monkey patch: every later torch.load call goes through cpu_load.
torch.load = cpu_load
|
20 |
+
|
21 |
+
|
22 |
# Registry of selectable pipelines. Each entry holds a numeric id, a display
# name, and the object returned by joblib's ``load`` for the given file.
# Files are loaded eagerly, in the order listed, when this module is imported.
PIPELINES = [
    {'id': pipeline_id, 'name': display_name, 'pipeline': load(joblib_path)}
    for pipeline_id, display_name, joblib_path in (
        (1, 'Baseline', "pipelines/pipeline_ex1_s1.joblib"),
        (2, 'Trained on a FeedForward NN', "pipelines/pipeline_ex1_s2.joblib"),
        (3, 'Trained on a CRF', "pipelines/pipeline_ex1_s3.joblib"),
        (4, 'Trained on a small dataset', "pipelines/pipeline_ex2_s3.joblib"),
        (5, 'Trained on a large dataset', "pipelines/pipeline_ex2_s2.joblib"),
        (6, 'Embedded using TFIDF', "pipelines/pipeline_ex3_s2.joblib"),
        (7, 'Embedded using GloVe', "pipelines/pipeline_ex3_s3.joblib"),
        (8, 'Embedded using Bio2Vec', "pipelines/pipeline_ex3_s4.joblib"),
    )
]
|
65 |
|
|
|
75 |
|
76 |
def requestResults(text, pipeline):
    """Run ``pipeline.predict`` on ``text`` and return the first prediction.

    Args:
        text: Input passed unchanged to ``pipeline.predict``.
        pipeline: Any object exposing a ``predict`` method.

    Returns:
        The first element of the prediction output (presumably the label
        sequence for the first input sentence -- confirm against the
        pipelines). NumPy arrays are converted to plain Python lists. An
        empty list is returned when the pipeline produces no predictions,
        instead of raising ``IndexError``.
    """
    labels = pipeline.predict(text)
    if isinstance(labels, np.ndarray):
        labels = labels.tolist()
    # Guard against empty output so the caller gets an empty result, not a crash.
    if len(labels) == 0:
        return []
    first = labels[0]
    # A list of arrays would otherwise leak NumPy types to the caller.
    if isinstance(first, np.ndarray):
        first = first.tolist()
    return first
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
+
import os
|
83 |
+
|
84 |
+
LOG_FILE = "/tmp/usage_log.jsonl" # Use temporary file path for Hugging Face Spaces
|
|
|
|
|
|
|
|
|
85 |
|
86 |
+
def log_interaction(user_input, model_name, predictions):
    """Append one JSON line describing an interaction to ``LOG_FILE``.

    Logging is best-effort: failures to serialise or write are reported on
    stdout and never propagated to the caller.

    Args:
        user_input: The text submitted by the user.
        model_name: Display name of the pipeline that produced the output.
        predictions: The prediction result to record.
    """
    # Naive UTC timestamp, same text format as the previous utcnow() call,
    # obtained without the deprecated datetime.utcnow().
    timestamp = (
        datetime.datetime.now(datetime.timezone.utc)
        .replace(tzinfo=None)
        .isoformat()
    )
    log_entry = {
        "timestamp": timestamp,
        "model": model_name,
        "user_input": user_input,
        "predictions": predictions,
    }

    try:
        # default=str is deliberate: a non-JSON value in the predictions
        # (e.g. a NumPy scalar) is stringified instead of dropping the entry.
        line = json.dumps(log_entry, default=str) + "\n"

        # dirname is "" for a bare filename, and os.makedirs("") raises.
        log_dir = os.path.dirname(LOG_FILE)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        with open(LOG_FILE, "a", encoding="utf-8") as log_file:
            log_file.write(line)
    except (OSError, TypeError, ValueError) as e:
        print(f"Error writing to log: {e}")
|
101 |
|
102 |
|
103 |
app = Flask(__name__)
|
|
|
123 |
labels = requestResults(tokens_fomatted, pipeline)
|
124 |
results = dict(zip(tokens, labels))
|
125 |
|
126 |
+
log_interaction(text, name, results)
|
|
|
|
|
|
|
127 |
|
128 |
return render_template('index.html', results=results, name=name, pipelines= pipeline_metadata)
|
129 |
|
|
|
131 |
if __name__ == '__main__':
|
132 |
app.run(host="0.0.0.0", port=7860)
|
133 |
|
|
|
|