Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,121 +1,124 @@
|
|
1 |
-
from flask import Flask, render_template, request, redirect, url_for
|
2 |
-
from joblib import load
|
3 |
-
import pandas as pd
|
4 |
-
import re
|
5 |
-
from customFunctions import *
|
6 |
-
import json
|
7 |
-
import datetime
|
8 |
-
|
9 |
-
pd.set_option('display.max_colwidth', 1000)
|
10 |
-
|
11 |
-
PIPELINES = [
|
12 |
-
{
|
13 |
-
'id': 1,
|
14 |
-
'name': 'Baseline',
|
15 |
-
'pipeline': load("pipeline_ex1_s1.joblib")
|
16 |
-
},
|
17 |
-
{
|
18 |
-
'id': 2,
|
19 |
-
'name': 'Trained on a FeedForward NN',
|
20 |
-
'pipeline': load("pipeline_ex1_s2.joblib")
|
21 |
-
},
|
22 |
-
{
|
23 |
-
'id': 3,
|
24 |
-
'name': 'Trained on a CRF',
|
25 |
-
'pipeline': load("pipeline_ex1_s3.joblib")
|
26 |
-
},
|
27 |
-
#{
|
28 |
-
# 'id': 4,
|
29 |
-
# 'name': 'Trained on a small dataset',
|
30 |
-
# 'pipeline': load("pipeline_ex2_s1.joblib")
|
31 |
-
#},
|
32 |
-
#{
|
33 |
-
# 'id': 5,
|
34 |
-
# 'name': 'Trained on a large dataset',
|
35 |
-
# 'pipeline': load("pipeline_ex2_s2.joblib")
|
36 |
-
#},
|
37 |
-
#{
|
38 |
-
# 'id': 6,
|
39 |
-
# 'name': 'Embedded using TFIDF',
|
40 |
-
# 'pipeline': load("pipeline_ex3_s1.joblib")
|
41 |
-
#},
|
42 |
-
#{
|
43 |
-
# 'id': 7,
|
44 |
-
# 'name': 'Embedded using ?',
|
45 |
-
# 'pipeline': load("pipeline_ex3_s2.joblib")
|
46 |
-
#},
|
47 |
-
|
48 |
-
]
|
49 |
-
|
50 |
-
pipeline_metadata = [{'id': p['id'], 'name': p['name']} for p in PIPELINES]
|
51 |
-
|
52 |
-
def get_pipeline_by_id(pipelines, pipeline_id):
|
53 |
-
return next((p['pipeline'] for p in pipelines if p['id'] == pipeline_id), None)
|
54 |
-
|
55 |
-
def get_name_by_id(pipelines, pipeline_id):
|
56 |
-
return next((p['name'] for p in pipelines if p['id'] == pipeline_id), None)
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
def requestResults(text, pipeline):
|
61 |
-
labels = pipeline.predict(text)
|
62 |
-
print(labels.ndim)
|
63 |
-
if labels.ndim != 1:
|
64 |
-
flattened_predictions = []
|
65 |
-
for sentence in labels:
|
66 |
-
for tag in sentence:
|
67 |
-
flattened_predictions.append(tag)
|
68 |
-
labels = flattened_predictions
|
69 |
-
print(labels)
|
70 |
-
labels = [int(label) for label in labels]
|
71 |
-
tag_encoder = LabelEncoder()
|
72 |
-
tag_encoder.fit(['B-AC', 'O', 'B-LF', 'I-LF'])
|
73 |
-
decoded_labels = tag_encoder.inverse_transform(labels)
|
74 |
-
return decoded_labels
|
75 |
-
|
76 |
-
LOG_FILE = "usage_log.jsonl" # Each line is a JSON object
|
77 |
-
|
78 |
-
def log_interaction(user_input, model_name, predictions):
|
79 |
-
log_entry = {
|
80 |
-
"timestamp": datetime.datetime.utcnow().isoformat(),
|
81 |
-
"user_input": user_input,
|
82 |
-
"model": model_name,
|
83 |
-
"predictions": predictions
|
84 |
-
}
|
85 |
-
with open(LOG_FILE, "a") as f:
|
86 |
-
f.write(json.dumps(log_entry) + "\n")
|
87 |
-
|
88 |
-
|
89 |
-
app = Flask(__name__)
|
90 |
-
|
91 |
-
|
92 |
-
@app.route('/')
|
93 |
-
def index():
|
94 |
-
return render_template('index.html', pipelines= pipeline_metadata)
|
95 |
-
|
96 |
-
|
97 |
-
@app.route('/', methods=['POST'])
|
98 |
-
def get_data():
|
99 |
-
if request.method == 'POST':
|
100 |
-
|
101 |
-
text = request.form['search']
|
102 |
-
tokens = re.findall(r"\w+|[^\w\s]", text)
|
103 |
-
tokens_fomatted = pd.Series([pd.Series(tokens)])
|
104 |
-
|
105 |
-
pipeline_id = int(request.form['pipeline_select'])
|
106 |
-
pipeline = get_pipeline_by_id(PIPELINES, pipeline_id)
|
107 |
-
name = get_name_by_id(PIPELINES, pipeline_id)
|
108 |
-
|
109 |
-
labels = requestResults(tokens_fomatted, pipeline)
|
110 |
-
results = dict(zip(tokens, labels))
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, render_template, request, redirect, url_for
|
2 |
+
from joblib import load
|
3 |
+
import pandas as pd
|
4 |
+
import re
|
5 |
+
from customFunctions import *
|
6 |
+
import json
|
7 |
+
import datetime
|
8 |
+
|
9 |
+
pd.set_option('display.max_colwidth', 1000)
|
10 |
+
|
11 |
+
# Registry of the selectable models. Every entry pairs a numeric id and a
# display name with the fitted pipeline object, which is deserialized from
# disk once, while this module is being imported.
PIPELINES = [
    {'id': model_id, 'name': label, 'pipeline': load(artifact)}
    for model_id, label, artifact in (
        (1, 'Baseline', "pipeline_ex1_s1.joblib"),
        (2, 'Trained on a FeedForward NN', "pipeline_ex1_s2.joblib"),
        (3, 'Trained on a CRF', "pipeline_ex1_s3.joblib"),
        # Currently disabled entries:
        # (4, 'Trained on a small dataset', "pipeline_ex2_s1.joblib"),
        # (5, 'Trained on a large dataset', "pipeline_ex2_s2.joblib"),
        # (6, 'Embedded using TFIDF', "pipeline_ex3_s1.joblib"),
        # (7, 'Embedded using ?', "pipeline_ex3_s2.joblib"),
    )
]

# Lightweight view of the registry (id and name only, no model objects).
pipeline_metadata = [
    {'id': entry['id'], 'name': entry['name']} for entry in PIPELINES
]
|
51 |
+
|
52 |
+
def get_pipeline_by_id(pipelines, pipeline_id):
    """Return the model registered under ``pipeline_id``.

    ``pipelines`` is scanned in order; the ``'pipeline'`` value of the first
    entry whose ``'id'`` equals ``pipeline_id`` is returned. When no entry
    matches, the result is ``None``.
    """
    for entry in pipelines:
        if entry['id'] == pipeline_id:
            return entry['pipeline']
    return None
|
54 |
+
|
55 |
+
def get_name_by_id(pipelines, pipeline_id):
    """Return the display name registered under ``pipeline_id``.

    ``pipelines`` is scanned in order; the ``'name'`` value of the first
    entry whose ``'id'`` equals ``pipeline_id`` is returned. When no entry
    matches, the result is ``None``.
    """
    for entry in pipelines:
        if entry['id'] == pipeline_id:
            return entry['name']
    return None
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
# Tag vocabulary in sorted order: the tag at position i is the one that
# integer code i stands for. A label encoder fitted on these four tags
# assigns codes in this same sorted order, so a constant lookup table is
# enough and nothing has to be refitted per request.
_TAG_CLASSES = tuple(sorted(['B-AC', 'O', 'B-LF', 'I-LF']))


def requestResults(text, pipeline):
    """Run ``pipeline`` on ``text`` and decode its integer output into tags.

    Args:
        text: Input forwarded unchanged to ``pipeline.predict``.
        pipeline: Any object exposing ``predict(text)``. The prediction may
            be a 1-D array of codes, a multi-dimensional array (flattened in
            row-major order), or a plain list whose items are codes or
            per-sentence sequences of codes.

    Returns:
        A 1-D numpy array of tag strings, one per predicted code, in
        prediction order.

    Raises:
        ValueError: If a predicted code does not correspond to any tag.
    """
    import numpy as np  # local import: this module does not import numpy itself

    raw = pipeline.predict(text)
    if hasattr(raw, 'ndim'):
        # Array-like output: collapse any per-sentence axis into one sequence.
        flat = np.ravel(raw)
    else:
        # Plain Python output (e.g. a list of per-sentence lists): flatten
        # one level, keeping bare codes as they are.
        flat = []
        for item in raw:
            if isinstance(item, (list, tuple, np.ndarray)):
                flat.extend(item)
            else:
                flat.append(item)

    codes = [int(code) for code in flat]

    # Reject out-of-range codes explicitly; negative values would otherwise
    # silently index the tag table from the end.
    unseen = sorted({code for code in codes if not 0 <= code < len(_TAG_CLASSES)})
    if unseen:
        raise ValueError(f"y contains previously unseen labels: {unseen}")

    return np.array(_TAG_CLASSES)[np.asarray(codes, dtype=np.intp)]
|
75 |
+
|
76 |
+
LOG_FILE = "usage_log.jsonl"  # Each line is a JSON object


def _to_jsonable(value):
    """Convert array-like values (anything with ``tolist``) for ``json.dumps``."""
    tolist = getattr(value, "tolist", None)
    if callable(tolist):
        return tolist()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def log_interaction(user_input, model_name, predictions):
    """Append one usage record to ``LOG_FILE`` as a single JSON line.

    Args:
        user_input: The text that was submitted.
        model_name: Display name of the model that produced the predictions.
        predictions: The model output. Plain JSON types are written as-is;
            objects exposing ``tolist()`` (such as numpy arrays) are
            converted to lists first.

    Raises:
        TypeError: If ``predictions`` contains a value that is neither JSON
            serializable nor convertible through ``tolist()``.
        OSError: If the log file cannot be opened or written.
    """
    # Same naive-UTC ISO text that utcnow().isoformat() produced, obtained
    # without the deprecated utcnow() call.
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    log_entry = {
        "timestamp": now_utc.isoformat(),
        "user_input": user_input,
        "model": model_name,
        "predictions": predictions,
    }
    # Serialize before opening the file so a failure leaves the log untouched.
    line = json.dumps(log_entry, default=_to_jsonable) + "\n"
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(line)
|
87 |
+
|
88 |
+
|
89 |
+
# Flask application object; the route decorators below register on it.
app = Flask(__name__)
|
90 |
+
|
91 |
+
|
92 |
+
@app.route('/')
def index():
    """Render the landing page with the list of selectable models."""
    template_context = {'pipelines': pipeline_metadata}
    return render_template('index.html', **template_context)
|
95 |
+
|
96 |
+
|
97 |
+
@app.route('/', methods=['POST'])
def get_data():
    """Label the submitted text with the selected model and render the result.

    Reads two form fields: ``search`` (the text to label) and
    ``pipeline_select`` (the numeric id of an entry in ``PIPELINES``).

    Returns:
        The rendered ``index.html`` with ``results`` (token -> tag), the
        model ``name`` and the model list. When ``pipeline_select`` is
        missing, not an integer, or not a registered id, the bare page is
        rendered with HTTP status 400 instead.
    """
    # This view is only registered for POST, so no method check is needed.
    text = request.form['search']
    # Split into runs of word characters and single punctuation marks.
    tokens = re.findall(r"\w+|[^\w\s]", text)

    # type=int yields None instead of raising on a missing or malformed value.
    pipeline_id = request.form.get('pipeline_select', type=int)
    pipeline = get_pipeline_by_id(PIPELINES, pipeline_id)
    if pipeline is None:
        return render_template('index.html', pipelines=pipeline_metadata), 400
    name = get_name_by_id(PIPELINES, pipeline_id)

    if tokens:
        # Wrap the single sentence as a Series holding one Series of tokens,
        # which is the input handed to the pipeline's predict().
        tokens_formatted = pd.Series([pd.Series(tokens)])
        labels = requestResults(tokens_formatted, pipeline)
    else:
        # Nothing to label (empty or whitespace-only input): skip the model.
        labels = []

    # NOTE(review): keying by token collapses repeated tokens into a single
    # entry (the last label wins). Kept as a dict because the template's
    # expectations are not visible from here -- confirm before switching to
    # a list of (token, label) pairs.
    results = dict(zip(tokens, labels))

    print(f"[INFO] Model: {name}")
    print(f"[INFO] Input: {text}")
    print(f"[INFO] Output: {results}")

    return render_template('index.html', results=results, name=name, pipelines=pipeline_metadata)
|
118 |
+
|
119 |
+
|
120 |
+
# Script entry point: start Flask's built-in server only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    # Listen on all network interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")
|