mo01018 committed
Commit f49f7d2 · verified · 1 Parent(s): ac7815c

Update logging

Files changed (1)
  1. app.py +40 -96
app.py CHANGED
@@ -1,4 +1,4 @@
-from flask import Flask, render_template, request, redirect, url_for
+from flask import Flask, render_template, request
 from joblib import load
 import pandas as pd
 import re
@@ -9,156 +9,100 @@ import numpy as np
 from huggingface_hub import hf_hub_download
 import torch
 import os
+import logging
 
+# Ensure proper display for debugging
 pd.set_option('display.max_colwidth', 1000)
 
-
 # Patch torch.load to always load on CPU
 original_torch_load = torch.load
 def cpu_load(*args, **kwargs):
     return original_torch_load(*args, map_location=torch.device('cpu'), **kwargs)
-
 torch.load = cpu_load
 
-def load_pipeline_from_hub(filename):
-    cache_dir = "/tmp/hf_cache"
-    os.environ["HF_HUB_CACHE"] = cache_dir # optional but informative
-
-    repo_id = 'hw01558/nlp-coursework-pipelines'
-    local_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
-    return load(local_path)
-
-    #repo_id = 'hw01558/nlp-coursework-pipelines'
-    #local_path = hf_hub_download(repo_id=repo_id, filename=filename)
-    #return load(local_path)
+# Flask app setup
+app = Flask(__name__)
 
+# Logging setup
+LOG_DIR = "/tmp/logs"  # Use a universally writable directory
+LOG_FILE = os.path.join(LOG_DIR, "usage_log.jsonl")
+os.makedirs(LOG_DIR, exist_ok=True)
+logging.basicConfig(
+    filename=LOG_FILE,
+    level=logging.INFO,
+    format='%(asctime)s [%(levelname)s] %(message)s'
+)
+
+# Define pipelines
 PIPELINES = [
-    {
-        'id': 8,
-        'name': 'Embedded using BioWordVec',
-        'filename': "pipeline_ex3_s4.joblib"
-    },
-    {
-        'id': 1,
-        'name': 'Baseline',
-        'filename': "pipeline_ex1_s1.joblib"
-    },
-    {
-        'id': 2,
-        'name': 'Trained on a FeedForward NN',
-        'filename': "pipeline_ex1_s2.joblib"
-    },
-    {
-        'id': 3,
-        'name': 'Trained on a CRF',
-        'filename': "pipeline_ex1_s3.joblib"
-    },
-    {
-        'id': 4,
-        'name': 'Trained on a small dataset',
-        'filename': "pipeline_ex2_s3.joblib"
-    },
-    {
-        'id': 5,
-        'name': 'Trained on a large dataset',
-        'filename': "pipeline_ex2_s2.joblib"
-    },
-    {
-        'id': 6,
-        'name': 'Embedded using TFIDF',
-        'filename': "pipeline_ex3_s2.joblib"
-    },
-    {
-        'id': 7,
-        'name': 'Embedded using GloVe',
-        'filename': "pipeline_ex3_s3.joblib"
-    },
-
-
+    {'id': 8, 'name': 'Embedded using BioWordVec', 'filename': "pipeline_ex3_s4.joblib"},
+    {'id': 1, 'name': 'Baseline', 'filename': "pipeline_ex1_s1.joblib"},
+    {'id': 2, 'name': 'Trained on a FeedForward NN', 'filename': "pipeline_ex1_s2.joblib"},
+    {'id': 3, 'name': 'Trained on a CRF', 'filename': "pipeline_ex1_s3.joblib"},
+    {'id': 4, 'name': 'Trained on a small dataset', 'filename': "pipeline_ex2_s3.joblib"},
+    {'id': 5, 'name': 'Trained on a large dataset', 'filename': "pipeline_ex2_s2.joblib"},
+    {'id': 6, 'name': 'Embedded using TFIDF', 'filename': "pipeline_ex3_s2.joblib"},
+    {'id': 7, 'name': 'Embedded using GloVe', 'filename': "pipeline_ex3_s3.joblib"},
 ]
 
 pipeline_metadata = [{'id': p['id'], 'name': p['name']} for p in PIPELINES]
 
+# Helper functions
+def load_pipeline_from_hub(filename):
+    cache_dir = "/tmp/hf_cache"
+    os.environ["HF_HUB_CACHE"] = cache_dir
+    repo_id = 'hw01558/nlp-coursework-pipelines'
+    local_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
+    return load(local_path)
+
 def get_pipeline_by_id(pipelines, pipeline_id):
     return next((p['filename'] for p in pipelines if p['id'] == pipeline_id), None)
 
 def get_name_by_id(pipelines, pipeline_id):
     return next((p['name'] for p in pipelines if p['id'] == pipeline_id), None)
 
-
-
 def requestResults(text, pipeline):
     labels = pipeline.predict(text)
    if isinstance(labels, np.ndarray):
         labels = labels.tolist()
     return labels[0]
 
-import os
-import logging
-
-#logging.basicConfig(
-#    level=logging.INFO,
-#    format='%(asctime)s [%(levelname)s] %(message)s',
-#    handlers=[
-#        logging.FileHandler("app.log",mode='w')
-#
-#]
-
-#)
-
-LOG_FILE = "./usage_log.jsonl" # Use temporary file path for Hugging Face Spaces
-LOG_FILE = os.path.join("logs", "usage_log.jsonl")
-
 def log_interaction(user_input, model_name, predictions):
-    # https://betterstack.com/community/guides/logging/how-to-start-logging-with-python/
-    logging.basicConfig(filename=LOG_FILE, level=logging.INFO)
     log_entry = {
         "timestamp": datetime.datetime.utcnow().isoformat(),
         "model": model_name,
         "user_input": user_input,
         "predictions": predictions
     }
-
     try:
-        os.makedirs("logs", exist_ok=True)
-        # with open(LOG_FILE, "a") as log_file:
-        #     log_file.write(json.dumps(log_entry) + "\n")
-        logging.info(log_entry)
+        logging.info(json.dumps(log_entry))
         print("[INFO] Logged interaction successfully.")
     except Exception as e:
         print(f"[ERROR] Could not write log entry: {e}")
 
-
-app = Flask(__name__)
-
-
+# Routes
 @app.route('/')
 def index():
-    return render_template('index.html', pipelines= pipeline_metadata)
-
+    return render_template('index.html', pipelines=pipeline_metadata)
 
 @app.route('/', methods=['POST'])
 def get_data():
     if request.method == 'POST':
-
         text = request.form['search']
         tokens = re.findall(r"\w+|[^\w\s]", text)
-        tokens_fomatted = pd.Series([pd.Series(tokens)])
+        tokens_formatted = pd.Series([pd.Series(tokens)])
 
         pipeline_id = int(request.form['pipeline_select'])
         pipeline = load_pipeline_from_hub(get_pipeline_by_id(PIPELINES, pipeline_id))
         name = get_name_by_id(PIPELINES, pipeline_id)
-
-        labels = requestResults(tokens_fomatted, pipeline)
+
+        labels = requestResults(tokens_formatted, pipeline)
         results = dict(zip(tokens, labels))
 
         log_interaction(text, name, results)
 
-        return render_template('index.html', results=results, name=name, pipelines= pipeline_metadata)
-
+        return render_template('index.html', results=results, name=name, pipelines=pipeline_metadata)
 
+# Run the app
 if __name__ == '__main__':
     app.run(host="0.0.0.0", port=7860)
-
-#if __name__ == '__main__':
-    #app.run(host="0.0.0.0", port=7860)
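
After this change, each request appends one line to /tmp/logs/usage_log.jsonl through the logging module, in the form "<asctime> [INFO] <json>". Because of the %(asctime)s [%(levelname)s] prefix, the file is not pure JSONL despite its extension, so a consumer must strip the prefix before parsing. A minimal reader sketch follows; the read_usage_log helper and its split-on-"] " heuristic are illustrative assumptions, not part of the app, and it presumes app.py imports json (needed by json.dumps) in the lines this hunk does not show.

import json

def read_usage_log(path="/tmp/logs/usage_log.jsonl"):
    # Each line looks like: 2025-01-01 12:00:00,000 [INFO] {"timestamp": ..., "model": ...}
    # Split off the "<asctime> [LEVEL] " prefix, then parse the JSON payload.
    entries = []
    with open(path) as f:
        for line in f:
            _, _, payload = line.partition("] ")
            if payload.strip():
                entries.append(json.loads(payload))
    return entries

# Example: list which models have been queried.
for entry in read_usage_log():
    print(entry["timestamp"], entry["model"])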