devendergarg14 committed · Commit 4310b84 · verified · 1 Parent(s): f599190

Upload app.py

Files changed (1): app.py +69 -82
app.py CHANGED
@@ -11,35 +11,48 @@ import socket
 app = Flask(__name__, static_folder='.', static_url_path='')

 # --- Configuration ---
-DATA_FILE = "data.json"
-PING_INTERVAL_SECONDS = 60 # Backend pings every 60 seconds
-HISTORY_DURATION_SECONDS = 60 * 60 # Store history for 1 hour
-
-# --- Data Store ---
-# Structure: { "id": "uuid", "url": "string", "status": "pending/ok/error/checking",
-#              "ip": "string", "responseTime": float_ms, "lastChecked": "iso_string_utc",
-#              "history": [{"timestamp": float_unix_ts_seconds, "status": "ok/error"}],
-#              "_thread": threading.Thread_object, "_stop_event": threading.Event_object }
-monitored_urls_store = {} # In-memory store: id -> url_data
-lock = threading.Lock() # To protect access to monitored_urls_store
+DATA_DIR = "/data" # Writable directory on Hugging Face Spaces
+if not os.path.exists(DATA_DIR) and not os.environ.get('SPACE_ID'): # For local dev if /data isn't preset
+    print(f"Warning: {DATA_DIR} not found. Using current directory for data.json (local dev mode).")
+    DATA_DIR = "." # Fallback to current dir for local testing if /data does not exist
+
+DATA_FILE = os.path.join(DATA_DIR, "data.json")
+PING_INTERVAL_SECONDS = 60
+HISTORY_DURATION_SECONDS = 60 * 60
+
+monitored_urls_store = {}
+lock = threading.Lock()

 # --- Helper Functions ---
+def ensure_data_dir_exists():
+    """Ensures the data directory exists."""
+    if DATA_DIR == ".": # No need to create current directory
+        return
+    if not os.path.exists(DATA_DIR):
+        try:
+            os.makedirs(DATA_DIR)
+            print(f"Created data directory: {DATA_DIR}")
+        except OSError as e:
+            print(f"Error creating data directory {DATA_DIR}: {e}. Data persistence may fail.")
+
 def save_data_to_json():
-    # This function must be called with 'lock' acquired
-    serializable_data = {}
-    for url_id, data in monitored_urls_store.items():
-        s_data = data.copy()
-        s_data.pop("_thread", None)
-        s_data.pop("_stop_event", None)
-        serializable_data[url_id] = s_data
-    try:
-        with open(DATA_FILE, 'w') as f:
-            json.dump(serializable_data, f, indent=2)
-    except IOError as e:
-        print(f"Error saving data to {DATA_FILE}: {e}")
+    with lock:
+        ensure_data_dir_exists() # Ensure directory exists before attempting to write
+        serializable_data = {}
+        for url_id, data in monitored_urls_store.items():
+            s_data = data.copy()
+            s_data.pop("_thread", None)
+            s_data.pop("_stop_event", None)
+            serializable_data[url_id] = s_data
+        try:
+            with open(DATA_FILE, 'w') as f:
+                json.dump(serializable_data, f, indent=2)
+        except IOError as e:
+            print(f"Error saving data to {DATA_FILE}: {e}") # This is where your error was logged

 def load_data_from_json():
     global monitored_urls_store
+    ensure_data_dir_exists() # Ensure directory exists before attempting to read
     if os.path.exists(DATA_FILE):
         try:
             with open(DATA_FILE, 'r') as f:
@@ -47,7 +60,6 @@ def load_data_from_json():

             temp_store = {}
             for url_id_key, data_item in loaded_json_data.items():
-                # Ensure essential fields and use 'id' from data if present, else key
                 data_item.setdefault('id', url_id_key)
                 current_id = data_item['id']
                 data_item.setdefault('status', 'pending')
@@ -57,7 +69,7 @@ def load_data_from_json():
                 data_item.setdefault('history', data_item.get('history', []))
                 temp_store[current_id] = data_item

-            with lock: # Lock before modifying global monitored_urls_store
+            with lock:
                 monitored_urls_store = temp_store

         except json.JSONDecodeError:
@@ -67,6 +79,7 @@ def load_data_from_json():
             print(f"Error loading data from {DATA_FILE}: {e}. Starting fresh.")
             with lock: monitored_urls_store = {}
     else:
+        print(f"{DATA_FILE} not found. Starting with an empty list.")
         with lock: monitored_urls_store = {}

     url_ids_to_start_monitoring = []
@@ -78,11 +91,9 @@ def load_data_from_json():

 def get_host_ip_address(hostname_str):
     try:
-        # Check if hostname_str is already a valid IP address
-        socket.inet_aton(hostname_str) # Throws an OSError if not a valid IPv4 string
+        socket.inet_aton(hostname_str)
         return hostname_str
     except OSError:
-        # It's not an IP, so try to resolve it as a hostname
         try:
             ip_address = socket.gethostbyname(hostname_str)
             return ip_address
@@ -94,7 +105,6 @@ def get_host_ip_address(hostname_str):
             return 'N/A'

 def prune_url_history(url_data_entry):
-    # Assumes 'lock' is acquired or called from the thread managing this entry
     cutoff_time = time.time() - HISTORY_DURATION_SECONDS
     url_data_entry['history'] = [
         entry for entry in url_data_entry.get('history', []) if entry['timestamp'] >= cutoff_time
@@ -110,21 +120,19 @@ def execute_url_check(url_id_to_check):

         print(f"Checking {current_url_data['url']} (ID: {url_id_to_check})...")
         current_url_data['status'] = 'checking'
-        url_config_snapshot = current_url_data.copy() # Snapshot for use outside lock
+        url_config_snapshot = current_url_data.copy()

     if not url_config_snapshot: return

     check_start_time = time.perf_counter()
     final_check_status = 'error'
     http_response_time_ms = None
-    # Identify your bot to website owners
     http_headers = {'User-Agent': 'URLPinger/1.0 (HuggingFace Space Bot)'}

     try:
-        # Attempt HEAD request first
         try:
             head_response = requests.head(url_config_snapshot['url'], timeout=10, allow_redirects=True, headers=http_headers)
-            if 200 <= head_response.status_code < 400: # OK or Redirect
+            if 200 <= head_response.status_code < 400:
                 final_check_status = 'ok'
             else:
                 print(f"HEAD for {url_config_snapshot['url']} returned {head_response.status_code}. Trying GET.")
@@ -133,11 +141,10 @@ def execute_url_check(url_id_to_check):
         except requests.RequestException as e_head:
             print(f"HEAD failed for {url_config_snapshot['url']}: {e_head}. Trying GET...")

-        # If HEAD was not conclusive, try GET
         if final_check_status != 'ok':
             try:
                 get_response = requests.get(url_config_snapshot['url'], timeout=15, allow_redirects=True, headers=http_headers)
-                if get_response.ok: # Only 2xx status codes
+                if get_response.ok:
                     final_check_status = 'ok'
                 else:
                     print(f"GET for {url_config_snapshot['url']} status: {get_response.status_code}")
@@ -157,25 +164,24 @@ def execute_url_check(url_id_to_check):
         final_check_status = 'error'

     with lock:
-        if url_id_to_check not in monitored_urls_store: return # URL might have been removed during check
+        if url_id_to_check not in monitored_urls_store: return

         live_url_data = monitored_urls_store[url_id_to_check]
         live_url_data['status'] = final_check_status
         live_url_data['responseTime'] = round(http_response_time_ms) if http_response_time_ms is not None else None
-        live_url_data['lastChecked'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) # ISO 8601 UTC
+        live_url_data['lastChecked'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

         current_history_list = live_url_data.get('history', [])
-        current_history_list.append({'timestamp': time.time(), 'status': final_check_status}) # timestamp in seconds
+        current_history_list.append({'timestamp': time.time(), 'status': final_check_status})
         live_url_data['history'] = current_history_list
         prune_url_history(live_url_data)

-        save_data_to_json()
+    save_data_to_json() # This will now try to save to /data/data.json
     print(f"Finished check for {live_url_data['url']}: {final_check_status}, {http_response_time_ms} ms")

 def pinger_thread_function(url_id_param, stop_event_param):
     while not stop_event_param.is_set():
         execute_url_check(url_id_param)
-        # Sleep for PING_INTERVAL_SECONDS, but check stop_event periodically
         for _ in range(PING_INTERVAL_SECONDS):
             if stop_event_param.is_set(): break
             time.sleep(1)
@@ -189,15 +195,13 @@ def start_url_monitoring_thread(target_url_id):

     url_data_entry = monitored_urls_store[target_url_id]

-    # Stop existing thread if it's alive
     if "_thread" in url_data_entry and url_data_entry["_thread"].is_alive():
         print(f"Monitor for URL ID {target_url_id} already running. Attempting to restart.")
-        if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]: # Check if _stop_event exists
+        if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]:
             url_data_entry["_stop_event"].set()
-        url_data_entry["_thread"].join(timeout=3) # Wait for thread to stop
+        url_data_entry["_thread"].join(timeout=3)

     new_stop_event = threading.Event()
-    # daemon=True allows main program to exit even if threads are running
     new_thread = threading.Thread(target=pinger_thread_function, args=(target_url_id, new_stop_event), daemon=True)

     url_data_entry["_thread"] = new_thread
@@ -207,16 +211,15 @@ def start_url_monitoring_thread(target_url_id):
     print(f"Started/Restarted monitoring for URL ID {target_url_id}: {url_data_entry['url']}")

 def stop_url_monitoring_thread(target_url_id):
-    # This function must be called with 'lock' acquired
-    if target_url_id in monitored_urls_store:
-        url_data_entry = monitored_urls_store[target_url_id]
-        if "_thread" in url_data_entry and url_data_entry["_thread"].is_alive():
-            print(f"Signaling stop for monitor thread of URL ID {target_url_id}")
-            if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]: # Check if _stop_event exists
-                url_data_entry["_stop_event"].set()
-            # Not joining here to keep API responsive, daemon thread will exit.
-        url_data_entry.pop("_thread", None)
-        url_data_entry.pop("_stop_event", None)
+    with lock:
+        if target_url_id in monitored_urls_store:
+            url_data_entry = monitored_urls_store[target_url_id]
+            if "_thread" in url_data_entry and url_data_entry["_thread"].is_alive():
+                print(f"Signaling stop for monitor thread of URL ID {target_url_id}")
+                if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]:
+                    url_data_entry["_stop_event"].set()
+            url_data_entry.pop("_thread", None)
+            url_data_entry.pop("_stop_event", None)

 # --- API Endpoints ---
 @app.route('/')
@@ -226,7 +229,6 @@ def serve_index():
 @app.route('/api/urls', methods=['GET'])
 def get_all_urls():
     with lock:
-        # Prepare data for sending: list of url data, no thread objects
         response_list = []
         for data_item in monitored_urls_store.values():
             display_item = data_item.copy()
@@ -244,7 +246,7 @@ def add_new_url():
     input_url = request_data['url'].strip()

     if not input_url.startswith('http://') and not input_url.startswith('https://'):
-        input_url = 'https://' + input_url # Default to https
+        input_url = 'https://' + input_url

     try:
         parsed_input_url = urlparse(input_url)
@@ -255,12 +257,11 @@ def add_new_url():
         return jsonify({"error": "Invalid URL format"}), 400

     with lock:
-        # Check for duplicates (case-insensitive, ignoring trailing slashes)
         normalized_new_url = input_url.rstrip('/').lower()
-        for existing_url_id in list(monitored_urls_store.keys()): # Iterate over keys to avoid issues if store is modified
+        for existing_url_id in list(monitored_urls_store.keys()):
            existing_url_data = monitored_urls_store.get(existing_url_id)
            if existing_url_data and existing_url_data['url'].rstrip('/').lower() == normalized_new_url:
-                return jsonify({"error": "URL already monitored"}), 409 # Conflict
+                return jsonify({"error": "URL already monitored"}), 409

         new_url_id = str(uuid.uuid4())
         resolved_ip = get_host_ip_address(url_hostname) if url_hostname else 'N/A'
@@ -270,19 +271,13 @@ def add_new_url():
             "ip": resolved_ip, "responseTime": None, "lastChecked": None, "history": []
         }

-        # Make a copy of the entry for the response *before* it's potentially modified
-        # by start_url_monitoring_thread with non-serializable objects.
         response_payload = url_entry_to_add.copy()
-
-        monitored_urls_store[new_url_id] = url_entry_to_add # url_entry_to_add will be modified by start_url_monitoring_thread
+        monitored_urls_store[new_url_id] = url_entry_to_add
         save_data_to_json()

-        start_url_monitoring_thread(new_url_id) # This will add _thread and _stop_event to monitored_urls_store[new_url_id]
-
-        # Return the clean response_payload, which does not have _thread or _stop_event
+        start_url_monitoring_thread(new_url_id)
         return jsonify(response_payload), 201

-
 @app.route('/api/urls/<string:target_url_id>', methods=['DELETE'])
 def delete_existing_url(target_url_id):
     with lock:
@@ -291,8 +286,7 @@ def delete_existing_url(target_url_id):
         removed_url_entry = monitored_urls_store.pop(target_url_id)
         save_data_to_json()

-        # Prepare data for response (without thread objects)
-        response_data = removed_url_entry.copy() # Copy before potential modification if stop_url_monitoring_thread didn't pop everything
+        response_data = removed_url_entry.copy()
         response_data.pop("_thread", None)
         response_data.pop("_stop_event", None)
         print(f"Deleted URL ID {target_url_id}")
@@ -301,20 +295,13 @@ def delete_existing_url(target_url_id):
     return jsonify({"error": "URL not found"}), 404

 # --- Main Execution / Gunicorn Entry Point ---
-# Load data once when the application module is initialized
-# This handles both `flask run` and gunicorn scenarios.
-if os.environ.get('WERKZEUG_RUN_MAIN') != 'true': # Avoids double load in Flask debug mode
+if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
+    ensure_data_dir_exists() # Ensure data dir exists before loading
     load_data_from_json()

 if __name__ == '__main__':
-    # This block is for local development (e.g., `python app.py`)
-    # `load_data_from_json()` is called above unless Werkzeug reloader is active.
-    # If using Flask's reloader, load_data_from_json will be called twice:
-    # once by the main process, once by the reloader's child process.
-    # The check for WERKZEUG_RUN_MAIN ensures it only loads in the main one or the child.
-    if os.environ.get('WERKZEUG_RUN_MAIN') == 'true': # Ensure data is loaded in the reloaded process too
+    if os.environ.get('WERKZEUG_RUN_MAIN') == 'true':
+        ensure_data_dir_exists() # Ensure data dir exists before loading in reloader
         load_data_from_json()
-    app.run(debug=True, host='0.0.0.0', port=7860)
-
-    # When run with Gunicorn, Gunicorn imports `app` from this `app.py` file.
-    # `load_data_from_json()` will have been called during that import (due to the WERKZEUG_RUN_MAIN check).
+    # The DATA_DIR fallback for local dev will be used here if /data doesn't exist
+    app.run(debug=True, host='0.0.0.0', port=7860)
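
For reference, the endpoints touched by this commit can be exercised against a locally running instance. The sketch below is not part of the commit: it assumes the app was started with `python app.py` (listening on port 7860 per `app.run` above), that the `requests` package is installed, and that `add_new_url` is registered as POST `/api/urls` — that decorator sits outside the diff hunks, though the handler reads `request_data['url']` and returns 201. The GET and DELETE paths are taken directly from the decorators shown, and GET is assumed to return `response_list` as a JSON array.

# Hypothetical smoke test for the API above (assumptions noted in the text).
import requests

BASE = "http://localhost:7860"  # host/port taken from app.run() in the diff

# Add a URL; the handler prepends https:// when no scheme is given.
created = requests.post(f"{BASE}/api/urls", json={"url": "example.com"}).json()
print("created:", created.get("id"), created.get("status"))

# List monitored URLs; _thread/_stop_event are stripped server-side.
for item in requests.get(f"{BASE}/api/urls").json():
    print(item["url"], item["status"], item.get("responseTime"))

# Remove the entry created above (the API returns 404 for unknown ids).
requests.delete(f"{BASE}/api/urls/{created['id']}")

One caveat when trying this: in the new code `save_data_to_json` acquires `lock` itself, yet `add_new_url` and `delete_existing_url` appear to still call it while holding `lock` (those call sites are unchanged context lines). With Python's non-reentrant `threading.Lock`, that pattern would deadlock, so the POST and DELETE calls above may hang unless the handlers are adjusted (e.g. by switching to `threading.RLock` or moving the calls outside the locked block).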