from flask import Flask, request, jsonify, send_from_directory
import requests
import threading
import time
import uuid
import json
import os
from urllib.parse import urlparse
import socket

app = Flask(__name__, static_folder='.', static_url_path='')

# --- Configuration ---
DATA_FILE = "/tmp/data.json"
PING_INTERVAL_SECONDS = 60
HISTORY_DURATION_SECONDS = 60 * 60
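# NOTE: /tmp is ephemeral on most hosts (including free Hugging Face Spaces),
# so the saved state will not survive a rebuild or restart of the Space.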

# --- Data Store ---
# Structure: { "id": "uuid", "url": "string", "status": "pending/ok/error/checking",
#              "ip": "string", "responseTime": float_ms, "lastChecked": "iso_string_utc",
#              "history": [{"timestamp": float_unix_ts_seconds, "status": "ok/error"}],
#              "userId": "string_user_uuid",
#              "_thread": threading.Thread_object, "_stop_event": threading.Event_object }
monitored_urls_store = {}
lock = threading.Lock()  # Non-reentrant lock
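# Locking discipline: save_data_to_json() and stop_url_monitoring_thread() must
# be called with 'lock' already held; start_url_monitoring_thread() acquires the
# lock itself and must therefore be called with the lock released.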

# --- Helper Functions ---
def save_data_to_json():
    # This function must be called with 'lock' acquired.
    serializable_data = {}
    for url_id, data in monitored_urls_store.items():
        s_data = data.copy()
        s_data.pop("_thread", None)  # thread/event handles are not JSON-serializable
        s_data.pop("_stop_event", None)
        serializable_data[url_id] = s_data
    try:
        with open(DATA_FILE, 'w') as f:
            json.dump(serializable_data, f, indent=2)
        print(f"Data saved to {DATA_FILE}", flush=True)
    except IOError as e:
        print(f"Error saving data to {DATA_FILE}: {e}", flush=True)

def load_data_from_json():
    global monitored_urls_store
    if os.path.exists(DATA_FILE):
        try:
            with open(DATA_FILE, 'r') as f:
                loaded_json_data = json.load(f)
            print(f"Data loaded from {DATA_FILE}", flush=True)
            temp_store = {}
            for url_id_key, data_item in loaded_json_data.items():
                # Backfill any fields missing from older saved data.
                data_item.setdefault('id', url_id_key)
                current_id = data_item['id']
                data_item.setdefault('status', 'pending')
                data_item.setdefault('ip', 'N/A')
                data_item.setdefault('responseTime', None)
                data_item.setdefault('lastChecked', None)
                data_item.setdefault('history', [])
                data_item.setdefault('userId', None)
                temp_store[current_id] = data_item
            with lock:
                monitored_urls_store = temp_store
        except json.JSONDecodeError:
            print(f"Warning: Could not decode {DATA_FILE}. Starting with an empty list.", flush=True)
            with lock:
                monitored_urls_store = {}
        except Exception as e:
            print(f"Error loading data from {DATA_FILE}: {e}. Starting fresh.", flush=True)
            with lock:
                monitored_urls_store = {}
    else:
        print(f"{DATA_FILE} not found. Starting with an empty list.", flush=True)
        with lock:
            monitored_urls_store = {}
    # Collect the IDs under the lock, then start the pinger threads with the
    # lock released, since start_url_monitoring_thread acquires it itself.
    with lock:
        url_ids_to_start_monitoring = list(monitored_urls_store.keys())
    for url_id in url_ids_to_start_monitoring:
        start_url_monitoring_thread(url_id)

def get_host_ip_address(hostname_str):
    try:
        # Already a literal IPv4 address? Return it unchanged.
        socket.inet_aton(hostname_str)
        return hostname_str
    except OSError:
        # Not a literal address; try DNS resolution.
        try:
            ip_address = socket.gethostbyname(hostname_str)
            return ip_address
        except socket.gaierror:
            print(f"Could not resolve hostname: {hostname_str}", flush=True)
            return 'N/A'
    except Exception as e:
        print(f"Error processing hostname/IP for {hostname_str}: {e}", flush=True)
        return 'N/A'
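# Examples: get_host_ip_address("127.0.0.1") returns "127.0.0.1" as-is, while
# get_host_ip_address("localhost") goes through DNS (typically "127.0.0.1").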

def prune_url_history(url_data_entry):
    # Keep only history entries from the last HISTORY_DURATION_SECONDS.
    cutoff_time = time.time() - HISTORY_DURATION_SECONDS
    url_data_entry['history'] = [
        entry for entry in url_data_entry.get('history', []) if entry['timestamp'] >= cutoff_time
    ]
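
# Check strategy: a lightweight HEAD request first (10 s timeout); if that
# fails or returns a non-2xx/3xx status, fall back to a full GET (15 s
# timeout). Any 2xx or 3xx response counts as "ok".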
def execute_url_check(url_id_to_check):
    url_config_snapshot = None
    with lock:
        if url_id_to_check not in monitored_urls_store:
            return
        current_url_data = monitored_urls_store[url_id_to_check]
        if current_url_data.get('_stop_event') and current_url_data['_stop_event'].is_set():
            return
        print(f"Checking {current_url_data['url']} (ID: {url_id_to_check})...", flush=True)
        current_url_data['status'] = 'checking'
        url_config_snapshot = current_url_data.copy()
    if not url_config_snapshot:
        return

    check_start_time = time.perf_counter()
    final_check_status = 'error'
    http_response_time_ms = None
    http_headers = {'User-Agent': 'URLPinger/1.0 (HuggingFace Space Bot)'}
    try:
        try:
            head_response = requests.head(url_config_snapshot['url'], timeout=10, allow_redirects=True, headers=http_headers)
            if 200 <= head_response.status_code < 400:
                final_check_status = 'ok'
            else:
                print(f"HEAD for {url_config_snapshot['url']} returned {head_response.status_code}. Trying GET.", flush=True)
        except requests.exceptions.Timeout:
            print(f"HEAD timeout for {url_config_snapshot['url']}. Trying GET...", flush=True)
        except requests.RequestException as e_head:
            print(f"HEAD failed for {url_config_snapshot['url']}: {e_head}. Trying GET...", flush=True)

        if final_check_status != 'ok':
            try:
                get_response = requests.get(url_config_snapshot['url'], timeout=15, allow_redirects=True, headers=http_headers)
                if get_response.ok:
                    final_check_status = 'ok'
                else:
                    print(f"GET for {url_config_snapshot['url']} status: {get_response.status_code}", flush=True)
                    final_check_status = 'error'
            except requests.exceptions.Timeout:
                print(f"GET timeout for {url_config_snapshot['url']}", flush=True)
                final_check_status = 'error'
            except requests.RequestException as e_get:
                print(f"GET failed for {url_config_snapshot['url']}: {e_get}", flush=True)
                final_check_status = 'error'

        if final_check_status == 'ok':
            # Measured across the whole check, so this includes the time spent
            # on a failed HEAD attempt when the GET fallback succeeded.
            http_response_time_ms = (time.perf_counter() - check_start_time) * 1000
    except Exception as e:
        print(f"Outer check exception for {url_config_snapshot['url']}: {e}", flush=True)
        final_check_status = 'error'

    with lock:
        if url_id_to_check not in monitored_urls_store:
            return
        live_url_data = monitored_urls_store[url_id_to_check]
        live_url_data['status'] = final_check_status
        live_url_data['responseTime'] = round(http_response_time_ms) if http_response_time_ms is not None else None
        live_url_data['lastChecked'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        current_history_list = live_url_data.get('history', [])
        current_history_list.append({'timestamp': time.time(), 'status': final_check_status})
        live_url_data['history'] = current_history_list
        prune_url_history(live_url_data)
        save_data_to_json()  # Called while lock is held
        print(f"Finished check for {live_url_data['url']}: {final_check_status}, {http_response_time_ms} ms", flush=True)

def pinger_thread_function(url_id_param, stop_event_param):
    while not stop_event_param.is_set():
        execute_url_check(url_id_param)
        # Sleep in one-second slices so a stop request is honored promptly.
        for _ in range(PING_INTERVAL_SECONDS):
            if stop_event_param.is_set():
                break
            time.sleep(1)
    print(f"PingerThread for {url_id_param} stopped.", flush=True)

def start_url_monitoring_thread(target_url_id):
    with lock:
        if target_url_id not in monitored_urls_store:
            print(f"Cannot start monitoring: URL ID {target_url_id} not found.", flush=True)
            return
        url_data_entry = monitored_urls_store[target_url_id]
        if "_thread" in url_data_entry and url_data_entry["_thread"].is_alive():
            print(f"Monitor for URL ID {target_url_id} already running. Attempting to restart.", flush=True)
            if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]:
                url_data_entry["_stop_event"].set()
            # The old thread may itself be waiting on 'lock', so this join can
            # simply time out; the stale thread then exits on its next wake-up.
            url_data_entry["_thread"].join(timeout=3)
        new_stop_event = threading.Event()
        new_thread = threading.Thread(target=pinger_thread_function, args=(target_url_id, new_stop_event), daemon=True)
        url_data_entry["_thread"] = new_thread
        url_data_entry["_stop_event"] = new_stop_event
        new_thread.start()
        print(f"Started/Restarted monitoring for URL ID {target_url_id}: {url_data_entry['url']}", flush=True)

def stop_url_monitoring_thread(target_url_id):
    # This function must be called with 'lock' acquired.
    if target_url_id in monitored_urls_store:
        url_data_entry = monitored_urls_store[target_url_id]
        if "_thread" in url_data_entry and url_data_entry["_thread"].is_alive():
            print(f"Signaling stop for monitor thread of URL ID {target_url_id}", flush=True)
            if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]:
                url_data_entry["_stop_event"].set()
        url_data_entry.pop("_thread", None)
        url_data_entry.pop("_stop_event", None)

# --- API Endpoints ---
# The route paths below are assumed from the handler names and the static
# frontend; adjust them if the client calls different endpoints.
@app.route('/')
def serve_index():
    return send_from_directory(app.static_folder, 'index.html')

@app.route('/api/urls', methods=['GET'])
def get_all_urls_for_user():
    user_id = request.headers.get('X-User-ID')
    if not user_id:
        return jsonify([]), 200
    with lock:
        response_list = []
        for data_item in monitored_urls_store.values():
            if data_item.get('userId') == user_id:
                display_item = data_item.copy()
                display_item.pop("_thread", None)
                display_item.pop("_stop_event", None)
                response_list.append(display_item)
        return jsonify(response_list)

@app.route('/api/urls', methods=['POST'])
def add_new_url_for_user():
    user_id = request.headers.get('X-User-ID')
    if not user_id:
        return jsonify({"error": "User ID (X-User-ID header) is required"}), 400
    request_data = request.get_json()
    if not request_data or 'url' not in request_data:
        return jsonify({"error": "URL is required"}), 400
    input_url = request_data['url'].strip()
    # Default to https:// when the client omits the scheme.
    if not input_url.startswith('http://') and not input_url.startswith('https://'):
        input_url = 'https://' + input_url
    try:
        parsed_input_url = urlparse(input_url)
        if not parsed_input_url.scheme or not parsed_input_url.netloc:
            raise ValueError("Invalid URL structure")
        url_hostname = parsed_input_url.hostname
    except ValueError:
        return jsonify({"error": "Invalid URL format"}), 400
    with lock:
        normalized_new_url = input_url.rstrip('/').lower()
        for existing_url_data in monitored_urls_store.values():
            if existing_url_data.get('userId') == user_id and \
               existing_url_data['url'].rstrip('/').lower() == normalized_new_url:
                return jsonify({"error": "URL already monitored by you"}), 409
        new_url_id = str(uuid.uuid4())
        resolved_ip = get_host_ip_address(url_hostname) if url_hostname else 'N/A'
        url_entry_to_add = {
            "id": new_url_id, "url": input_url, "status": 'pending',
            "ip": resolved_ip, "responseTime": None, "lastChecked": None, "history": [],
            "userId": user_id
        }
        response_payload = url_entry_to_add.copy()
        monitored_urls_store[new_url_id] = url_entry_to_add
        save_data_to_json()  # Called while lock is held
    # Started after releasing the lock: start_url_monitoring_thread acquires
    # the same non-reentrant lock.
    start_url_monitoring_thread(new_url_id)
    return jsonify(response_payload), 201

@app.route('/api/urls/<target_url_id>', methods=['DELETE'])
def delete_existing_url_for_user(target_url_id):
    user_id = request.headers.get('X-User-ID')
    if not user_id:
        return jsonify({"error": "User ID (X-User-ID header) is required"}), 400
    with lock:
        if target_url_id in monitored_urls_store:
            url_entry_to_check = monitored_urls_store[target_url_id]
            if url_entry_to_check.get('userId') != user_id:
                return jsonify({"error": "URL not found or permission denied"}), 404
            stop_url_monitoring_thread(target_url_id)  # Must be called with lock held
            removed_url_entry = monitored_urls_store.pop(target_url_id)
            save_data_to_json()  # Called while lock is held
            response_data = removed_url_entry.copy()
            response_data.pop("_thread", None)
            response_data.pop("_stop_event", None)
            print(f"Deleted URL ID {target_url_id} for user {user_id}", flush=True)
            return jsonify({"message": "URL removed", "url": response_data}), 200
        else:
            return jsonify({"error": "URL not found"}), 404
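
# Example client calls, assuming the inferred /api/urls routes above and a
# client-generated UUID sent as X-User-ID (illustrative only):
#   import requests, uuid
#   headers = {"X-User-ID": str(uuid.uuid4())}
#   requests.post("http://localhost:7860/api/urls", json={"url": "example.com"}, headers=headers)
#   requests.get("http://localhost:7860/api/urls", headers=headers)
#   requests.delete("http://localhost:7860/api/urls/<url-id>", headers=headers)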

# --- Main Execution / Gunicorn Entry Point ---
# Under Gunicorn this module is imported (not run as __main__) and
# WERKZEUG_RUN_MAIN is unset, so the data loads once per worker at import time.
if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
    load_data_from_json()

if __name__ == '__main__':
    # Under the Werkzeug debug reloader, only the reloader child (where
    # WERKZEUG_RUN_MAIN == 'true') serves requests, so it loads the data here
    # to get the pinger threads running in the serving process.
    if os.environ.get('WERKZEUG_RUN_MAIN') == 'true':
        load_data_from_json()
    app.run(debug=True, host='0.0.0.0', port=7860)
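
# A minimal sketch of serving this in production, assuming the file is named
# app.py; a single worker keeps exactly one set of pinger threads:
#   gunicorn --workers 1 --bind 0.0.0.0:7860 app:app
# More than one worker would duplicate the monitoring threads and contend
# over /tmp/data.json.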