# Pingurls / app.py

from flask import Flask, request, jsonify, send_from_directory
import requests
import threading
import time
import uuid
import json
import os
from urllib.parse import urlparse
import socket

app = Flask(__name__, static_folder='.', static_url_path='')

# --- Configuration ---
DATA_DIR = "/data" # Writable directory on Hugging Face Spaces
if not os.path.exists(DATA_DIR) and not os.environ.get('SPACE_ID'): # For local dev if /data isn't preset
print(f"Warning: {DATA_DIR} not found. Using current directory for data.json (local dev mode).")
DATA_DIR = "." # Fallback to current dir for local testing if /data does not exist
DATA_FILE = os.path.join(DATA_DIR, "data.json")
PING_INTERVAL_SECONDS = 60
HISTORY_DURATION_SECONDS = 60 * 60
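# With the defaults above, each URL accumulates at most roughly
# HISTORY_DURATION_SECONDS / PING_INTERVAL_SECONDS = 3600 / 60 = 60 history
# entries before prune_url_history() starts discarding the oldest ones.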
monitored_urls_store = {}
# Re-entrant lock: helpers such as save_data_to_json() and
# start_url_monitoring_thread() acquire it and are also called from
# sections that already hold it, so a plain Lock would deadlock.
lock = threading.RLock()

# --- Helper Functions ---
def ensure_data_dir_exists():
    """Ensures the data directory exists."""
    if DATA_DIR == ".":  # The current directory always exists.
        return
    if not os.path.exists(DATA_DIR):
        try:
            os.makedirs(DATA_DIR)
            print(f"Created data directory: {DATA_DIR}")
        except OSError as e:
            print(f"Error creating data directory {DATA_DIR}: {e}. Data persistence may fail.")

def save_data_to_json():
    with lock:
        ensure_data_dir_exists()  # Ensure the directory exists before writing.
        # Strip non-serializable thread handles before dumping to JSON.
        serializable_data = {}
        for url_id, data in monitored_urls_store.items():
            s_data = data.copy()
            s_data.pop("_thread", None)
            s_data.pop("_stop_event", None)
            serializable_data[url_id] = s_data
        try:
            with open(DATA_FILE, 'w') as f:
                json.dump(serializable_data, f, indent=2)
        except IOError as e:
            print(f"Error saving data to {DATA_FILE}: {e}")

def load_data_from_json():
    global monitored_urls_store
    ensure_data_dir_exists()  # Ensure the directory exists before reading.
    if os.path.exists(DATA_FILE):
        try:
            with open(DATA_FILE, 'r') as f:
                loaded_json_data = json.load(f)
            temp_store = {}
            for url_id_key, data_item in loaded_json_data.items():
                # Backfill any fields missing from older data files.
                data_item.setdefault('id', url_id_key)
                current_id = data_item['id']
                data_item.setdefault('status', 'pending')
                data_item.setdefault('ip', 'N/A')
                data_item.setdefault('responseTime', None)
                data_item.setdefault('lastChecked', None)
                data_item.setdefault('history', [])
                temp_store[current_id] = data_item
            with lock:
                monitored_urls_store = temp_store
        except json.JSONDecodeError:
            print(f"Warning: Could not decode {DATA_FILE}. Starting with an empty list.")
            with lock:
                monitored_urls_store = {}
        except Exception as e:
            print(f"Error loading data from {DATA_FILE}: {e}. Starting fresh.")
            with lock:
                monitored_urls_store = {}
    else:
        print(f"{DATA_FILE} not found. Starting with an empty list.")
        with lock:
            monitored_urls_store = {}
    with lock:
        url_ids_to_start_monitoring = list(monitored_urls_store.keys())
    for url_id in url_ids_to_start_monitoring:
        start_url_monitoring_thread(url_id)

def get_host_ip_address(hostname_str):
    try:
        # If it is already a dotted-quad IPv4 address, return it unchanged.
        socket.inet_aton(hostname_str)
        return hostname_str
    except OSError:
        try:
            return socket.gethostbyname(hostname_str)
        except socket.gaierror:
            print(f"Could not resolve hostname: {hostname_str}")
            return 'N/A'
        except Exception as e:
            print(f"Error processing hostname/IP for {hostname_str}: {e}")
            return 'N/A'

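# Example (hedged; the resolved address depends on live DNS):
#   get_host_ip_address('127.0.0.1')            -> '127.0.0.1' (IPv4 literal)
#   get_host_ip_address('example.com')          -> e.g. '93.184.216.34'
#   get_host_ip_address('no-such-host.invalid') -> 'N/A'
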
def prune_url_history(url_data_entry):
    cutoff_time = time.time() - HISTORY_DURATION_SECONDS
    url_data_entry['history'] = [
        entry for entry in url_data_entry.get('history', [])
        if entry['timestamp'] >= cutoff_time
    ]

def execute_url_check(url_id_to_check):
    url_config_snapshot = None
    with lock:
        if url_id_to_check not in monitored_urls_store:
            return
        current_url_data = monitored_urls_store[url_id_to_check]
        if current_url_data.get('_stop_event') and current_url_data['_stop_event'].is_set():
            return
        print(f"Checking {current_url_data['url']} (ID: {url_id_to_check})...")
        current_url_data['status'] = 'checking'
        url_config_snapshot = current_url_data.copy()
    if not url_config_snapshot:
        return

    check_start_time = time.perf_counter()
    final_check_status = 'error'
    http_response_time_ms = None
    http_headers = {'User-Agent': 'URLPinger/1.0 (HuggingFace Space Bot)'}
    try:
        # Try a cheap HEAD request first; fall back to GET if it fails or
        # returns a non-success status.
        try:
            head_response = requests.head(url_config_snapshot['url'], timeout=10, allow_redirects=True, headers=http_headers)
            if 200 <= head_response.status_code < 400:
                final_check_status = 'ok'
            else:
                print(f"HEAD for {url_config_snapshot['url']} returned {head_response.status_code}. Trying GET.")
        except requests.exceptions.Timeout:
            print(f"HEAD timeout for {url_config_snapshot['url']}. Trying GET...")
        except requests.RequestException as e_head:
            print(f"HEAD failed for {url_config_snapshot['url']}: {e_head}. Trying GET...")

        if final_check_status != 'ok':
            try:
                get_response = requests.get(url_config_snapshot['url'], timeout=15, allow_redirects=True, headers=http_headers)
                if get_response.ok:
                    final_check_status = 'ok'
                else:
                    print(f"GET for {url_config_snapshot['url']} status: {get_response.status_code}")
            except requests.exceptions.Timeout:
                print(f"GET timeout for {url_config_snapshot['url']}")
            except requests.RequestException as e_get:
                print(f"GET failed for {url_config_snapshot['url']}: {e_get}")

        if final_check_status == 'ok':
            http_response_time_ms = (time.perf_counter() - check_start_time) * 1000
    except Exception as e:
        print(f"Outer check exception for {url_config_snapshot['url']}: {e}")
        final_check_status = 'error'

    with lock:
        if url_id_to_check not in monitored_urls_store:
            return
        live_url_data = monitored_urls_store[url_id_to_check]
        live_url_data['status'] = final_check_status
        live_url_data['responseTime'] = round(http_response_time_ms) if http_response_time_ms is not None else None
        live_url_data['lastChecked'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        live_url_data.setdefault('history', []).append({'timestamp': time.time(), 'status': final_check_status})
        prune_url_history(live_url_data)
        save_data_to_json()  # Safe while holding the lock because it is re-entrant.
        print(f"Finished check for {live_url_data['url']}: {final_check_status}, {http_response_time_ms} ms")

def pinger_thread_function(url_id_param, stop_event_param):
    while not stop_event_param.is_set():
        execute_url_check(url_id_param)
        # Sleep in one-second slices so a stop request is noticed promptly.
        for _ in range(PING_INTERVAL_SECONDS):
            if stop_event_param.is_set():
                break
            time.sleep(1)
    print(f"PingerThread for {url_id_param} stopped.")

def start_url_monitoring_thread(target_url_id):
    with lock:
        if target_url_id not in monitored_urls_store:
            print(f"Cannot start monitoring: URL ID {target_url_id} not found.")
            return
        url_data_entry = monitored_urls_store[target_url_id]
        if "_thread" in url_data_entry and url_data_entry["_thread"].is_alive():
            print(f"Monitor for URL ID {target_url_id} already running. Attempting to restart.")
            if url_data_entry.get("_stop_event"):
                url_data_entry["_stop_event"].set()
            url_data_entry["_thread"].join(timeout=3)
        new_stop_event = threading.Event()
        new_thread = threading.Thread(target=pinger_thread_function, args=(target_url_id, new_stop_event), daemon=True)
        url_data_entry["_thread"] = new_thread
        url_data_entry["_stop_event"] = new_stop_event
        new_thread.start()
        print(f"Started/Restarted monitoring for URL ID {target_url_id}: {url_data_entry['url']}")

def stop_url_monitoring_thread(target_url_id):
    with lock:
        if target_url_id in monitored_urls_store:
            url_data_entry = monitored_urls_store[target_url_id]
            if "_thread" in url_data_entry and url_data_entry["_thread"].is_alive():
                print(f"Signaling stop for monitor thread of URL ID {target_url_id}")
                if url_data_entry.get("_stop_event"):
                    url_data_entry["_stop_event"].set()
            url_data_entry.pop("_thread", None)
            url_data_entry.pop("_stop_event", None)

# --- API Endpoints ---
@app.route('/')
def serve_index():
    return send_from_directory(app.static_folder, 'index.html')

@app.route('/api/urls', methods=['GET'])
def get_all_urls():
    with lock:
        response_list = []
        for data_item in monitored_urls_store.values():
            display_item = data_item.copy()
            display_item.pop("_thread", None)
            display_item.pop("_stop_event", None)
            response_list.append(display_item)
        return jsonify(response_list)

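# A minimal client sketch (assumes the app is reachable on localhost:7860):
#   import requests
#   urls = requests.get('http://localhost:7860/api/urls').json()
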
@app.route('/api/urls', methods=['POST'])
def add_new_url():
    request_data = request.get_json()
    if not request_data or 'url' not in request_data:
        return jsonify({"error": "URL is required"}), 400
    input_url = request_data['url'].strip()
    if not input_url.startswith(('http://', 'https://')):
        input_url = 'https://' + input_url  # Default to HTTPS for bare hostnames.
    try:
        parsed_input_url = urlparse(input_url)
        if not parsed_input_url.scheme or not parsed_input_url.netloc:
            raise ValueError("Invalid URL structure")
        url_hostname = parsed_input_url.hostname
    except ValueError:
        return jsonify({"error": "Invalid URL format"}), 400
    with lock:
        # Reject duplicates, ignoring trailing slashes and case.
        normalized_new_url = input_url.rstrip('/').lower()
        for existing_url_data in monitored_urls_store.values():
            if existing_url_data['url'].rstrip('/').lower() == normalized_new_url:
                return jsonify({"error": "URL already monitored"}), 409
        new_url_id = str(uuid.uuid4())
        resolved_ip = get_host_ip_address(url_hostname) if url_hostname else 'N/A'
        url_entry_to_add = {
            "id": new_url_id, "url": input_url, "status": 'pending',
            "ip": resolved_ip, "responseTime": None, "lastChecked": None, "history": []
        }
        response_payload = url_entry_to_add.copy()
        monitored_urls_store[new_url_id] = url_entry_to_add
        save_data_to_json()
        start_url_monitoring_thread(new_url_id)
    return jsonify(response_payload), 201

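# Example (hedged; the response reflects the initial 'pending' state):
#   new = requests.post('http://localhost:7860/api/urls',
#                       json={'url': 'example.com'}).json()
#   # -> {'id': '<uuid>', 'url': 'https://example.com', 'status': 'pending', ...}
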
@app.route('/api/urls/<string:target_url_id>', methods=['DELETE'])
def delete_existing_url(target_url_id):
    with lock:
        if target_url_id in monitored_urls_store:
            stop_url_monitoring_thread(target_url_id)
            removed_url_entry = monitored_urls_store.pop(target_url_id)
            save_data_to_json()
            response_data = removed_url_entry.copy()
            response_data.pop("_thread", None)
            response_data.pop("_stop_event", None)
            print(f"Deleted URL ID {target_url_id}")
            return jsonify({"message": "URL removed", "url": response_data}), 200
        else:
            return jsonify({"error": "URL not found"}), 404

# --- Main Execution / Gunicorn Entry Point ---
if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
    # Runs at import time, i.e. once under Gunicorn or in the Werkzeug
    # reloader's parent process.
    ensure_data_dir_exists()
    load_data_from_json()

if __name__ == '__main__':
    if os.environ.get('WERKZEUG_RUN_MAIN') == 'true':
        # The Werkzeug reloader child re-imports the module with this env
        # var set, so load the data here instead.
        ensure_data_dir_exists()
        load_data_from_json()
    # The DATA_DIR fallback for local dev is used here if /data doesn't exist.
    app.run(debug=True, host='0.0.0.0', port=7860)
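
# A typical production launch on Spaces would be something like (an
# assumption, not taken from this repo's config):
#   gunicorn --bind 0.0.0.0:7860 app:app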