Spaces:

kokluch
/

phishing-detector-api

Sleeping

File size: 1,078 Bytes

d9f1916

import re
from urllib.parse import urljoin, urlparse, urlunparse

import httpx

# Compiled once at import time; pieces are concatenated so each can be annotated.
_URL_PATTERN = re.compile(
    r"(?:https?://|www\.)?"          # optional scheme or "www." prefix
    r"[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"  # dotted host ending in an alphabetic TLD (2+ letters)
    r"(?::\d{1,5})?"                 # optional explicit port
    r"""(?:[/?#][^\s<>"']*)?"""      # optional path / query / fragment
)

# Sentence punctuation that commonly follows a URL in running text.
_TRAILING_PUNCTUATION = ".,;:!?"


def extract_urls(text: str) -> list[str]:
    """Extract URL-like substrings from raw text.

    A match is an optional ``http://``/``https://``/``www.`` prefix, a dotted
    host whose last label is at least two letters, an optional ``:port``, and
    an optional path, query or fragment that runs until whitespace or one of
    ``< > " '``.

    Trailing sentence punctuation (``. , ; : ! ?``) is stripped from each
    match so that "see http://example.com/page." yields the URL without the
    final full stop.

    Note that the pattern is deliberately loose: any bare dotted token with
    an alphabetic suffix (for example ``file.txt`` or the domain part of an
    e-mail address) is also returned.

    Args:
        text: Arbitrary text to scan.

    Returns:
        The matches in order of appearance; an empty list if there are none.
    """
    return [
        match.group(0).rstrip(_TRAILING_PUNCTUATION)
        for match in _URL_PATTERN.finditer(text)
    ]


def normalize_url(url: str) -> str:
    """Return ``url`` as a URL string that carries a scheme and a host part.

    The input is parsed with ``http`` as the default scheme. When that parse
    finds a network location, the result is reassembled and returned. When it
    does not (for example a bare ``example.com/path``, which is parsed as a
    path only), the input is parsed again with ``http://`` prepended.
    """
    components = urlparse(url, scheme="http")
    if components.netloc:
        return urlunparse(components)

    # No host was recognised: force the text to be read as host[/path...].
    return urlunparse(urlparse("http://" + url))

def resolve_short_url(url: str) -> str:
    """Resolve a single redirect hop of a (possibly shortened) URL.

    Sends one HEAD request with redirect-following disabled. If the response
    status is 301, 302, 303, 307 or 308 and it carries a ``Location`` header,
    that target is returned as an absolute URL (a relative ``Location`` is
    resolved against the requested URL).

    Args:
        url: URL to probe; it is passed through ``normalize_url`` first.

    Returns:
        The absolute redirect target, or the normalized ``url`` when the
        response is not a redirect, the redirect has no ``Location`` header,
        or the request fails.
    """
    url = normalize_url(url)
    try:
        with httpx.Client(follow_redirects=False, timeout=5) as client:
            # NOTE(review): the browser-like User-Agent is presumably there
            # because some servers reject unknown clients - confirm.
            response = client.head(url, headers={"User-Agent": "Mozilla/5.0"})
    except (httpx.RequestError, httpx.InvalidURL) as e:
        # InvalidURL is not a RequestError subclass; catching it keeps the
        # best-effort contract for malformed input instead of crashing.
        print(f"Error: {e}")
        return url

    if response.status_code not in {301, 302, 303, 307, 308}:
        return url  # No redirect

    location = response.headers.get("location")
    if not location:
        # Redirect status without a target: fall back rather than return None.
        return url

    # Location may be relative ("/path" or "//host/path"); make it absolute.
    return urljoin(url, location)