File size: 1,078 Bytes
d9f1916
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import re
from urllib.parse import urljoin, urlparse, urlunparse

import httpx

def extract_urls(text: str):
    """Extract URLs from raw text."""
    url_pattern = r"""(?:(?:https?:\/\/|www\.)?[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})(?:\/[^\s<>"']*)?"""
    return re.findall(url_pattern, text)


def normalize_url(url: str) -> str:
    """Ensure the URL has a scheme and is normalized."""
    parsed = urlparse(url, scheme="http")
    if not parsed.netloc:
        parsed = urlparse("http://" + url)
    return urlunparse(parsed)

def resolve_short_url(url: str) -> str:
    """Make a HEAD request without following redirects, return the Location if redirected."""
    url = normalize_url(url)
    try:
        with httpx.Client(follow_redirects=False, timeout=5) as client:
            response = client.head(url, headers={"User-Agent": "Mozilla/5.0"})
            if response.status_code in {301, 302, 303, 307, 308}:
                return response.headers.get("location")
            return url  # No redirect
    except httpx.RequestError as e:
        print(f"Error: {e}")
        return url