import logging
import posixpath
import ssl
import urllib.request
from urllib.parse import urljoin, urlparse


def _parsed_url(url):
    """Return the base URI of ``url``: scheme + netloc + the parent path."""
    parsed_url = urlparse(url)
    prefix = parsed_url.scheme + "://" + parsed_url.netloc
    base_path = posixpath.normpath(parsed_url.path + "/..")
    return urljoin(prefix, base_path)
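# Illustrative example (placeholder URL, not taken from this module):
#   _parsed_url("https://example.com/videos/stream.m3u8")
#   -> "https://example.com/videos", the base that relative URIs resolve against.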


class DefaultHTTPClient:
    """Downloads a URI with urllib by default, or with a requests-compatible
    session when one is supplied, recording the response's base URI."""

    def __init__(self, proxies=None):
        self.proxies = proxies
        self.base_uri = None
        self.logger = None

    def download(self, uri, timeout=9, headers=None, verify_ssl=True, http_session=None):
        # Use a None default to avoid a shared mutable default argument.
        content = self.get_uri(uri, timeout, headers or {}, verify_ssl, http_session)
        return content, self.base_uri

    def get_uri(self, uri, timeout, headers, verify_ssl, http_session):
        if self.logger is None:
            # Lazily create a module-level logger on first use.
            self.logger = logging.getLogger(__name__)

        if http_session:
            # Requests-style path: fail fast on HTTP errors, then record the
            # final (post-redirect) URL as the base URI.
            resp = http_session.get(uri, headers=headers, timeout=timeout)
            resp.raise_for_status()
            self.base_uri = _parsed_url(str(resp.url))
            return resp.text

        # urllib path: honour the configured proxies and the SSL verification flag.
        proxy_handler = urllib.request.ProxyHandler(self.proxies)
        https_handler = HTTPSHandler(verify_ssl=verify_ssl)
        opener = urllib.request.build_opener(proxy_handler, https_handler)
        opener.addheaders = headers.items()
        resource = opener.open(uri, timeout=timeout)
        self.base_uri = _parsed_url(resource.geturl())
        content = resource.read().decode(
            resource.headers.get_content_charset(failobj="utf-8")
        )
        return content
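# Usage sketch with a requests-style session (illustrative; assumes the
# third-party ``requests`` package, which this module does not import):
#
#     import requests
#
#     client = DefaultHTTPClient()
#     with requests.Session() as session:
#         content, base_uri = client.download(
#             "https://example.com/videos/stream.m3u8", http_session=session
#         )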


class HTTPSHandler:
    """Factory returning a urllib HTTPSHandler whose SSL context optionally
    skips certificate verification."""

    def __new__(cls, verify_ssl=True):
        context = ssl.create_default_context()
        if not verify_ssl:
            # Disable both hostname checking and certificate validation.
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        return urllib.request.HTTPSHandler(context=context)
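

if __name__ == "__main__":
    # Minimal, illustrative sketch of the default urllib path; the URL below
    # is a placeholder and the request will fail unless it points at a real
    # resource.
    client = DefaultHTTPClient()
    content, base_uri = client.download("https://example.com/videos/stream.m3u8")
    print(base_uri)
    print(content[:200])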