cfdi-txt/source/helper/mureq.py


"""
mureq is a replacement for python-requests, intended to be vendored
in-tree by Linux systems software and other lightweight applications.

mureq is copyright 2021 by its contributors and is released under the
0BSD ("zero-clause BSD") license.
"""
import contextlib
import io
import os.path
import socket
import ssl
import sys
import urllib.parse
from http.client import HTTPConnection, HTTPSConnection, HTTPMessage, HTTPException
__version__ = '0.2.0'
__all__ = ['HTTPException', 'TooManyRedirects', 'Response',
           'yield_response', 'request', 'get', 'post', 'head', 'put', 'patch', 'delete']

DEFAULT_TIMEOUT = 15.0
# e.g. "Python 3.8.10"
DEFAULT_UA = "Python " + sys.version.split()[0]
def request(method, url, *, read_limit=None, **kwargs):
    """request performs an HTTP request and reads the entire response body.

    :param str method: HTTP method to request (e.g. 'GET', 'POST')
    :param str url: URL to request
    :param read_limit: maximum number of bytes to read from the body, or None for no limit
    :type read_limit: int or None
    :param kwargs: optional arguments defined by yield_response
    :return: Response object
    :rtype: Response
    :raises: HTTPException
    """
    with yield_response(method, url, **kwargs) as response:
        try:
            body = response.read(read_limit)
        except HTTPException:
            raise
        except IOError as e:
            raise HTTPException(str(e)) from e
        return Response(response.url, response.status, _prepare_incoming_headers(response.headers), body)

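# Illustrative use of request(); the URL and the read_limit value below are
# placeholders, not part of mureq itself:
#
#     resp = request('GET', 'http://example.com/resource', read_limit=1 << 20)
#     print(resp.status_code, len(resp.body))
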
def get(url, **kwargs):
    """get performs an HTTP GET request."""
    return request('GET', url=url, **kwargs)


def post(url, body=None, **kwargs):
    """post performs an HTTP POST request."""
    return request('POST', url=url, body=body, **kwargs)


def head(url, **kwargs):
    """head performs an HTTP HEAD request."""
    return request('HEAD', url=url, **kwargs)


def put(url, body=None, **kwargs):
    """put performs an HTTP PUT request."""
    return request('PUT', url=url, body=body, **kwargs)


def patch(url, body=None, **kwargs):
    """patch performs an HTTP PATCH request."""
    return request('PATCH', url=url, body=body, **kwargs)


def delete(url, **kwargs):
    """delete performs an HTTP DELETE request."""
    return request('DELETE', url=url, **kwargs)

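# Sketch of the convenience wrappers above; the host names and payloads are
# made-up placeholders:
#
#     r = get('http://example.com/search', params={'q': 'mureq'})
#     r = post('http://example.com/submit', json={'name': 'example'})
#     r = put('http://example.com/item/1', body=b'raw bytes payload')
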
@contextlib.contextmanager
def yield_response(method, url, *, unix_socket=None, timeout=DEFAULT_TIMEOUT, headers=None,
                   params=None, body=None, form=None, json=None, verify=True, source_address=None,
                   max_redirects=None, ssl_context=None):
    """yield_response is a low-level API that exposes the actual
    http.client.HTTPResponse via a contextmanager.

    Note that unlike mureq.Response, http.client.HTTPResponse does not
    automatically canonicalize multiple appearances of the same header by
    joining them together with a comma delimiter. To retrieve canonicalized
    headers from the response, use response.getheader():
    https://docs.python.org/3/library/http.client.html#http.client.HTTPResponse.getheader

    :param str method: HTTP method to request (e.g. 'GET', 'POST')
    :param str url: URL to request
    :param unix_socket: path to Unix domain socket to query, or None for a normal TCP request
    :type unix_socket: str or None
    :param timeout: timeout in seconds, or None for no timeout (default: 15 seconds)
    :type timeout: float or None
    :param headers: HTTP headers as a mapping or list of key-value pairs
    :param params: parameters to be URL-encoded and added to the query string, as a mapping or list of key-value pairs
    :param body: payload body of the request
    :type body: bytes or None
    :param form: parameters to be form-encoded and sent as the payload body, as a mapping or list of key-value pairs
    :param json: object to be serialized as JSON and sent as the payload body
    :param bool verify: whether to verify TLS certificates (default: True)
    :param source_address: source address to bind to for TCP
    :type source_address: str or tuple(str, int) or None
    :param max_redirects: maximum number of redirects to follow, or None (the default) for no redirection
    :type max_redirects: int or None
    :param ssl_context: TLS config to control certificate validation, or None for default behavior
    :type ssl_context: ssl.SSLContext or None
    :return: http.client.HTTPResponse, yielded as context manager
    :rtype: http.client.HTTPResponse
    :raises: HTTPException
    """
    method = method.upper()
    headers = _prepare_outgoing_headers(headers)
    enc_params = _prepare_params(params)
    body = _prepare_body(body, form, json, headers)

    visited_urls = []

    while max_redirects is None or len(visited_urls) <= max_redirects:
        url, conn, path = _prepare_request(method, url, enc_params=enc_params, timeout=timeout, unix_socket=unix_socket, verify=verify, source_address=source_address, ssl_context=ssl_context)
        enc_params = ''  # don't reappend enc_params if we get redirected
        visited_urls.append(url)
        try:
            try:
                conn.request(method, path, headers=headers, body=body)
                response = conn.getresponse()
            except HTTPException:
                raise
            except IOError as e:
                # wrap any IOError that is not already an HTTPException
                # in HTTPException, exposing a uniform API for remote errors
                raise HTTPException(str(e)) from e
            redirect_url = _check_redirect(url, response.status, response.headers)
            if max_redirects is None or redirect_url is None:
                response.url = url  # https://bugs.python.org/issue42062
                yield response
                return
            else:
                url = redirect_url
                if response.status == 303:
                    # 303 See Other: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/303
                    method = 'GET'
        finally:
            conn.close()

    raise TooManyRedirects(visited_urls)

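# Minimal sketch of the low-level API, assuming a reachable placeholder host;
# yield_response hands back the http.client.HTTPResponse so the body can be
# read incrementally instead of all at once:
#
#     with yield_response('GET', 'http://example.com/large-file') as resp:
#         while True:
#             chunk = resp.read(65536)
#             if not chunk:
#                 break
#             process(chunk)  # hypothetical consumer, not defined in this module
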
class Response:
    """Response contains a completely consumed HTTP response.

    :ivar str url: the retrieved URL, indicating whether a redirection occurred
    :ivar int status_code: the HTTP status code
    :ivar http.client.HTTPMessage headers: the HTTP headers
    :ivar bytes body: the payload body of the response
    """
    __slots__ = ('url', 'status_code', 'headers', 'body')

    def __init__(self, url, status_code, headers, body):
        self.url, self.status_code, self.headers, self.body = url, status_code, headers, body

    def __repr__(self):
        return f"Response(status_code={self.status_code:d})"

    @property
    def ok(self):
        """ok returns whether the response had a successful status code
        (anything other than a 40x or 50x)."""
        return not (400 <= self.status_code < 600)

    @property
    def content(self):
        """content returns the response body (the `body` member). This is an
        alias for compatibility with requests.Response."""
        return self.body

    def raise_for_status(self):
        """raise_for_status checks the response's success code, raising an
        exception for error codes."""
        if not self.ok:
            raise HTTPErrorStatus(self.status_code)

    def json(self):
        """Attempts to deserialize the response body as UTF-8 encoded JSON."""
        import json as jsonlib
        return jsonlib.loads(self.body)

    def _debugstr(self):
        buf = io.StringIO()
        print("HTTP", self.status_code, file=buf)
        for k, v in self.headers.items():
            print(f"{k}: {v}", file=buf)
        print(file=buf)
        try:
            print(self.body.decode('utf-8'), file=buf)
        except UnicodeDecodeError:
            print(f"<{len(self.body)} bytes binary data>", file=buf)
        return buf.getvalue()

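# Typical handling of a consumed Response (placeholder URL; any 4xx/5xx status
# makes raise_for_status() raise HTTPErrorStatus):
#
#     resp = get('http://example.com/api/items')
#     resp.raise_for_status()
#     items = resp.json()                  # decoded from resp.body
#     etag = resp.headers.get('ETag')
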
class TooManyRedirects(HTTPException):
    """TooManyRedirects is raised when automatic following of redirects was
    enabled, but the server redirected too many times without completing."""
    pass

class HTTPErrorStatus(HTTPException):
    """HTTPErrorStatus is raised by Response.raise_for_status() to indicate an
    HTTP error code (a 40x or a 50x). Note that a well-formed response with an
    error code does not result in an exception unless raise_for_status() is
    called explicitly.
    """

    def __init__(self, status_code):
        self.status_code = status_code

    def __str__(self):
        return f"HTTP response returned error code {self.status_code:d}"

# end public API, begin internal implementation details
_JSON_CONTENTTYPE = 'application/json'
_FORM_CONTENTTYPE = 'application/x-www-form-urlencoded'
class UnixHTTPConnection(HTTPConnection):
    """UnixHTTPConnection is a subclass of HTTPConnection that connects to a
    Unix domain stream socket instead of a TCP address.
    """

    def __init__(self, path, timeout=DEFAULT_TIMEOUT):
        super(UnixHTTPConnection, self).__init__('localhost', timeout=timeout)
        self._unix_path = path

    def connect(self):
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            sock.settimeout(self.timeout)
            sock.connect(self._unix_path)
        except Exception:
            sock.close()
            raise
        self.sock = sock

def _check_redirect(url, status, response_headers):
    """Return the URL to redirect to, or None for no redirection."""
    if status not in (301, 302, 303, 307, 308):
        return None
    location = response_headers.get('Location')
    if not location:
        return None
    parsed_location = urllib.parse.urlparse(location)
    if parsed_location.scheme:
        # absolute URL
        return location

    old_url = urllib.parse.urlparse(url)
    if location.startswith('/'):
        # absolute path on old hostname
        return urllib.parse.urlunparse((old_url.scheme, old_url.netloc,
                                        parsed_location.path, parsed_location.params,
                                        parsed_location.query, parsed_location.fragment))

    # relative path on old hostname
    old_dir, _old_file = os.path.split(old_url.path)
    new_path = os.path.join(old_dir, location)
    return urllib.parse.urlunparse((old_url.scheme, old_url.netloc,
                                    new_path, parsed_location.params,
                                    parsed_location.query, parsed_location.fragment))

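# Worked examples of the resolution rules above (hostnames are placeholders;
# a dict stands in for the real HTTPMessage headers):
#
#     _check_redirect('http://example.com/a/b', 302, {'Location': 'http://other.example/x'})
#         -> 'http://other.example/x'   (absolute URL returned verbatim)
#     _check_redirect('http://example.com/a/b?q=1', 302, {'Location': '/c'})
#         -> 'http://example.com/c'     (absolute path on the old host)
#     _check_redirect('http://example.com/a/b', 302, {'Location': 'c'})
#         -> 'http://example.com/a/c'   (relative to the old path's directory)
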
def _prepare_outgoing_headers(headers):
    if headers is None:
        headers = HTTPMessage()
    elif not isinstance(headers, HTTPMessage):
        new_headers = HTTPMessage()
        if hasattr(headers, 'items'):
            iterator = headers.items()
        else:
            iterator = iter(headers)
        for k, v in iterator:
            new_headers[k] = v
        headers = new_headers
    _setdefault_header(headers, 'User-Agent', DEFAULT_UA)
    return headers

# XXX join multi-headers together so that get(), __getitem__(),
# etc. behave intuitively, then stuff them back in an HTTPMessage.
def _prepare_incoming_headers(headers):
    headers_dict = {}
    for k, v in headers.items():
        headers_dict.setdefault(k, []).append(v)
    result = HTTPMessage()
    # note that iterating over headers_dict preserves the original
    # insertion order in all versions since Python 3.6:
    for k, vlist in headers_dict.items():
        result[k] = ','.join(vlist)
    return result

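# Sketch of the joining behaviour (header values are made up):
#
#     msg = HTTPMessage()
#     msg['Set-Cookie'] = 'a=1'
#     msg['Set-Cookie'] = 'b=2'
#     _prepare_incoming_headers(msg)['Set-Cookie']   # -> 'a=1,b=2'
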
def _setdefault_header(headers, name, value):
    if name not in headers:
        headers[name] = value

def _prepare_body(body, form, json, headers):
    if body is not None:
        if not isinstance(body, bytes):
            raise TypeError('body must be bytes or None', type(body))
        return body

    if json is not None:
        _setdefault_header(headers, 'Content-Type', _JSON_CONTENTTYPE)
        import json as jsonlib
        return jsonlib.dumps(json).encode('utf-8')

    if form is not None:
        _setdefault_header(headers, 'Content-Type', _FORM_CONTENTTYPE)
        return urllib.parse.urlencode(form, doseq=True)

    return None

def _prepare_params(params):
    if params is None:
        return ''
    return urllib.parse.urlencode(params, doseq=True)

def _prepare_request(method, url, *, enc_params='', timeout=DEFAULT_TIMEOUT, source_address=None, unix_socket=None, verify=True, ssl_context=None):
    """Parses the URL, returns the path and the right HTTPConnection subclass."""
    parsed_url = urllib.parse.urlparse(url)

    is_unix = (unix_socket is not None)
    scheme = parsed_url.scheme.lower()
    if scheme.endswith('+unix'):
        scheme = scheme[:-5]
        is_unix = True
        if scheme == 'https':
            raise ValueError("https+unix is not implemented")
    if scheme not in ('http', 'https'):
        raise ValueError("unrecognized scheme", scheme)

    is_https = (scheme == 'https')
    host = parsed_url.hostname
    port = 443 if is_https else 80
    if parsed_url.port:
        port = parsed_url.port

    if is_unix and unix_socket is None:
        unix_socket = urllib.parse.unquote(parsed_url.netloc)

    path = parsed_url.path
    if parsed_url.query:
        if enc_params:
            path = f'{path}?{parsed_url.query}&{enc_params}'
        else:
            path = f'{path}?{parsed_url.query}'
    else:
        if enc_params:
            path = f'{path}?{enc_params}'
        else:
            pass  # just parsed_url.path in this case

    if isinstance(source_address, str):
        source_address = (source_address, 0)

    if is_unix:
        conn = UnixHTTPConnection(unix_socket, timeout=timeout)
    elif is_https:
        if ssl_context is None:
            ssl_context = ssl.create_default_context()
            if not verify:
                ssl_context.check_hostname = False
                ssl_context.verify_mode = ssl.CERT_NONE
        conn = HTTPSConnection(host, port, source_address=source_address, timeout=timeout,
                               context=ssl_context)
    else:
        conn = HTTPConnection(host, port, source_address=source_address, timeout=timeout)

    munged_url = urllib.parse.urlunparse((parsed_url.scheme, parsed_url.netloc,
                                          path, parsed_url.params,
                                          '', parsed_url.fragment))
    return munged_url, conn, path
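
# Sketch of the Unix-domain socket support wired up above; the socket path is a
# placeholder for whatever local daemon is being queried:
#
#     get('http://localhost/v1/status', unix_socket='/run/example.sock')
#
# or, equivalently, with the percent-encoded socket path embedded in the URL
# via the 'http+unix' scheme:
#
#     get('http+unix://%2Frun%2Fexample.sock/v1/status')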