394 lines
14 KiB
Python
394 lines
14 KiB
Python
"""
|
|
mureq is a replacement for python-requests, intended to be vendored
|
|
in-tree by Linux systems software and other lightweight applications.
|
|
|
|
mureq is copyright 2021 by its contributors and is released under the
|
|
0BSD ("zero-clause BSD") license.
|
|
"""
|
|
import contextlib
|
|
import io
|
|
import os.path
|
|
import socket
|
|
import ssl
|
|
import sys
|
|
import urllib.parse
|
|
from http.client import HTTPConnection, HTTPSConnection, HTTPMessage, HTTPException
|
|
|
|
__version__ = '0.2.0'

# Public names exported by a star-import of this module. HTTPErrorStatus is
# listed because Response.raise_for_status() raises it, so callers need the
# name in scope to write a matching `except` clause.
__all__ = ['HTTPException', 'TooManyRedirects', 'HTTPErrorStatus', 'Response',
           'yield_response', 'request', 'get', 'post', 'head', 'put', 'patch', 'delete']

# Default value, in seconds, of the `timeout` argument of yield_response.
DEFAULT_TIMEOUT = 15.0

# e.g. "Python 3.8.10"
DEFAULT_UA = "Python " + sys.version.split()[0]
|
|
|
|
|
|
def request(method, url, *, read_limit=None, **kwargs):
    """Perform an HTTP request and buffer the response body in memory.

    :param str method: HTTP method to request (e.g. 'GET', 'POST')
    :param str url: URL to request
    :param read_limit: maximum number of bytes to read from the body, or None for no limit
    :type read_limit: int or None
    :param kwargs: optional arguments defined by yield_response
    :return: Response object
    :rtype: Response
    :raises: HTTPException
    """
    with yield_response(method, url, **kwargs) as raw:
        try:
            payload = raw.read(read_limit)
        except IOError as exc:
            # Some HTTPException subclasses are also IOErrors; those pass
            # through untouched, everything else is wrapped.
            if isinstance(exc, HTTPException):
                raise
            raise HTTPException(str(exc)) from exc
        merged_headers = _prepare_incoming_headers(raw.headers)
        return Response(raw.url, raw.status, merged_headers, payload)
|
|
|
|
|
|
def get(url, **kwargs):
    """Issue an HTTP GET request via request() and return its Response."""
    return request('GET', url, **kwargs)
|
|
|
|
|
|
def post(url, body=None, **kwargs):
    """Issue an HTTP POST request via request() and return its Response."""
    return request('POST', url, body=body, **kwargs)
|
|
|
|
|
|
def head(url, **kwargs):
    """Issue an HTTP HEAD request via request() and return its Response."""
    return request('HEAD', url, **kwargs)
|
|
|
|
|
|
def put(url, body=None, **kwargs):
    """Issue an HTTP PUT request via request() and return its Response."""
    return request('PUT', url, body=body, **kwargs)
|
|
|
|
|
|
def patch(url, body=None, **kwargs):
    """Issue an HTTP PATCH request via request() and return its Response."""
    return request('PATCH', url, body=body, **kwargs)
|
|
|
|
|
|
def delete(url, **kwargs):
    """Issue an HTTP DELETE request via request() and return its Response."""
    return request('DELETE', url, **kwargs)
|
|
|
|
|
|
@contextlib.contextmanager
def yield_response(method, url, *, unix_socket=None, timeout=DEFAULT_TIMEOUT, headers=None,
                   params=None, body=None, form=None, json=None, verify=True, source_address=None,
                   max_redirects=None, ssl_context=None):
    """yield_response is a low-level API that exposes the actual
    http.client.HTTPResponse via a contextmanager.

    Note that unlike mureq.Response, http.client.HTTPResponse does not
    automatically canonicalize multiple appearances of the same header by
    joining them together with a comma delimiter. To retrieve canonicalized
    headers from the response, use response.getheader():
    https://docs.python.org/3/library/http.client.html#http.client.HTTPResponse.getheader

    When redirects are followed and the server answers 303 See Other, the
    follow-up request is sent as GET without the original payload body (a HEAD
    request stays HEAD).

    :param str method: HTTP method to request (e.g. 'GET', 'POST')
    :param str url: URL to request
    :param unix_socket: path to Unix domain socket to query, or None for a normal TCP request
    :type unix_socket: str or None
    :param timeout: timeout in seconds, or None for no timeout (default: 15 seconds)
    :type timeout: float or None
    :param headers: HTTP headers as a mapping or list of key-value pairs
    :param params: parameters to be URL-encoded and added to the query string, as a mapping or list of key-value pairs
    :param body: payload body of the request
    :type body: bytes or None
    :param form: parameters to be form-encoded and sent as the payload body, as a mapping or list of key-value pairs
    :param json: object to be serialized as JSON and sent as the payload body
    :param bool verify: whether to verify TLS certificates (default: True)
    :param source_address: source address to bind to for TCP
    :type source_address: str or tuple(str, int) or None
    :param max_redirects: maximum number of redirects to follow, or None (the default) for no redirection
    :type max_redirects: int or None
    :param ssl_context: TLS config to control certificate validation, or None for default behavior
    :type ssl_context: ssl.SSLContext or None
    :return: http.client.HTTPResponse, yielded as context manager
    :rtype: http.client.HTTPResponse
    :raises: HTTPException
    """
    method = method.upper()
    headers = _prepare_outgoing_headers(headers)
    enc_params = _prepare_params(params)
    body = _prepare_body(body, form, json, headers)

    visited_urls = []

    # One iteration per request; the initial request counts as the first entry
    # of visited_urls, so max_redirects=N allows N+1 requests in total.
    while max_redirects is None or len(visited_urls) <= max_redirects:
        url, conn, path = _prepare_request(method, url, enc_params=enc_params, timeout=timeout, unix_socket=unix_socket, verify=verify, source_address=source_address, ssl_context=ssl_context)
        enc_params = ''  # don't reappend enc_params if we get redirected
        visited_urls.append(url)
        try:
            try:
                conn.request(method, path, headers=headers, body=body)
                response = conn.getresponse()
            except HTTPException:
                raise
            except IOError as e:
                # wrap any IOError that is not already an HTTPException
                # in HTTPException, exposing a uniform API for remote errors
                raise HTTPException(str(e)) from e
            redirect_url = _check_redirect(url, response.status, response.headers)
            if max_redirects is None or redirect_url is None:
                response.url = url  # https://bugs.python.org/issue42062
                yield response
                return
            else:
                url = redirect_url
                if response.status == 303:
                    # 303 See Other: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/303
                    # The follow-up is a retrieval request: keep HEAD as HEAD,
                    # turn everything else into GET, and never replay the
                    # original payload against the redirect target.
                    if method != 'HEAD':
                        method = 'GET'
                    body = None
        finally:
            # Runs after the caller's `with` body finishes, on errors, and
            # before following each redirect, so no connection is left open.
            conn.close()

    raise TooManyRedirects(visited_urls)
|
|
|
|
|
|
class Response:
    """A fully-read HTTP response.

    :ivar str url: the retrieved URL, indicating whether a redirection occurred
    :ivar int status_code: the HTTP status code
    :ivar http.client.HTTPMessage headers: the HTTP headers
    :ivar bytes body: the payload body of the response
    """

    __slots__ = ('url', 'status_code', 'headers', 'body')

    def __init__(self, url, status_code, headers, body):
        self.url = url
        self.status_code = status_code
        self.headers = headers
        self.body = body

    def __repr__(self):
        return f"Response(status_code={self.status_code:d})"

    @property
    def ok(self):
        """Whether the status code is outside the error range, i.e. it is
        anything other than a 40x or 50x."""
        is_error = 400 <= self.status_code < 600
        return not is_error

    @property
    def content(self):
        """The response body (same object as the `body` member); provided as
        an alias for compatibility with requests.Response."""
        return self.body

    def raise_for_status(self):
        """Raise HTTPErrorStatus if the status code is an error code;
        otherwise do nothing."""
        if self.ok:
            return
        raise HTTPErrorStatus(self.status_code)

    def json(self):
        """Deserialize the response body as JSON and return the result."""
        from json import loads
        return loads(self.body)

    def _debugstr(self):
        """Render status line, headers and body as one human-readable string."""
        out = io.StringIO()
        out.write(f"HTTP {self.status_code}\n")
        for name, value in self.headers.items():
            out.write(f"{name}: {value}\n")
        out.write("\n")
        try:
            text = self.body.decode('utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8: show a size placeholder instead of raw bytes.
            text = f"<{len(self.body)} bytes binary data>"
        out.write(text + "\n")
        return out.getvalue()
|
|
|
|
|
|
class TooManyRedirects(HTTPException):
    """Raised when redirect following was enabled but the server kept
    redirecting past the allowed number of redirects."""
|
|
|
|
|
|
class HTTPErrorStatus(HTTPException):
    """Raised by Response.raise_for_status() when the response carries an
    HTTP error code (a 40x or a 50x).

    A well-formed response with an error code does not produce this
    exception by itself; raise_for_status() has to be called explicitly.
    """

    def __init__(self, status_code):
        super().__init__(status_code)
        self.status_code = status_code

    def __str__(self):
        code = self.status_code
        return f"HTTP response returned error code {code:d}"
|
|
|
|
|
|
# end public API, begin internal implementation details
|
|
|
|
_JSON_CONTENTTYPE = 'application/json'
|
|
_FORM_CONTENTTYPE = 'application/x-www-form-urlencoded'
|
|
|
|
|
|
class UnixHTTPConnection(HTTPConnection):
    """An HTTPConnection whose transport is a Unix domain stream socket
    rather than a TCP address.
    """

    def __init__(self, path, timeout=DEFAULT_TIMEOUT):
        # The base class still wants a host name; 'localhost' is a placeholder.
        super().__init__('localhost', timeout=timeout)
        self._unix_path = path

    def connect(self):
        """Open the Unix socket at the stored path and install it as self.sock."""
        unix_sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            unix_sock.settimeout(self.timeout)
            unix_sock.connect(self._unix_path)
        except Exception:
            # Don't leak the descriptor when the connect attempt fails.
            unix_sock.close()
            raise
        else:
            self.sock = unix_sock
|
|
|
|
|
|
def _check_redirect(url, status, response_headers):
|
|
"""Return the URL to redirect to, or None for no redirection."""
|
|
if status not in (301, 302, 303, 307, 308):
|
|
return None
|
|
location = response_headers.get('Location')
|
|
if not location:
|
|
return None
|
|
parsed_location = urllib.parse.urlparse(location)
|
|
if parsed_location.scheme:
|
|
# absolute URL
|
|
return location
|
|
|
|
old_url = urllib.parse.urlparse(url)
|
|
if location.startswith('/'):
|
|
# absolute path on old hostname
|
|
return urllib.parse.urlunparse((old_url.scheme, old_url.netloc,
|
|
parsed_location.path, parsed_location.params,
|
|
parsed_location.query, parsed_location.fragment))
|
|
|
|
# relative path on old hostname
|
|
old_dir, _old_file = os.path.split(old_url.path)
|
|
new_path = os.path.join(old_dir, location)
|
|
return urllib.parse.urlunparse((old_url.scheme, old_url.netloc,
|
|
new_path, parsed_location.params,
|
|
parsed_location.query, parsed_location.fragment))
|
|
|
|
|
|
def _prepare_outgoing_headers(headers):
    """Normalize caller-supplied headers into an HTTPMessage.

    Accepts None, an HTTPMessage (used as-is), a mapping with ``items()``, or
    an iterable of key-value pairs. A default User-Agent is added when the
    caller did not set one.
    """
    if headers is None:
        outgoing = HTTPMessage()
    elif isinstance(headers, HTTPMessage):
        outgoing = headers
    else:
        outgoing = HTTPMessage()
        pairs = headers.items() if hasattr(headers, 'items') else iter(headers)
        for name, value in pairs:
            outgoing[name] = value
    _setdefault_header(outgoing, 'User-Agent', DEFAULT_UA)
    return outgoing
|
|
|
|
|
|
# XXX join multi-headers together so that get(), __getitem__(),
|
|
# etc. behave intuitively, then stuff them back in an HTTPMessage.
|
|
def _prepare_incoming_headers(headers):
|
|
headers_dict = {}
|
|
for k, v in headers.items():
|
|
headers_dict.setdefault(k, []).append(v)
|
|
result = HTTPMessage()
|
|
# note that iterating over headers_dict preserves the original
|
|
# insertion order in all versions since Python 3.6:
|
|
for k, vlist in headers_dict.items():
|
|
result[k] = ','.join(vlist)
|
|
return result
|
|
|
|
|
|
def _setdefault_header(headers, name, value):
|
|
if name not in headers:
|
|
headers[name] = value
|
|
|
|
|
|
def _prepare_body(body, form, json, headers):
|
|
if body is not None:
|
|
if not isinstance(body, bytes):
|
|
raise TypeError('body must be bytes or None', type(body))
|
|
return body
|
|
|
|
if json is not None:
|
|
_setdefault_header(headers, 'Content-Type', _JSON_CONTENTTYPE)
|
|
import json as jsonlib
|
|
return jsonlib.dumps(json).encode('utf-8')
|
|
|
|
if form is not None:
|
|
_setdefault_header(headers, 'Content-Type', _FORM_CONTENTTYPE)
|
|
return urllib.parse.urlencode(form, doseq=True)
|
|
|
|
return None
|
|
|
|
|
|
def _prepare_params(params):
|
|
if params is None:
|
|
return ''
|
|
return urllib.parse.urlencode(params, doseq=True)
|
|
|
|
|
|
def _prepare_request(method, url, *, enc_params='', timeout=DEFAULT_TIMEOUT, source_address=None, unix_socket=None, verify=True, ssl_context=None):
    """Parse ``url`` and build the connection object for it.

    :return: tuple ``(munged_url, conn, path)`` where ``conn`` is an
        unconnected HTTPConnection (or subclass), ``path`` is the request
        target including the query string, and ``munged_url`` is ``url``
        rebuilt with ``enc_params`` merged into its query
    :raises ValueError: for schemes other than http/https (optionally with a
        ``+unix`` suffix), and for ``https+unix``
    """
    parts = urllib.parse.urlparse(url)

    use_unix = unix_socket is not None
    scheme = parts.scheme.lower()
    if scheme.endswith('+unix'):
        scheme = scheme[:-5]
        use_unix = True
        if scheme == 'https':
            raise ValueError("https+unix is not implemented")

    if scheme not in ('http', 'https'):
        raise ValueError("unrecognized scheme", scheme)

    use_tls = (scheme == 'https')
    host = parts.hostname
    # An explicit port in the URL wins over the scheme default.
    port = parts.port or (443 if use_tls else 80)

    if use_unix and unix_socket is None:
        # "<scheme>+unix://<percent-encoded socket path>/..." form
        unix_socket = urllib.parse.unquote(parts.netloc)

    # Request target: the URL's own query first, then the extra params.
    path = parts.path
    query_pieces = [piece for piece in (parts.query, enc_params) if piece]
    if query_pieces:
        path = path + '?' + '&'.join(query_pieces)

    if isinstance(source_address, str):
        source_address = (source_address, 0)

    if use_unix:
        conn = UnixHTTPConnection(unix_socket, timeout=timeout)
    elif not use_tls:
        conn = HTTPConnection(host, port, source_address=source_address, timeout=timeout)
    else:
        if ssl_context is None:
            ssl_context = ssl.create_default_context()
            # `verify` only affects the context created here; a
            # caller-supplied ssl_context is used unchanged.
            if not verify:
                ssl_context.check_hostname = False
                ssl_context.verify_mode = ssl.CERT_NONE
        conn = HTTPSConnection(host, port, source_address=source_address, timeout=timeout,
                               context=ssl_context)

    # The query already lives inside `path`, so the query slot stays empty.
    munged_url = urllib.parse.urlunparse((parts.scheme, parts.netloc,
                                          path, parts.params,
                                          '', parts.fragment))
    return munged_url, conn, path
|