sim16/matteo_env/Lib/site-packages/aiohttp/multipart.py

964 lines
32 KiB
Python
Raw Normal View History

2020-12-20 00:08:09 +00:00
import base64
import binascii
import json
import re
import uuid
import warnings
import zlib
from collections import deque
from types import TracebackType
2022-09-18 13:17:20 +00:00
from typing import (
2020-12-20 00:08:09 +00:00
TYPE_CHECKING,
Any,
2022-09-18 13:17:20 +00:00
AsyncIterator,
Deque,
2020-12-20 00:08:09 +00:00
Dict,
Iterator,
List,
Mapping,
Optional,
Sequence,
Tuple,
Type,
Union,
2022-09-18 13:17:20 +00:00
cast,
2020-12-20 00:08:09 +00:00
)
from urllib.parse import parse_qsl, unquote, urlencode
2022-09-18 13:17:20 +00:00
from multidict import CIMultiDict, CIMultiDictProxy, MultiMapping
2020-12-20 00:08:09 +00:00
from .hdrs import (
CONTENT_DISPOSITION,
CONTENT_ENCODING,
CONTENT_LENGTH,
CONTENT_TRANSFER_ENCODING,
CONTENT_TYPE,
)
from .helpers import CHAR, TOKEN, parse_mimetype, reify
from .http import HeadersParser
from .payload import (
JsonPayload,
LookupError,
Order,
Payload,
StringPayload,
get_payload,
payload_type,
)
from .streams import StreamReader
2022-09-18 13:17:20 +00:00
# Names exported by ``from <this module> import *``.
__all__ = (
"MultipartReader",
"MultipartWriter",
"BodyPartReader",
"BadContentDispositionHeader",
"BadContentDispositionParam",
"parse_content_disposition",
"content_disposition_filename",
)
2020-12-20 00:08:09 +00:00
if TYPE_CHECKING: # pragma: no cover
2022-09-18 13:17:20 +00:00
from .client_reqrep import ClientResponse
2020-12-20 00:08:09 +00:00
class BadContentDispositionHeader(RuntimeWarning):
    """Warning category issued when a whole Content-Disposition header is rejected."""
class BadContentDispositionParam(RuntimeWarning):
    """Warning category issued when one Content-Disposition parameter is skipped."""
2022-09-18 13:17:20 +00:00
def parse_content_disposition(
    header: Optional[str],
) -> Tuple[Optional[str], Dict[str, str]]:
    """Split a Content-Disposition header into its type and parameters.

    Returns ``(disposition_type, params)``: the type is lower-cased and
    ``params`` maps lower-cased parameter names to their decoded values.

    ``(None, {})`` is returned when *header* is empty.  It is also returned,
    together with a ``BadContentDispositionHeader`` warning, when the header
    as a whole cannot be parsed.  A single malformed parameter is skipped
    with a ``BadContentDispositionParam`` warning instead.
    """

    def is_token(string: str) -> bool:
        return bool(string) and TOKEN >= set(string)

    def is_quoted(string: str) -> bool:
        # A quoted string needs both an opening and a closing quote.
        # Without the length check an empty value raises IndexError and a
        # lone '"' is mistaken for a complete (empty) quoted string.
        return len(string) >= 2 and string[0] == string[-1] == '"'

    def is_rfc5987(string: str) -> bool:
        return is_token(string) and string.count("'") == 2

    def is_extended_param(string: str) -> bool:
        return string.endswith("*")

    def is_continuous_param(string: str) -> bool:
        pos = string.find("*") + 1
        if not pos:
            return False
        substring = string[pos:-1] if string.endswith("*") else string[pos:]
        return substring.isdigit()

    def unescape(text: str, *, chars: str = "".join(map(re.escape, CHAR))) -> str:
        return re.sub(f"\\\\([{chars}])", "\\1", text)

    if not header:
        return None, {}

    disptype, *parts = header.split(";")
    if not is_token(disptype):
        warnings.warn(BadContentDispositionHeader(header))
        return None, {}

    params = {}  # type: Dict[str, str]
    while parts:
        item = parts.pop(0)

        if "=" not in item:
            warnings.warn(BadContentDispositionHeader(header))
            return None, {}

        key, value = item.split("=", 1)
        key = key.lower().strip()
        value = value.lstrip()

        # A repeated parameter invalidates the whole header.
        if key in params:
            warnings.warn(BadContentDispositionHeader(header))
            return None, {}

        if not is_token(key):
            warnings.warn(BadContentDispositionParam(item))
            continue

        elif is_continuous_param(key):
            # name*N / name*N* segment: stored as-is, joined later by
            # content_disposition_filename().
            if is_quoted(value):
                value = unescape(value[1:-1])
            elif not is_token(value):
                warnings.warn(BadContentDispositionParam(item))
                continue

        elif is_extended_param(key):
            # name* parameter: charset'language'percent-encoded-value
            if is_rfc5987(value):
                encoding, _, value = value.split("'", 2)
                encoding = encoding or "utf-8"
            else:
                warnings.warn(BadContentDispositionParam(item))
                continue

            try:
                value = unquote(value, encoding, "strict")
            except UnicodeDecodeError:  # pragma: nocover
                warnings.warn(BadContentDispositionParam(item))
                continue

        else:
            failed = True
            if is_quoted(value):
                failed = False
                value = unescape(value[1:-1].lstrip("\\/"))
            elif is_token(value):
                failed = False
            elif parts:
                # maybe just ; in filename, in any case this is just
                # one case fix, for proper fix we need to redesign parser
                _value = f"{value};{parts[0]}"
                if is_quoted(_value):
                    parts.pop(0)
                    value = unescape(_value[1:-1].lstrip("\\/"))
                    failed = False

            if failed:
                warnings.warn(BadContentDispositionHeader(header))
                return None, {}

        params[key] = value

    return disptype.lower(), params
2022-09-18 13:17:20 +00:00
def content_disposition_filename(
    params: Mapping[str, str], name: str = "filename"
) -> Optional[str]:
    """Extract the value of parameter *name* from parsed disposition params.

    Lookup order:

    1. the extended form ``name*``;
    2. the plain form ``name``;
    3. continuation segments ``name*0``, ``name*1``, ... (each optionally
       suffixed with ``*``), joined in numeric order for as long as the
       indices are consecutive starting at 0.

    When the joined continuation value has the ``charset'language'value``
    shape it is percent-decoded using the charset (``utf-8`` when empty).

    Returns ``None`` when *params* is empty or no matching parameter exists.
    """
    name_suf = "%s*" % name
    if not params:
        return None
    if name_suf in params:
        return params[name_suf]
    if name in params:
        return params[name]

    # Collect continuation segments keyed by their numeric index.  Sorting on
    # the integer (not on the key text) keeps "name*10" after "name*2", so
    # values split into more than ten segments are no longer truncated.
    segments = []
    for key, value in params.items():
        if not key.startswith(name_suf):
            continue
        _, tail = key.split("*", 1)
        if tail.endswith("*"):
            tail = tail[:-1]
        if tail.isdecimal():
            segments.append((int(tail), key, tail, value))
    segments.sort()

    parts = []
    for num, (_, _, tail, value) in enumerate(segments):
        # Stop at the first gap, duplicate or non-canonical index.
        if tail != str(num):
            break
        parts.append(value)

    if not parts:
        return None

    value = "".join(parts)
    # Only a value with both quotes of charset'language'value can be split
    # into three fields; a single apostrophe is ordinary filename text.
    if value.count("'") >= 2:
        encoding, _, value = value.split("'", 2)
        encoding = encoding or "utf-8"
        return unquote(value, encoding, "strict")

    return value
class MultipartResponseWrapper:
    """Couple a response object with the multipart reader parsing its body.

    Parts are pulled from the reader; as soon as the reader reports that it
    is at EOF the response is released.
    """

    def __init__(
        self,
        resp: "ClientResponse",
        stream: "MultipartReader",
    ) -> None:
        self.resp = resp
        self.stream = stream

    def __aiter__(self) -> "MultipartResponseWrapper":
        return self

    async def __anext__(
        self,
    ) -> Union["MultipartReader", "BodyPartReader"]:
        following = await self.next()
        if following is not None:
            return following
        raise StopAsyncIteration

    def at_eof(self) -> bool:
        """Tell whether the response content stream reports EOF."""
        return self.resp.content.at_eof()

    async def next(
        self,
    ) -> Optional[Union["MultipartReader", "BodyPartReader"]]:
        """Return the next part from the reader, or None when exhausted.

        The response is released whenever the reader is at EOF after the
        fetch.
        """
        emitted = await self.stream.next()
        if self.stream.at_eof():
            await self.release()
        return emitted

    async def release(self) -> None:
        """Release the underlying response by awaiting ``resp.release()``."""
        await self.resp.release()
class BodyPartReader:
    """Reader for the content of one body part of a multipart stream."""

    chunk_size = 8192

    def __init__(
        self, boundary: bytes, headers: "CIMultiDictProxy[str]", content: StreamReader
    ) -> None:
        self.headers = headers
        self._boundary = boundary
        self._content = content
        self._at_eof = False
        declared_length = self.headers.get(CONTENT_LENGTH, None)
        self._length = None if declared_length is None else int(declared_length)
        self._read_bytes = 0
        # Lines already pulled from the stream that belong to whoever reads next.
        self._unread: Deque[bytes] = deque()
        # Look-behind buffer used by _read_chunk_from_stream().
        self._prev_chunk: Optional[bytes] = None
        # Number of reads performed while the stream already reported EOF.
        self._content_eof = 0
        # NOTE(review): presumably the backing store for the @reify
        # properties below - confirm against helpers.reify.
        self._cache: Dict[str, Any] = {}

    def __aiter__(self) -> AsyncIterator["BodyPartReader"]:
        return self  # type: ignore[return-value]

    async def __anext__(self) -> bytes:
        payload = await self.next()
        if payload is None:
            raise StopAsyncIteration
        return payload

    async def next(self) -> Optional[bytes]:
        """Read the whole part; return None instead of an empty result."""
        payload = await self.read()
        return payload if payload else None

    async def read(self, *, decode: bool = False) -> bytes:
        """Read the body part until its end.

        decode: when true, the collected data is passed through decode()
                (Content-Transfer-Encoding / Content-Encoding headers);
                otherwise it is returned untouched.
        """
        if self._at_eof:
            return b""
        buffer = bytearray()
        while not self._at_eof:
            buffer.extend(await self.read_chunk(self.chunk_size))
        return self.decode(buffer) if decode else buffer

    async def read_chunk(self, size: int = chunk_size) -> bytes:
        """Read the next chunk of the body part.

        size: chunk size
        """
        if self._at_eof:
            return b""

        # A known, non-zero Content-Length allows exact reads; otherwise the
        # stream has to be scanned for the boundary.
        if self._length:
            fetch = self._read_chunk_from_length
        else:
            fetch = self._read_chunk_from_stream
        chunk = await fetch(size)

        self._read_bytes += len(chunk)
        if self._read_bytes == self._length:
            self._at_eof = True
        if self._at_eof:
            # The part content must be followed by exactly one CRLF.
            line_end = await self._content.readline()
            assert (
                b"\r\n" == line_end
            ), "reader did not read all the data or it is malformed"
        return chunk

    async def _read_chunk_from_length(self, size: int) -> bytes:
        # Length-driven read: never asks for more than what remains of the
        # declared Content-Length.
        assert self._length is not None, "Content-Length required for chunked read"
        remaining = self._length - self._read_bytes
        return await self._content.read(min(size, remaining))

    async def _read_chunk_from_stream(self, size: int) -> bytes:
        # Boundary-driven read for parts of unknown length.  One chunk is
        # always held back in self._prev_chunk so that a boundary straddling
        # two reads can still be found in the combined window.
        assert (
            size >= len(self._boundary) + 2
        ), "Chunk size must be greater or equal than boundary length + 2"
        first_chunk = self._prev_chunk is None
        if first_chunk:
            self._prev_chunk = await self._content.read(size)

        chunk = await self._content.read(size)
        self._content_eof += int(self._content.at_eof())
        assert self._content_eof < 3, "Reading after EOF"
        assert self._prev_chunk is not None
        window = self._prev_chunk + chunk
        sub = b"\r\n" + self._boundary
        # After the first read, only the tail of the previous chunk (where a
        # split boundary could begin) needs to be searched again.
        if first_chunk:
            search_from = 0
        else:
            search_from = max(0, len(self._prev_chunk) - len(sub))
        idx = window.find(sub, search_from)
        if idx >= 0:
            # pushing boundary back to content
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)
                self._content.unread_data(window[idx:])
            if size > idx:
                self._prev_chunk = self._prev_chunk[:idx]
            chunk = window[len(self._prev_chunk) : idx]
            if not chunk:
                self._at_eof = True
        result = self._prev_chunk
        self._prev_chunk = chunk
        return result

    async def readline(self) -> bytes:
        """Read the body part one line at a time."""
        if self._at_eof:
            return b""

        if self._unread:
            line = self._unread.popleft()
        else:
            line = await self._content.readline()

        if line.startswith(self._boundary):
            # the very last boundary may not come with \r\n,
            # so set single rules for everyone
            stripped = line.rstrip(b"\r\n")
            closing = self._boundary + b"--"
            # ensure that we read exactly the boundary, not something alike
            if stripped in (self._boundary, closing):
                self._at_eof = True
                self._unread.append(line)
                return b""
        else:
            # Peek at the following line: when it is a boundary, the CRLF
            # ending the current line is not part of the content.
            upcoming = await self._content.readline()
            if upcoming.startswith(self._boundary):
                line = line[:-2]  # strip CRLF but only once
            self._unread.append(upcoming)

        return line

    async def release(self) -> None:
        """Like read(), but discards the data instead of returning it."""
        while not self._at_eof:
            await self.read_chunk(self.chunk_size)

    async def text(self, *, encoding: Optional[str] = None) -> str:
        """Like read(), but returns the decoded data as text."""
        raw = await self.read(decode=True)
        # see https://www.w3.org/TR/html5/forms.html#multipart/form-data-encoding-algorithm # NOQA
        # and https://dvcs.w3.org/hg/xhr/raw-file/tip/Overview.html#dom-xmlhttprequest-send # NOQA
        charset = encoding or self.get_charset(default="utf-8")
        return raw.decode(charset)

    async def json(self, *, encoding: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """Like read(), but parses the data as JSON; None for an empty part."""
        raw = await self.read(decode=True)
        if not raw:
            return None
        charset = encoding or self.get_charset(default="utf-8")
        return cast(Dict[str, Any], json.loads(raw.decode(charset)))

    async def form(self, *, encoding: Optional[str] = None) -> List[Tuple[str, str]]:
        """Like read(), but parses the data as form urlencoded pairs."""
        raw = await self.read(decode=True)
        if not raw:
            return []
        real_encoding = (
            encoding if encoding is not None else self.get_charset(default="utf-8")
        )
        return parse_qsl(
            raw.rstrip().decode(real_encoding),
            keep_blank_values=True,
            encoding=real_encoding,
        )

    def at_eof(self) -> bool:
        """Returns True if the boundary was reached or False otherwise."""
        return self._at_eof

    def decode(self, data: bytes) -> bytes:
        """Decode *data* according to the part headers.

        Content-Transfer-Encoding is undone first (when present), then
        Content-Encoding (when present).
        """
        if CONTENT_TRANSFER_ENCODING in self.headers:
            data = self._decode_content_transfer(data)
        if CONTENT_ENCODING in self.headers:
            return self._decode_content(data)
        return data

    def _decode_content(self, data: bytes) -> bytes:
        encoding = self.headers.get(CONTENT_ENCODING, "").lower()

        if encoding == "identity":
            return data
        if encoding == "deflate":
            return zlib.decompress(data, -zlib.MAX_WBITS)
        if encoding == "gzip":
            return zlib.decompress(data, 16 + zlib.MAX_WBITS)
        raise RuntimeError(f"unknown content encoding: {encoding}")

    def _decode_content_transfer(self, data: bytes) -> bytes:
        encoding = self.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()

        if encoding in ("binary", "8bit", "7bit"):
            return data
        if encoding == "base64":
            return base64.b64decode(data)
        if encoding == "quoted-printable":
            return binascii.a2b_qp(data)
        raise RuntimeError("unknown content transfer encoding: {}".format(encoding))

    def get_charset(self, default: str) -> str:
        """Returns charset parameter from Content-Type header or default."""
        content_type = self.headers.get(CONTENT_TYPE, "")
        return parse_mimetype(content_type).parameters.get("charset", default)

    @reify
    def name(self) -> Optional[str]:
        """Returns name specified in Content-Disposition header.

        If the header is missing or malformed, returns None.
        """
        _, disposition = parse_content_disposition(
            self.headers.get(CONTENT_DISPOSITION)
        )
        return content_disposition_filename(disposition, "name")

    @reify
    def filename(self) -> Optional[str]:
        """Returns filename specified in Content-Disposition header.

        Returns None if the header is missing or malformed.
        """
        _, disposition = parse_content_disposition(
            self.headers.get(CONTENT_DISPOSITION)
        )
        return content_disposition_filename(disposition, "filename")
2020-12-20 00:08:09 +00:00
@payload_type(BodyPartReader, order=Order.try_first)
class BodyPartReaderPayload(Payload):
    """Payload that re-sends the content of a BodyPartReader."""

    def __init__(self, value: BodyPartReader, *args: Any, **kwargs: Any) -> None:
        super().__init__(value, *args, **kwargs)

        # Carry over whichever of name / filename the source part declares.
        disposition: Dict[str, str] = {}
        if value.name is not None:
            disposition["name"] = value.name
        if value.filename is not None:
            disposition["filename"] = value.filename

        if disposition:
            self.set_content_disposition("attachment", True, **disposition)

    async def write(self, writer: Any) -> None:
        """Copy the part to *writer* in 64 KiB chunks, decoding each chunk."""
        field = self._value
        while True:
            chunk = await field.read_chunk(size=2 ** 16)
            if not chunk:
                break
            await writer.write(field.decode(chunk))
2020-12-20 00:08:09 +00:00
class MultipartReader:
    """Reader that splits a multipart body into its parts."""

    #: Response wrapper, used when multipart readers constructs from response.
    response_wrapper_cls = MultipartResponseWrapper
    #: Multipart reader class, used to handle multipart/* body parts.
    #: None points to type(self)
    multipart_reader_cls = None
    #: Body part reader class for non multipart/* content types.
    part_reader_cls = BodyPartReader

    def __init__(self, headers: Mapping[str, str], content: StreamReader) -> None:
        self.headers = headers
        # Delimiter lines are the boundary parameter prefixed with "--".
        self._boundary = ("--" + self._get_boundary()).encode()
        self._content = content
        self._last_part: Optional[Union["MultipartReader", BodyPartReader]] = None
        self._at_eof = False
        self._at_bof = True
        # Lines handed back for re-reading; consumed last-in-first-out.
        self._unread: List[bytes] = []

    def __aiter__(
        self,
    ) -> AsyncIterator["BodyPartReader"]:
        return self  # type: ignore[return-value]

    async def __anext__(
        self,
    ) -> Optional[Union["MultipartReader", BodyPartReader]]:
        following = await self.next()
        if following is None:
            raise StopAsyncIteration
        return following

    @classmethod
    def from_response(
        cls,
        response: "ClientResponse",
    ) -> MultipartResponseWrapper:
        """Constructs reader instance from HTTP response.

        :param response: :class:`~aiohttp.client.ClientResponse` instance
        """
        reader = cls(response.headers, response.content)
        return cls.response_wrapper_cls(response, reader)

    def at_eof(self) -> bool:
        """Returns True if the final boundary was reached, false otherwise."""
        return self._at_eof

    async def next(
        self,
    ) -> Optional[Union["MultipartReader", BodyPartReader]]:
        """Emits the next multipart body part, or None after the last one."""
        if self._at_eof:
            return None

        # The previously emitted part must be fully consumed first.
        await self._maybe_release_last_part()

        if self._at_bof:
            # At the very beginning everything up to the first boundary
            # line has to be skipped.
            await self._read_until_first_boundary()
            self._at_bof = False
        else:
            await self._read_boundary()

        if self._at_eof:  # we just read the last boundary, nothing to do there
            return None

        self._last_part = await self.fetch_next_part()
        return self._last_part

    async def release(self) -> None:
        """Reads all the body parts to the void till the final boundary."""
        while not self._at_eof:
            part = await self.next()
            if part is None:
                break
            await part.release()

    async def fetch_next_part(
        self,
    ) -> Union["MultipartReader", BodyPartReader]:
        """Returns the next body part reader."""
        part_headers = await self._read_headers()
        return self._get_part_reader(part_headers)

    def _get_part_reader(
        self,
        headers: "CIMultiDictProxy[str]",
    ) -> Union["MultipartReader", BodyPartReader]:
        """Pick a reader for a part based on its `Content-Type` header.

        Nested ``multipart/*`` parts get a multipart reader, anything else
        gets ``part_reader_cls``.

        :param dict headers: Response headers
        """
        mimetype = parse_mimetype(headers.get(CONTENT_TYPE, ""))

        if mimetype.type != "multipart":
            return self.part_reader_cls(self._boundary, headers, self._content)

        if self.multipart_reader_cls is None:
            return type(self)(headers, self._content)
        return self.multipart_reader_cls(headers, self._content)

    def _get_boundary(self) -> str:
        # Extract and validate the boundary parameter of our Content-Type.
        content_type = self.headers[CONTENT_TYPE]
        mimetype = parse_mimetype(content_type)

        assert mimetype.type == "multipart", "multipart/* content type expected"

        if "boundary" not in mimetype.parameters:
            raise ValueError(
                "boundary missed for Content-Type: %s" % self.headers[CONTENT_TYPE]
            )

        boundary = mimetype.parameters["boundary"]
        if len(boundary) > 70:
            raise ValueError("boundary %r is too long (70 chars max)" % boundary)

        return boundary

    async def _readline(self) -> bytes:
        # Lines pushed back earlier take precedence over the stream.
        if self._unread:
            return self._unread.pop()
        return await self._content.readline()

    async def _read_until_first_boundary(self) -> None:
        # Skip lines until the first boundary line; a closing boundary in
        # that position means the body has no parts at all.
        closing = self._boundary + b"--"
        while True:
            line = await self._readline()
            if line == b"":
                raise ValueError(
                    "Could not find starting boundary %r" % (self._boundary)
                )
            line = line.rstrip()
            if line == self._boundary:
                return
            if line == closing:
                self._at_eof = True
                return

    async def _read_boundary(self) -> None:
        # Consume the delimiter line that must follow a finished part.
        line = (await self._readline()).rstrip()
        if line == self._boundary:
            return
        if line != self._boundary + b"--":
            raise ValueError(f"Invalid boundary {line!r}, expected {self._boundary!r}")

        self._at_eof = True
        epilogue = await self._readline()
        next_line = await self._readline()

        # the epilogue is expected and then either the end of input or the
        # parent multipart boundary, if the parent boundary is found then
        # it should be marked as unread and handed to the parent for
        # processing
        if next_line[:2] == b"--":
            self._unread.append(next_line)
        # otherwise the request is likely missing an epilogue and both
        # lines should be passed to the parent for processing
        # (this handles the old behavior gracefully)
        else:
            self._unread.extend([next_line, epilogue])

    async def _read_headers(self) -> "CIMultiDictProxy[str]":
        # Collect header lines up to and including the terminating blank
        # line, then hand them to HeadersParser.  The list starts with an
        # empty entry, as in the original implementation.
        lines = [b""]
        while True:
            stripped = (await self._content.readline()).strip()
            lines.append(stripped)
            if not stripped:
                break
        headers, _raw_headers = HeadersParser().parse_headers(lines)
        return headers

    async def _maybe_release_last_part(self) -> None:
        """Ensures that the last read body part is read completely."""
        previous = self._last_part
        if previous is None:
            return
        if not previous.at_eof():
            await previous.release()
        # Take over whatever lines the part read ahead but did not consume.
        self._unread.extend(previous._unread)
        self._last_part = None
# One stored body part: (payload, content encoding, content transfer encoding).
# MultipartWriter.append_payload() stores None for either encoding entry when
# no transformation is to be applied.
_Part = Tuple[Payload, str, str]
class MultipartWriter(Payload):
    """Multipart body writer.

    Collects payload parts and serialises them as a ``multipart/<subtype>``
    body delimited by ``boundary`` (a random hex string by default).
    """

    def __init__(self, subtype: str = "mixed", boundary: Optional[str] = None) -> None:
        """Create an empty writer.

        Raises ValueError when *boundary* contains non-ASCII characters or
        characters that cannot be represented in a quoted-string.
        """
        boundary = boundary if boundary is not None else uuid.uuid4().hex
        # The underlying Payload API demands a str (utf-8), not bytes,
        # so we need to ensure we don't lose anything during conversion.
        # As a result, require the boundary to be ASCII only.
        # In both situations.

        try:
            self._boundary = boundary.encode("ascii")
        except UnicodeEncodeError:
            raise ValueError("boundary should contain ASCII only chars") from None
        ctype = f"multipart/{subtype}; boundary={self._boundary_value}"

        super().__init__(None, content_type=ctype)

        self._parts = []  # type: List[_Part]

    def __enter__(self) -> "MultipartWriter":
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        pass

    def __iter__(self) -> Iterator[_Part]:
        return iter(self._parts)

    def __len__(self) -> int:
        return len(self._parts)

    def __bool__(self) -> bool:
        # Defined explicitly so that a writer without parts (``len() == 0``)
        # is still truthy.
        return True

    _valid_tchar_regex = re.compile(br"\A[!#$%&'*+\-.^_`|~\w]+\Z")
    _invalid_qdtext_char_regex = re.compile(br"[\x00-\x08\x0A-\x1F\x7F]")

    @property
    def _boundary_value(self) -> str:
        """Wrap boundary parameter value in quotes, if necessary.

        Reads self._boundary and returns a unicode string.

        Raises ValueError when the boundary contains characters that are not
        allowed inside a quoted-string.
        """
        # Refer to RFCs 7231, 7230, 5234.
        #
        # parameter      = token "=" ( token / quoted-string )
        # token          = 1*tchar
        # quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
        # qdtext         = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
        # obs-text       = %x80-FF
        # quoted-pair    = "\" ( HTAB / SP / VCHAR / obs-text )
        # tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
        #                  / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
        #                  / DIGIT / ALPHA
        #                  ; any VCHAR, except delimiters
        # VCHAR           = %x21-7E
        value = self._boundary
        if re.match(self._valid_tchar_regex, value):
            return value.decode("ascii")  # cannot fail

        if re.search(self._invalid_qdtext_char_regex, value):
            raise ValueError("boundary value contains invalid characters")

        # escape %x5C and %x22
        quoted_value_content = value.replace(b"\\", b"\\\\")
        quoted_value_content = quoted_value_content.replace(b'"', b'\\"')

        return '"' + quoted_value_content.decode("ascii") + '"'

    @property
    def boundary(self) -> str:
        """The boundary as text, without any quoting."""
        return self._boundary.decode("ascii")

    def append(self, obj: Any, headers: Optional[MultiMapping[str]] = None) -> Payload:
        """Add *obj* as a new body part and return the resulting payload.

        A ``Payload`` instance is appended directly after merging *headers*
        into its own headers; any other object is converted with
        ``get_payload``.

        Raises TypeError when no payload can be created from *obj*.
        """
        if headers is None:
            headers = CIMultiDict()

        if isinstance(obj, Payload):
            obj.headers.update(headers)
            return self.append_payload(obj)
        else:
            try:
                payload = get_payload(obj, headers=headers)
            except LookupError:
                # An f-string is used instead of ``"%r" % obj``: the
                # %-operator unpacks tuples, which garbles the message (or
                # fails to format) when *obj* itself is a tuple.
                raise TypeError(f"Cannot create payload from {obj!r}")
            else:
                return self.append_payload(payload)

    def append_payload(self, payload: Payload) -> Payload:
        """Adds a new body part to multipart writer.

        The part's Content-Encoding and Content-Transfer-Encoding headers are
        validated and remembered; Content-Length is set only when the size is
        known and the content is written without any transformation.

        Raises RuntimeError for an unsupported encoding value.
        """
        # compression
        encoding = payload.headers.get(
            CONTENT_ENCODING,
            "",
        ).lower()  # type: Optional[str]
        if encoding and encoding not in ("deflate", "gzip", "identity"):
            raise RuntimeError(f"unknown content encoding: {encoding}")
        if encoding == "identity":
            encoding = None

        # te encoding
        te_encoding = payload.headers.get(
            CONTENT_TRANSFER_ENCODING,
            "",
        ).lower()  # type: Optional[str]
        if te_encoding not in ("", "base64", "quoted-printable", "binary"):
            raise RuntimeError(
                "unknown content transfer encoding: {}" "".format(te_encoding)
            )
        if te_encoding == "binary":
            te_encoding = None

        # size
        size = payload.size
        if size is not None and not (encoding or te_encoding):
            payload.headers[CONTENT_LENGTH] = str(size)

        self._parts.append((payload, encoding, te_encoding))  # type: ignore[arg-type]
        return payload

    def append_json(
        self, obj: Any, headers: Optional[MultiMapping[str]] = None
    ) -> Payload:
        """Helper to append JSON part."""
        if headers is None:
            headers = CIMultiDict()

        return self.append_payload(JsonPayload(obj, headers=headers))

    def append_form(
        self,
        obj: Union[Sequence[Tuple[str, str]], Mapping[str, str]],
        headers: Optional[MultiMapping[str]] = None,
    ) -> Payload:
        """Helper to append form urlencoded part."""
        assert isinstance(obj, (Sequence, Mapping))

        if headers is None:
            headers = CIMultiDict()

        if isinstance(obj, Mapping):
            obj = list(obj.items())
        data = urlencode(obj, doseq=True)

        return self.append_payload(
            StringPayload(
                data, headers=headers, content_type="application/x-www-form-urlencoded"
            )
        )

    @property
    def size(self) -> Optional[int]:
        """Size of the payload.

        None when any part is compressed, transfer-encoded or of unknown
        size, because the serialised length cannot be computed up front.
        """
        total = 0
        for part, encoding, te_encoding in self._parts:
            if encoding or te_encoding or part.size is None:
                return None

            total += int(
                2
                + len(self._boundary)
                + 2
                + part.size  # b'--'+self._boundary+b'\r\n'
                + len(part._binary_headers)
                + 2  # b'\r\n'
            )

        total += 2 + len(self._boundary) + 4  # b'--'+self._boundary+b'--\r\n'
        return total

    async def write(self, writer: Any, close_boundary: bool = True) -> None:
        """Write body.

        Every part is emitted as delimiter line, headers, content and CRLF.
        Parts with a content or transfer encoding are routed through a
        MultipartPayloadWriter.  The closing delimiter is written unless
        *close_boundary* is false.
        """
        for part, encoding, te_encoding in self._parts:
            await writer.write(b"--" + self._boundary + b"\r\n")
            await writer.write(part._binary_headers)

            if encoding or te_encoding:
                w = MultipartPayloadWriter(writer)
                if encoding:
                    w.enable_compression(encoding)
                if te_encoding:
                    w.enable_encoding(te_encoding)
                await part.write(w)  # type: ignore[arg-type]
                await w.write_eof()
            else:
                await part.write(writer)

            await writer.write(b"\r\n")

        if close_boundary:
            await writer.write(b"--" + self._boundary + b"--\r\n")
2020-12-20 00:08:09 +00:00
class MultipartPayloadWriter:
    """Writer adapter applying compression and/or a transfer encoding.

    Data passed to write() is optionally compressed with zlib, then
    optionally base64 or quoted-printable encoded, and finally forwarded to
    the wrapped writer.
    """

    def __init__(self, writer: Any) -> None:
        self._writer = writer
        self._encoding: Optional[str] = None
        self._compress: Any = None
        # Bytes waiting to form a complete 3-byte base64 group.
        self._encoding_buffer: Optional[bytearray] = None

    def enable_encoding(self, encoding: str) -> None:
        """Select "base64" or "quoted-printable"; other values are ignored."""
        if encoding == "base64":
            self._encoding = encoding
            self._encoding_buffer = bytearray()
        elif encoding == "quoted-printable":
            self._encoding = "quoted-printable"

    def enable_compression(
        self, encoding: str = "deflate", strategy: int = zlib.Z_DEFAULT_STRATEGY
    ) -> None:
        """Turn on zlib compression: gzip framing for "gzip", raw deflate otherwise."""
        if encoding == "gzip":
            wbits = 16 + zlib.MAX_WBITS
        else:
            wbits = -zlib.MAX_WBITS
        self._compress = zlib.compressobj(wbits=wbits, strategy=strategy)

    async def write_eof(self) -> None:
        """Flush the compressor and any buffered base64 remainder."""
        compressor = self._compress
        if compressor is not None:
            tail = compressor.flush()
            if tail:
                # Drop the compressor first so write() forwards the flushed
                # bytes instead of compressing them again.
                self._compress = None
                await self.write(tail)

        if self._encoding == "base64" and self._encoding_buffer:
            await self._writer.write(base64.b64encode(self._encoding_buffer))

    async def write(self, chunk: bytes) -> None:
        """Transform *chunk* and pass the result to the wrapped writer."""
        if self._compress is not None and chunk:
            chunk = self._compress.compress(chunk)
            if not chunk:
                # The compressor buffered everything; nothing to emit yet.
                return

        if self._encoding == "base64":
            pending = self._encoding_buffer
            assert pending is not None
            pending.extend(chunk)

            if pending:
                # Encode only whole 3-byte groups so that the concatenated
                # output stays valid base64; keep the rest for later.
                whole = len(pending) // 3 * 3
                ready, self._encoding_buffer = pending[:whole], pending[whole:]
                if ready:
                    await self._writer.write(base64.b64encode(ready))
        elif self._encoding == "quoted-printable":
            await self._writer.write(binascii.b2a_qp(chunk))
        else:
            await self._writer.write(chunk)