[cookies] Parse cookies leniently (#4780)

Closes #4776, #3778
Authored by: Grub4K
This commit is contained in:
Simon Sawicki
2022-09-16 19:02:00 +02:00
committed by GitHub
parent 5736d79172
commit 8817a80d3a
3 changed files with 244 additions and 1 deletions

View File

@@ -1,6 +1,7 @@
import base64
import contextlib
import http.cookiejar
import http.cookies
import json
import os
import re
@@ -990,3 +991,98 @@ def _parse_browser_specification(browser_name, profile=None, keyring=None, conta
if profile is not None and _is_path(profile):
profile = os.path.expanduser(profile)
return browser_name, profile, keyring, container
class LenientSimpleCookie(http.cookies.SimpleCookie):
"""More lenient version of http.cookies.SimpleCookie"""
# From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
_LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"
_RESERVED = {
"expires",
"path",
"comment",
"domain",
"max-age",
"secure",
"httponly",
"version",
"samesite",
}
_FLAGS = {"secure", "httponly"}
# Added 'bad' group to catch the remaining value
_COOKIE_PATTERN = re.compile(r"""
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
) # End of group 'key'
( # Optional group: there may not be a value.
\s*=\s* # Equal Sign
( # Start of potential value
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any doublequoted string
| # or
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
| # or
[""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
) # End of group 'val'
| # or
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
) # End of potential value
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
""", re.ASCII | re.VERBOSE)
def load(self, data):
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
if not isinstance(data, str):
return super().load(data)
morsel = None
index = 0
length = len(data)
while 0 <= index < length:
match = self._COOKIE_PATTERN.search(data, index)
if not match:
break
index = match.end(0)
if match.group("bad"):
morsel = None
continue
key, value = match.group("key", "val")
if key[0] == "$":
if morsel is not None:
morsel[key[1:]] = True
continue
lower_key = key.lower()
if lower_key in self._RESERVED:
if morsel is None:
continue
if value is None:
if lower_key not in self._FLAGS:
morsel = None
continue
value = True
else:
value, _ = self.value_decode(value)
morsel[key] = value
elif value is not None:
morsel = self.get(key, http.cookies.Morsel())
real_value, coded_value = self.value_decode(value)
morsel.set(key, real_value, coded_value)
self[key] = morsel
else:
morsel = None