[core] Change how Cookie headers are handled

Cookies are now saved and loaded under `cookies` key in the info dict
instead of `http_headers.Cookie`. Cookies passed in headers are
auto-scoped to the input URLs with a warning.

Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

Authored by: Grub4K
This commit is contained in:
Simon Sawicki
2023-07-06 21:51:04 +05:30
committed by pukkandan
parent f8b4bcc0a7
commit 3121512228
3 changed files with 139 additions and 4 deletions

View File

@@ -1,9 +1,11 @@
import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import functools
import http.cookiejar
import io
import itertools
import json
@@ -25,7 +27,7 @@ import unicodedata
from .cache import Cache
from .compat import urllib # isort: split
from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
@@ -673,6 +675,9 @@ class YoutubeDL:
if auto_init and auto_init != 'no_verbose_header':
self.print_debug_header()
self.__header_cookies = []
self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False)) # compat
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
@@ -1625,8 +1630,60 @@ class YoutubeDL:
self.to_screen('')
raise
def _load_cookies(self, data, *, from_headers=True):
"""Loads cookies from a `Cookie` header
This tries to work around the security vulnerability of passing cookies to every domain.
See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
The unscoped cookies are saved for later to be stored in the jar with a limited scope.
@param data The Cookie header as string to load the cookies from
@param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
"""
for cookie in LenientSimpleCookie(data).values():
if from_headers and any(cookie.values()):
raise ValueError('Invalid syntax in Cookie Header')
domain = cookie.get('domain') or ''
expiry = cookie.get('expires')
if expiry == '': # 0 is valid
expiry = None
prepared_cookie = http.cookiejar.Cookie(
cookie.get('version') or 0, cookie.key, cookie.value, None, False,
domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
cookie.get('secure') or False, expiry, False, None, None, {})
if domain:
self.cookiejar.set_cookie(prepared_cookie)
elif from_headers:
self.deprecated_feature(
'Passing cookies as a header is a potential security risk; '
'they will be scoped to the domain of the downloaded urls. '
'Please consider loading cookies from a file or browser instead.')
self.__header_cookies.append(prepared_cookie)
else:
self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
tb=False, is_error=False)
def _apply_header_cookies(self, url):
"""Applies stray header cookies to the provided url
This loads header cookies and scopes them to the domain provided in `url`.
While this is not ideal, it helps reduce the risk of them being sent
to an unintended destination while mostly maintaining compatibility.
"""
parsed = urllib.parse.urlparse(url)
if not parsed.hostname:
return
for cookie in map(copy.copy, self.__header_cookies):
cookie.domain = f'.{parsed.hostname}'
self.cookiejar.set_cookie(cookie)
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
self._apply_header_cookies(url)
try:
ie_result = ie.extract(url)
except UserNotLive as e:
@@ -2414,9 +2471,24 @@ class YoutubeDL:
if 'Youtubedl-No-Compression' in res: # deprecated
res.pop('Youtubedl-No-Compression', None)
res['Accept-Encoding'] = 'identity'
cookies = self.cookiejar.get_cookie_header(info_dict['url'])
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
if cookies:
res['Cookie'] = cookies
encoder = LenientSimpleCookie()
values = []
for cookie in cookies:
_, value = encoder.value_encode(cookie.value)
values.append(f'{cookie.name}={value}')
if cookie.domain:
values.append(f'Domain={cookie.domain}')
if cookie.path:
values.append(f'Path={cookie.path}')
if cookie.secure:
values.append('Secure')
if cookie.expires:
values.append(f'Expires={cookie.expires}')
if cookie.version:
values.append(f'Version={cookie.version}')
info_dict['cookies'] = '; '.join(values)
if 'X-Forwarded-For' not in res:
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
@@ -3423,6 +3495,8 @@ class YoutubeDL:
infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
for info in variadic(json.loads('\n'.join(f)))]
for info in infos:
self._load_cookies(info.get('cookies'), from_headers=False)
self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat
try:
self.__download_wrapper(self.process_ie_result)(info, download=True)
except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: