mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-10-08 00:54:52 +00:00
[networking] Rewrite architecture (#2861)
The new networking interface consists of a `RequestDirector` that directs each `Request` to an appropriate `RequestHandler` and returns the `Response` or raises a `RequestError`. The handlers define adapters to transform their internal Request/Response/Errors to our interfaces.

User-facing changes:
- Fix issues with per-request proxies on redirects for urllib
- Support for the `ALL_PROXY` environment variable for proxy setting
- Support for `socks5h` proxies
  - Closes https://github.com/yt-dlp/yt-dlp/issues/6325, https://github.com/ytdl-org/youtube-dl/issues/22618, https://github.com/ytdl-org/youtube-dl/pull/28093
- Raise an error when using an `https` proxy instead of silently converting it to `http`

Authored by: coletdjnz
This commit is contained in:
@@ -4,7 +4,6 @@ import copy
|
||||
import datetime
|
||||
import errno
|
||||
import fileinput
|
||||
import functools
|
||||
import http.cookiejar
|
||||
import io
|
||||
import itertools
|
||||
@@ -25,8 +24,8 @@ import traceback
|
||||
import unicodedata
|
||||
|
||||
from .cache import Cache
|
||||
from .compat import urllib # isort: split
|
||||
from .compat import compat_os_name, compat_shlex_quote
|
||||
from .compat import functools, urllib # isort: split
|
||||
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
|
||||
from .cookies import LenientSimpleCookie, load_cookies
|
||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
@@ -34,6 +33,15 @@ from .extractor import gen_extractor_classes, get_info_extractor
|
||||
from .extractor.common import UnsupportedURLIE
|
||||
from .extractor.openload import PhantomJSwrapper
|
||||
from .minicurses import format_text
|
||||
from .networking import Request, RequestDirector
|
||||
from .networking.common import _REQUEST_HANDLERS
|
||||
from .networking.exceptions import (
|
||||
HTTPError,
|
||||
NoSupportingHandlers,
|
||||
RequestError,
|
||||
SSLError,
|
||||
_CompatHTTPError,
|
||||
)
|
||||
from .plugins import directories as plugin_directories
|
||||
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
|
||||
from .postprocessor import (
|
||||
@@ -78,7 +86,6 @@ from .utils import (
|
||||
MaxDownloadsReached,
|
||||
Namespace,
|
||||
PagedList,
|
||||
PerRequestProxyHandler,
|
||||
PlaylistEntries,
|
||||
Popen,
|
||||
PostProcessingError,
|
||||
@@ -87,9 +94,6 @@ from .utils import (
|
||||
SameFileError,
|
||||
UnavailableVideoError,
|
||||
UserNotLive,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
YoutubeDLRedirectHandler,
|
||||
age_restricted,
|
||||
args_to_str,
|
||||
bug_reports_message,
|
||||
@@ -102,6 +106,7 @@ from .utils import (
|
||||
error_to_compat_str,
|
||||
escapeHTML,
|
||||
expand_path,
|
||||
extract_basic_auth,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_bytes,
|
||||
@@ -117,8 +122,6 @@ from .utils import (
|
||||
locked_file,
|
||||
make_archive_id,
|
||||
make_dir,
|
||||
make_HTTPS_handler,
|
||||
merge_headers,
|
||||
network_exceptions,
|
||||
number_of_digits,
|
||||
orderedSet,
|
||||
@@ -132,7 +135,6 @@ from .utils import (
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
str_or_none,
|
||||
strftime_or_none,
|
||||
@@ -151,7 +153,12 @@ from .utils import (
|
||||
write_json_file,
|
||||
write_string,
|
||||
)
|
||||
from .utils.networking import clean_headers
|
||||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import (
|
||||
HTTPHeaderDict,
|
||||
clean_headers,
|
||||
clean_proxies,
|
||||
)
|
||||
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
@@ -673,7 +680,9 @@ class YoutubeDL:
|
||||
raise
|
||||
|
||||
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
|
||||
self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
|
||||
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
||||
self._request_director = self.build_request_director(
|
||||
sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
|
||||
if auto_init and auto_init != 'no_verbose_header':
|
||||
self.print_debug_header()
|
||||
|
||||
@@ -763,8 +772,6 @@ class YoutubeDL:
|
||||
get_postprocessor(pp_def.pop('key'))(self, **pp_def),
|
||||
when=when)
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
def preload_download_archive(fn):
|
||||
"""Preload the archive, if any is specified"""
|
||||
archive = set()
|
||||
@@ -946,7 +953,11 @@ class YoutubeDL:
|
||||
|
||||
def __exit__(self, *args):
    """Leave the ``with`` block.

    Restores the console title first, then delegates the remaining
    teardown to ``close()``. The exception details passed in ``args``
    are ignored and nothing is returned, so an in-flight exception is
    not suppressed.
    """
    self.restore_console_title()
    # All remaining teardown lives in close() so it can also be called directly
    self.close()
|
||||
|
||||
def close(self):
    """Persist cookies and shut down the request director.

    The request director is closed even when saving the cookies raises,
    so that its handlers are not left open. The exception from
    ``save_cookies()`` still propagates to the caller afterwards.
    """
    try:
        self.save_cookies()
    finally:
        # Must run regardless of whether the cookies could be written
        self._request_director.close()
|
||||
|
||||
def trouble(self, message=None, tb=None, is_error=True):
|
||||
"""Determine action to take when a download problem appears.
|
||||
@@ -2468,7 +2479,7 @@ class YoutubeDL:
|
||||
return _build_selector_function(parsed_selector)
|
||||
|
||||
def _calc_headers(self, info_dict):
|
||||
res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
|
||||
res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
|
||||
clean_headers(res)
|
||||
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
|
||||
if cookies:
|
||||
@@ -3943,13 +3954,8 @@ class YoutubeDL:
|
||||
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
|
||||
})) or 'none'))
|
||||
|
||||
self._setup_opener()
|
||||
proxy_map = {}
|
||||
for handler in self._opener.handlers:
|
||||
if hasattr(handler, 'proxies'):
|
||||
proxy_map.update(handler.proxies)
|
||||
write_debug(f'Proxy map: {proxy_map}')
|
||||
|
||||
write_debug(f'Proxy map: {self.proxies}')
|
||||
# write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
|
||||
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
||||
display_list = ['%s%s' % (
|
||||
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
||||
@@ -3977,53 +3983,21 @@ class YoutubeDL:
|
||||
'See https://yt-dl.org/update if you need help updating.' %
|
||||
latest_version)
|
||||
|
||||
def _setup_opener(self):
|
||||
if hasattr(self, '_opener'):
|
||||
return
|
||||
timeout_val = self.params.get('socket_timeout')
|
||||
self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
|
||||
@functools.cached_property
|
||||
def proxies(self):
|
||||
"""Global proxy configuration"""
|
||||
opts_proxy = self.params.get('proxy')
|
||||
|
||||
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||
if opts_proxy is not None:
|
||||
if opts_proxy == '':
|
||||
proxies = {}
|
||||
else:
|
||||
proxies = {'http': opts_proxy, 'https': opts_proxy}
|
||||
opts_proxy = '__noproxy__'
|
||||
proxies = {'all': opts_proxy}
|
||||
else:
|
||||
proxies = urllib.request.getproxies()
|
||||
# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
|
||||
# compat. Set HTTPS_PROXY to __noproxy__ to revert
|
||||
if 'http' in proxies and 'https' not in proxies:
|
||||
proxies['https'] = proxies['http']
|
||||
proxy_handler = PerRequestProxyHandler(proxies)
|
||||
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
redirect_handler = YoutubeDLRedirectHandler()
|
||||
data_handler = urllib.request.DataHandler()
|
||||
|
||||
# When passing our own FileHandler instance, build_opener won't add the
|
||||
# default FileHandler and allows us to disable the file protocol, which
|
||||
# can be used for malicious purposes (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/8227)
|
||||
file_handler = urllib.request.FileHandler()
|
||||
|
||||
if not self.params.get('enable_file_urls'):
|
||||
def file_open(*args, **kwargs):
|
||||
raise urllib.error.URLError(
|
||||
'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
|
||||
'Use --enable-file-urls to enable at your own risk.')
|
||||
file_handler.file_open = file_open
|
||||
|
||||
opener = urllib.request.build_opener(
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
|
||||
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
|
||||
opener.addheaders = []
|
||||
self._opener = opener
|
||||
return proxies
|
||||
|
||||
@functools.cached_property
|
||||
def cookiejar(self):
|
||||
@@ -4031,11 +4005,84 @@ class YoutubeDL:
|
||||
return load_cookies(
|
||||
self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
|
||||
|
||||
@property
def _opener(self):
    """Deprecated: return the opener instance owned by the 'Urllib' request handler.

    Emits a deprecation warning on every access, then asks the handler
    registered under the key 'Urllib' for the instance matching this
    object's cookiejar and proxy configuration.
    New code should call ``urlopen()`` instead.
    """
    self.deprecation_warning('YoutubeDL._opener() is deprecated, use YoutubeDL.urlopen()')
    urllib_rh = self._request_director.handlers['Urllib']
    opener_kwargs = {'cookiejar': self.cookiejar, 'proxies': self.proxies}
    return urllib_rh._get_instance(**opener_kwargs)
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
if isinstance(req, str):
|
||||
req = sanitized_Request(req)
|
||||
return self._opener.open(req, timeout=self._socket_timeout)
|
||||
req = Request(req)
|
||||
elif isinstance(req, urllib.request.Request):
|
||||
req = urllib_req_to_req(req)
|
||||
assert isinstance(req, Request)
|
||||
|
||||
# compat: Assume user:pass url params are basic auth
|
||||
url, basic_auth_header = extract_basic_auth(req.url)
|
||||
if basic_auth_header:
|
||||
req.headers['Authorization'] = basic_auth_header
|
||||
req.url = sanitize_url(url)
|
||||
|
||||
clean_proxies(proxies=req.proxies, headers=req.headers)
|
||||
clean_headers(req.headers)
|
||||
|
||||
try:
|
||||
return self._request_director.send(req)
|
||||
except NoSupportingHandlers as e:
|
||||
for ue in e.unsupported_errors:
|
||||
if not (ue.handler and ue.msg):
|
||||
continue
|
||||
if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
|
||||
raise RequestError(
|
||||
'file:// URLs are disabled by default in yt-dlp for security reasons. '
|
||||
'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
|
||||
raise
|
||||
except SSLError as e:
|
||||
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
|
||||
raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
|
||||
elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
|
||||
raise RequestError(
|
||||
'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
|
||||
'Try using --legacy-server-connect', cause=e) from e
|
||||
raise
|
||||
except HTTPError as e: # TODO: Remove in a future release
|
||||
raise _CompatHTTPError(e) from e
|
||||
|
||||
def build_request_director(self, handlers):
    """Create a RequestDirector populated with one instance of each handler class.

    @param handlers  Iterable of request handler classes; each one is
                     instantiated with the shared options below and added
                     to the director in iteration order.
    @returns         The populated RequestDirector.

    All handlers share the same logger, the same cleaned copy of the global
    headers and the same cleaned copy of the global proxy map.
    """
    params = self.params
    logger = _YDLLogger(self)

    # Work on copies so cleaning does not alter the global configuration
    headers = params.get('http_headers').copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    # Handler keyword argument -> name of the option it is read from in params.
    # NOTE(review): presumably traverse_obj leaves out options that are unset,
    # so the handlers fall back to their own defaults - confirm
    option_paths = {
        'verbose': 'debug_printtraffic',
        'source_address': 'source_address',
        'timeout': 'socket_timeout',
        'legacy_ssl_support': 'legacy_server_connect',
        'enable_file_urls': 'enable_file_urls',
        'client_cert': {
            'client_certificate': 'client_certificate',
            'client_certificate_key': 'client_certificate_key',
            'client_certificate_password': 'client_certificate_password',
        },
    }

    director = RequestDirector(logger=logger, verbose=params.get('debug_printtraffic'))
    for handler_cls in handlers:
        handler_kwargs = {
            'logger': logger,
            'headers': headers,
            'cookiejar': self.cookiejar,
            'proxies': proxies,
            'prefer_system_certs': 'no-certifi' in params['compat_opts'],
            'verify': not params.get('nocheckcertificate'),
        }
        # Resolved anew for every handler so no option container is shared between them
        handler_kwargs.update(traverse_obj(params, option_paths))
        director.add_handler(handler_cls(**handler_kwargs))
    return director
|
||||
|
||||
def encode(self, s):
|
||||
if isinstance(s, bytes):
|
||||
@@ -4188,7 +4235,7 @@ class YoutubeDL:
|
||||
else:
|
||||
self.to_screen(f'[info] Downloading {thumb_display_id} ...')
|
||||
try:
|
||||
uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
|
||||
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
|
||||
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
|
||||
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
|
Reference in New Issue
Block a user