[ie/youtube] Add a PO Token Provider Framework (#12840)
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
Core Tests / Core Tests (ubuntu-latest, 3.10) (push) Has been cancelled
Core Tests / Core Tests (ubuntu-latest, 3.11) (push) Has been cancelled
Core Tests / Core Tests (ubuntu-latest, 3.12) (push) Has been cancelled
Core Tests / Core Tests (ubuntu-latest, 3.13) (push) Has been cancelled
Core Tests / Core Tests (ubuntu-latest, pypy-3.10) (push) Has been cancelled
Core Tests / Core Tests (windows-latest, 3.10) (push) Has been cancelled
Core Tests / Core Tests (windows-latest, 3.12) (push) Has been cancelled
Core Tests / Core Tests (windows-latest, 3.13) (push) Has been cancelled
Core Tests / Core Tests (windows-latest, 3.9) (push) Has been cancelled
Core Tests / Core Tests (windows-latest, pypy-3.10) (push) Has been cancelled
Download Tests / Quick Download Tests (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.10) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.11) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.12) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.13) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, pypy-3.10) (push) Has been cancelled
Download Tests / Full Download Tests (windows-latest, 3.9) (push) Has been cancelled
Download Tests / Full Download Tests (windows-latest, pypy-3.10) (push) Has been cancelled
Quick Test / Core Test (push) Has been cancelled
Quick Test / Code check (push) Has been cancelled
Release (master) / release (push) Has been cancelled
Release (master) / publish_pypi (push) Has been cancelled

https://github.com/yt-dlp/yt-dlp/tree/master/yt_dlp/extractor/youtube/pot/README.md

Authored by: coletdjnz
This commit is contained in:
coletdjnz
2025-05-18 13:45:26 +12:00
committed by GitHub
parent abf58dcd6a
commit 2685654a37
28 changed files with 4134 additions and 28 deletions

View File

@@ -23,6 +23,8 @@ from ._base import (
_split_innertube_client,
short_client_name,
)
from .pot._director import initialize_pot_director
from .pot.provider import PoTokenContext, PoTokenRequest
from ..openload import PhantomJSwrapper
from ...jsinterp import JSInterpreter
from ...networking.exceptions import HTTPError
@@ -66,6 +68,7 @@ from ...utils import (
urljoin,
variadic,
)
from ...utils.networking import clean_headers, clean_proxies, select_proxy
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
STREAMING_DATA_INITIAL_PO_TOKEN = '__yt_dlp_po_token'
@@ -1809,6 +1812,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
super().__init__(*args, **kwargs)
self._code_cache = {}
self._player_cache = {}
self._pot_director = None
def _real_initialize(self):
    """Run the parent class initialization, then create the PO Token director for this extractor."""
    super()._real_initialize()
    # The director is built from this extractor instance and kept for later PO Token requests
    director = initialize_pot_director(self)
    self._pot_director = director
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
lock = threading.Lock()
@@ -2855,7 +2863,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None, visitor_data=None,
data_sync_id=None, session_index=None, player_url=None, video_id=None, **kwargs):
data_sync_id=None, session_index=None, player_url=None, video_id=None, webpage=None, **kwargs):
"""
Fetch a PO Token for a given client and context. This function will validate required parameters for a given context and client.
@@ -2869,10 +2877,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
@param session_index: session index.
@param player_url: player URL.
@param video_id: video ID.
@param webpage: video webpage.
@param kwargs: Additional arguments to pass down. May be more added in the future.
@return: The fetched PO Token. None if it could not be fetched.
"""
# TODO(future): This validation should be moved into pot framework.
# Some sort of middleware or validation provider perhaps?
# GVS WebPO Token is bound to visitor_data / Visitor ID when logged out.
# Must have visitor_data for it to function.
if player_url and context == _PoTokenContext.GVS and not visitor_data and not self.is_authenticated:
@@ -2894,6 +2906,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'Got a GVS PO Token for {client} client, but missing Data Sync ID for account. Formats may not work.'
f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
self.write_debug(f'{video_id}: Retrieved a {context.value} PO Token for {client} client from config')
return config_po_token
# Require GVS WebPO Token if logged in for external fetching
@@ -2903,7 +2916,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
return
return self._fetch_po_token(
po_token = self._fetch_po_token(
client=client,
context=context.value,
ytcfg=ytcfg,
@@ -2912,11 +2925,66 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
session_index=session_index,
player_url=player_url,
video_id=video_id,
video_webpage=webpage,
**kwargs,
)
if po_token:
self.write_debug(f'{video_id}: Retrieved a {context.value} PO Token for {client} client')
return po_token
def _fetch_po_token(self, client, **kwargs):
    """
    (Unstable) Fetch a PO Token externally through the PO Token director.

    The `fetch_pot` configuration argument selects the policy:
    'never' skips fetching; 'auto' (also used for any unrecognised value) only fetches when the
    context is listed in the client's PO_TOKEN_REQUIRED_CONTEXTS; 'always' fetches unconditionally.

    @param client: internal client name.
    @param kwargs: Request details. Keys read here: context, ytcfg, session_index, player_url,
                   video_webpage, visitor_data, data_sync_id, video_id.
    @return: The result of the director's get_po_token call. None if fetching was skipped.
    """
    lookup = kwargs.get
    context = lookup('context')

    policy = self._configuration_arg('fetch_pot', [''], ie_key=YoutubeIE)[0]
    if policy not in ('never', 'auto', 'always'):
        policy = 'auto'

    # Avoid fetching PO Tokens when not required
    if policy == 'never':
        return None
    if policy == 'auto':
        requested_context = _PoTokenContext(context)
        if requested_context not in self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']:
            return None

    # Work on copies so the cleaning below does not touch the shared header/proxy mappings
    request_headers = self.get_param('http_headers').copy()
    request_proxies = self._downloader.proxies.copy()
    clean_headers(request_headers)
    clean_proxies(request_proxies, request_headers)

    api_hostname = self._select_api_hostname(None, default_client=client)

    pot_request = PoTokenRequest(
        context=PoTokenContext(context),
        innertube_context=traverse_obj(kwargs, ('ytcfg', 'INNERTUBE_CONTEXT')),
        innertube_host=api_hostname,
        internal_client_name=client,
        session_index=lookup('session_index'),
        player_url=lookup('player_url'),
        video_webpage=lookup('video_webpage'),
        is_authenticated=self.is_authenticated,
        visitor_data=lookup('visitor_data'),
        data_sync_id=lookup('data_sync_id'),
        video_id=lookup('video_id'),
        request_cookiejar=self._downloader.cookiejar,
        # All requests that would need to be proxied should be in the
        # context of www.youtube.com or the innertube host
        request_proxy=(
            select_proxy('https://www.youtube.com', request_proxies)
            or select_proxy(f'https://{api_hostname}', request_proxies)
        ),
        request_headers=request_headers,
        request_timeout=self.get_param('socket_timeout'),
        request_verify_tls=not self.get_param('nocheckcertificate'),
        request_source_address=self.get_param('source_address'),
        bypass_cache=False,
    )

    return self._pot_director.get_po_token(pot_request)
@staticmethod
def _is_agegated(player_response):
@@ -3074,8 +3142,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'video_id': video_id,
'data_sync_id': data_sync_id if self.is_authenticated else None,
'player_url': player_url if require_js_player else None,
'webpage': webpage,
'session_index': self._extract_session_index(master_ytcfg, player_ytcfg),
'ytcfg': player_ytcfg,
'ytcfg': player_ytcfg or self._get_default_ytcfg(client),
}
player_po_token = self.fetch_po_token(