Update to ytdl-commit-d1c6c5

[YouTube] [core] Improve platform debug log, based on yt-dlp
d1c6c5c4d6

Except:
    * 6ed34338285f722d0da312ce0af3a15a077a3e2a [jsinterp] Add short-cut evaluation for common expression
        * There was no performance improvement when tested with https://github.com/ytdl-org/youtube-dl/issues/30641
    * e8de54bce50f6f77a4d7e8e80675f7003d5bf630 [core] Handle `/../` sequences in HTTP URLs
        * We plan to implement this differently
This commit is contained in:
pukkandan
2023-05-24 23:30:43 +05:30
parent 46f1370e9a
commit 4823ec9f46
10 changed files with 125 additions and 40 deletions

View File

@@ -139,17 +139,21 @@ class FileDownloader:
def format_percent(percent):
return ' N/A%' if percent is None else f'{percent:>5.1f}%'
# NOTE(review): this span looks like a rendered diff whose +/- markers and
# indentation were lost, so two versions of calc_eta appear interleaved.
# Presumably the @staticmethod signature and the inline `rate = float(current) / dif`
# computation are the superseded lines — confirm against the repository.
@staticmethod
def calc_eta(start, now, total, current):
@classmethod
# Estimate the remaining download time in whole seconds (or None if unknown).
# Two call forms are accepted:
#   calc_eta(rate, remaining)            -> remaining / rate
#   calc_eta(start, now, total, current) -> derived from timestamps and byte counts
def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
# `total` left at the sentinel selects the two-argument (rate, remaining) form.
if total is NO_DEFAULT:
rate, remaining = start_or_rate, now_or_remaining
# No estimate is possible when either value is unknown.
if None in (rate, remaining):
return None
# NOTE(review): only None is guarded above; a rate of exactly 0 would raise
# ZeroDivisionError here — confirm callers never pass 0.
return int(float(remaining) / rate)
# Four-argument form: the first two positionals are timestamps.
start, now = start_or_rate, now_or_remaining
if total is None:
return None
# A missing `now` defaults to the current wall-clock time.
if now is None:
now = time.time()
dif = now - start
if current == 0 or dif < 0.001: # One millisecond
return None
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
# The rate is obtained from calc_speed; a falsy rate is returned as-is by the
# `and` short-circuit instead of being used as a divisor.
rate = cls.calc_speed(start, now, current)
return rate and int((float(total) - float(current)) / rate)
@staticmethod
def calc_speed(start, now, bytes):
@@ -166,6 +170,12 @@ class FileDownloader:
def format_retries(retries):
return 'inf' if retries == float('inf') else int(retries)
@staticmethod
def filesize_or_none(unencoded_filename):
    """Return the size in bytes of a regular file, or 0 if it is unavailable.

    Despite the name, 0 (not None) is returned when the path is not a
    regular file, so the result can be compared and subtracted as an int.

    The existence check and the size query are two separate filesystem
    calls; if the file disappears (or becomes unreadable) in between, the
    resulting OSError is treated the same as a missing file instead of
    propagating.
    """
    if not os.path.isfile(unencoded_filename):
        return 0
    try:
        return os.path.getsize(unencoded_filename)
    except OSError:
        # Raced with a delete/rename after the isfile() check.
        return 0
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)

View File

@@ -121,6 +121,11 @@ class FragmentFD(FileDownloader):
'request_data': request_data,
'ctx_id': ctx.get('ctx_id'),
}
frag_resume_len = 0
if ctx['dl'].params.get('continuedl', True):
frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename))
fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len
success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False
@@ -155,9 +160,7 @@ class FragmentFD(FileDownloader):
del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx):
if 'live' not in ctx:
ctx['live'] = False
if not ctx['live']:
if not ctx.setdefault('live', False):
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
@@ -173,12 +176,11 @@ class FragmentFD(FileDownloader):
})
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'
resume_len = 0
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = self.filesize_or_none(tmpfilename)
if resume_len > 0:
open_mode = 'ab'
resume_len = os.path.getsize(encodeFilename(tmpfilename))
# Should be initialized before ytdl file check
ctx.update({
@@ -187,7 +189,9 @@ class FragmentFD(FileDownloader):
})
if self.__do_ytdl_file(ctx):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
continuedl = self.params.get('continuedl', True)
if continuedl and ytdl_file_exists:
self._read_ytdl_file(ctx)
is_corrupt = ctx.get('ytdl_corrupt') is True
is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -201,7 +205,12 @@ class FragmentFD(FileDownloader):
if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt']
self._write_ytdl_file(ctx)
else:
if not continuedl:
if ytdl_file_exists:
self._read_ytdl_file(ctx)
ctx['fragment_index'] = resume_len = 0
self._write_ytdl_file(ctx)
assert ctx['fragment_index'] == 0
@@ -274,12 +283,10 @@ class FragmentFD(FileDownloader):
else:
frag_downloaded_bytes = s['downloaded_bytes']
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
if not ctx['live']:
state['eta'] = self.calc_eta(
start, time_now, estimated_size - resume_len,
state['downloaded_bytes'] - resume_len)
ctx['speed'] = state['speed'] = self.calc_speed(
ctx['fragment_started'], time_now, frag_downloaded_bytes)
ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
if not ctx['live']:
state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state, info_dict)
@@ -297,7 +304,7 @@ class FragmentFD(FileDownloader):
to_file = ctx['tmpfilename'] != '-'
if to_file:
downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
downloaded_bytes = self.filesize_or_none(ctx['filename'])
else:
downloaded_bytes = ctx['complete_frags_downloaded_bytes']

View File

@@ -150,7 +150,8 @@ class HttpFD(FileDownloader):
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
self.report_unable_to_resume()
elif range_start > 0:
self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))

View File

@@ -3,6 +3,8 @@ from ..utils import (
ExtractorError,
GeoRestrictedError,
int_or_none,
remove_start,
traverse_obj,
update_url_query,
urlencode_postdata,
)
@@ -72,7 +74,14 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
requestor_id, brand = self._DOMAIN_MAP[domain]
result = self._download_json(
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
filter_value, query={'filter[%s]' % filter_key: filter_value})
result = traverse_obj(
result, ('results',
lambda k, v: k == 0 and v[filter_key] == filter_value),
get_all=False)
if not result:
raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
video_id=remove_start(filter_value, '/'))
title = result['title']
video_id = result['id']
media_url = result['publicUrl']
@@ -123,7 +132,7 @@ class AENetworksIE(AENetworksBaseIE):
'skip_download': True,
},
'add_ie': ['ThePlatform'],
'skip': 'This video is only available for users of participating TV providers.',
'skip': 'Geo-restricted - This content is not available in your location.'
}, {
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'info_dict': {
@@ -140,6 +149,7 @@ class AENetworksIE(AENetworksBaseIE):
'skip_download': True,
},
'add_ie': ['ThePlatform'],
'skip': 'This video is only available for users of participating TV providers.',
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True
@@ -303,6 +313,7 @@ class HistoryTopicIE(AENetworksBaseIE):
class HistoryPlayerIE(AENetworksBaseIE):
IE_NAME = 'history:player'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
_TESTS = []
def _real_extract(self, url):
domain, video_id = self._match_valid_url(url).groups()

View File

@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
traverse_obj,
smuggle_url,
traverse_obj,
unsmuggle_url,
)

View File

@@ -66,7 +66,6 @@ from ..utils import (
variadic,
)
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
@@ -2994,17 +2993,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
# Obsolete patterns
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')
@@ -4883,7 +4879,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
if metadata_renderer:
channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
('channelUrl', {self.ucid_from_url}))
('channelUrl', {self.ucid_from_url}))
info.update({
'channel': metadata_renderer.get('title'),
'channel_id': channel_id,

View File

@@ -443,7 +443,7 @@ class JSInterpreter:
err = e
pending = (None, False)
m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
m = re.match(fr'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{', expr)
if m:
sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
if err:

View File

@@ -130,8 +130,13 @@ USER_AGENTS = {
}
# NOTE(review): both the assignment forms and the class/def forms appear here;
# this looks like a diff with its +/- markers stripped, the later definitions
# presumably being the current ones — confirm against the repository.
NO_DEFAULT = object()
IDENTITY = lambda x: x
# Sentinel meaning "no value supplied"; callers compare against it by identity
# (`is NO_DEFAULT`). Defined as a class, it is itself an instance of `type`.
class NO_DEFAULT:
pass
# Identity function: returns its argument unchanged.
def IDENTITY(x):
return x
ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
@@ -3223,6 +3228,9 @@ def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO
def variadic(x, allowed_types=NO_DEFAULT):
    """Return *x* as-is when it is iterable-like, otherwise as a 1-tuple.

    *allowed_types* is forwarded to ``is_iterable_like`` as ``blocked_types``.
    Passing anything other than a tuple or a type is deprecated: a warning
    is emitted and the value is coerced with ``tuple()``.
    """
    blocked = allowed_types
    if not isinstance(blocked, (tuple, type)):
        deprecation_warning('allowed_types should be a tuple or a type')
        blocked = tuple(blocked)

    if is_iterable_like(x, blocked_types=blocked):
        return x
    return (x, )