Update to ytdl-commit-d1c6c5

[YouTube] [core] Improve platform debug log, based on yt-dlp d1c6c5c4d6 Except: * 6ed34338285f722d0da312ce0af3a15a077a3e2a [jsinterp] Add short-cut evaluation for common expression * There was no performance improvement when tested with https://github.com/ytdl-org/youtube-dl/issues/30641 * e8de54bce50f6f77a4d7e8e80675f7003d5bf630 [core] Handle `/../` sequences in HTTP URLs * We plan to implement this differently
2025-10-06 10:54:52 +00:00 · 2023-05-24 23:30:43 +05:30
parent 46f1370e9a
commit 4823ec9f46
10 changed files with 125 additions and 40 deletions
--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@@ -139,17 +139,21 @@ class FileDownloader:
    def format_percent(percent):
        return '  N/A%' if percent is None else f'{percent:>5.1f}%'

-    @staticmethod
-    def calc_eta(start, now, total, current):
+    @classmethod
+    def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
+        if total is NO_DEFAULT:
+            rate, remaining = start_or_rate, now_or_remaining
+            if None in (rate, remaining):
+                return None
+            return int(float(remaining) / rate)
+
+        start, now = start_or_rate, now_or_remaining
        if total is None:
            return None
        if now is None:
            now = time.time()
-        dif = now - start
-        if current == 0 or dif < 0.001:  # One millisecond
-            return None
-        rate = float(current) / dif
-        return int((float(total) - float(current)) / rate)
+        rate = cls.calc_speed(start, now, current)
+        return rate and int((float(total) - float(current)) / rate)

    @staticmethod
    def calc_speed(start, now, bytes):
@@ -166,6 +170,12 @@ class FileDownloader:
    def format_retries(retries):
        return 'inf' if retries == float('inf') else int(retries)

+    @staticmethod
+    def filesize_or_none(unencoded_filename):
+        if os.path.isfile(unencoded_filename):
+            return os.path.getsize(unencoded_filename)
+        return 0
+
    @staticmethod
    def best_block_size(elapsed_time, bytes):
        new_min = max(bytes / 2.0, 1.0)
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -121,6 +121,11 @@ class FragmentFD(FileDownloader):
            'request_data': request_data,
            'ctx_id': ctx.get('ctx_id'),
        }
+        frag_resume_len = 0
+        if ctx['dl'].params.get('continuedl', True):
+            frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename))
+        fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len
+
        success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
            return False
@@ -155,9 +160,7 @@ class FragmentFD(FileDownloader):
            del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
-        if 'live' not in ctx:
-            ctx['live'] = False
-        if not ctx['live']:
+        if not ctx.setdefault('live', False):
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
@@ -173,12 +176,11 @@ class FragmentFD(FileDownloader):
        })
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
-        resume_len = 0

        # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
+        resume_len = self.filesize_or_none(tmpfilename)
+        if resume_len > 0:
            open_mode = 'ab'
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
@@ -187,7 +189,9 @@ class FragmentFD(FileDownloader):
        })

        if self.__do_ytdl_file(ctx):
-            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+            ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
+            continuedl = self.params.get('continuedl', True)
+            if continuedl and ytdl_file_exists:
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -201,7 +205,12 @@ class FragmentFD(FileDownloader):
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
+
            else:
+                if not continuedl:
+                    if ytdl_file_exists:
+                        self._read_ytdl_file(ctx)
+                    ctx['fragment_index'] = resume_len = 0
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0

@@ -274,12 +283,10 @@ class FragmentFD(FileDownloader):
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
-                if not ctx['live']:
-                    state['eta'] = self.calc_eta(
-                        start, time_now, estimated_size - resume_len,
-                        state['downloaded_bytes'] - resume_len)
                ctx['speed'] = state['speed'] = self.calc_speed(
-                    ctx['fragment_started'], time_now, frag_downloaded_bytes)
+                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
+                if not ctx['live']:
+                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state, info_dict)

@@ -297,7 +304,7 @@ class FragmentFD(FileDownloader):

        to_file = ctx['tmpfilename'] != '-'
        if to_file:
-            downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
+            downloaded_bytes = self.filesize_or_none(ctx['filename'])
        else:
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']

--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -150,7 +150,8 @@ class HttpFD(FileDownloader):
                    # Content-Range is either not present or invalid. Assuming remote webserver is
                    # trying to send the whole file, resume is not possible, so wiping the local file
                    # and performing entire redownload
-                    self.report_unable_to_resume()
+                    elif range_start > 0:
+                        self.report_unable_to_resume()
                    ctx.resume_len = 0
                    ctx.open_mode = 'wb'
                ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
--- a/yt_dlp/extractor/aenetworks.py
+++ b/yt_dlp/extractor/aenetworks.py
@@ -3,6 +3,8 @@ from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    int_or_none,
+    remove_start,
+    traverse_obj,
    update_url_query,
    urlencode_postdata,
 )
@@ -72,7 +74,14 @@ class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
        requestor_id, brand = self._DOMAIN_MAP[domain]
        result = self._download_json(
            'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
-            filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+            filter_value, query={'filter[%s]' % filter_key: filter_value})
+        result = traverse_obj(
+            result, ('results',
+                     lambda k, v: k == 0 and v[filter_key] == filter_value),
+            get_all=False)
+        if not result:
+            raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
+                                 video_id=remove_start(filter_value, '/'))
        title = result['title']
        video_id = result['id']
        media_url = result['publicUrl']
@@ -123,7 +132,7 @@ class AENetworksIE(AENetworksBaseIE):
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
-        'skip': 'This video is only available for users of participating TV providers.',
+        'skip': 'Geo-restricted - This content is not available in your location.'
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
        'info_dict': {
@@ -140,6 +149,7 @@ class AENetworksIE(AENetworksBaseIE):
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
+        'skip': 'This video is only available for users of participating TV providers.',
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True
@@ -303,6 +313,7 @@ class HistoryTopicIE(AENetworksBaseIE):
 class HistoryPlayerIE(AENetworksBaseIE):
    IE_NAME = 'history:player'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
+    _TESTS = []

    def _real_extract(self, url):
        domain, video_id = self._match_valid_url(url).groups()
--- a/yt_dlp/extractor/litv.py
+++ b/yt_dlp/extractor/litv.py
@@ -4,8 +4,8 @@ from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
-    traverse_obj,
    smuggle_url,
+    traverse_obj,
    unsmuggle_url,
 )

--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -66,7 +66,6 @@ from ..utils import (
    variadic,
 )

-
 STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
 # any clients starting with _ cannot be explicitly requested by the user
 INNERTUBE_CLIENTS = {
@@ -2994,17 +2993,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
-             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+             r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

@@ -4883,7 +4879,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
        metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
        if metadata_renderer:
            channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
-                                                         ('channelUrl', {self.ucid_from_url}))
+                                      ('channelUrl', {self.ucid_from_url}))
            info.update({
                'channel': metadata_renderer.get('title'),
                'channel_id': channel_id,
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@@ -443,7 +443,7 @@ class JSInterpreter:
                err = e

            pending = (None, False)
-            m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
+            m = re.match(fr'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{', expr)
            if m:
                sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
                if err:
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -130,8 +130,13 @@ USER_AGENTS = {
 }


-NO_DEFAULT = object()
-IDENTITY = lambda x: x
+class NO_DEFAULT:
+    pass
+
+
+def IDENTITY(x):
+    return x
+

 ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
@@ -3223,6 +3228,9 @@ def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO


 def variadic(x, allowed_types=NO_DEFAULT):
+    if not isinstance(allowed_types, (tuple, type)):
+        deprecation_warning('allowed_types should be a tuple or a type')
+        allowed_types = tuple(allowed_types)
    return x if is_iterable_like(x, blocked_types=allowed_types) else (x, )