[extractor] Use classmethod/property where possible

and refactor lazy extractors accordingly. This reduces the need to create extractor instances
2025-11-01 15:10:45 +00:00 · 2022-05-11 21:24:44 +05:30
parent 7ddbf09c25
commit 82d020804d
11 changed files with 188 additions and 167 deletions
--- a/yt_dlp/extractor/__init__.py
+++ b/yt_dlp/extractor/__init__.py
@@ -37,11 +37,17 @@ def gen_extractors():
    return [klass() for klass in gen_extractor_classes()]


-def list_extractors(age_limit):
+def list_extractor_classes(age_limit=None):
    """Return a list of extractors that are suitable for the given age, sorted by extractor name"""
-    return sorted(filter(
-        lambda ie: ie.is_suitable(age_limit),
-        gen_extractors()), key=lambda ie: ie.IE_NAME.lower())
+    yield from sorted(filter(
+        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,  # noqa: F405
+        gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
+    yield GenericIE  # noqa: F405
+
+
+def list_extractors(age_limit=None):
+    """Return a list of extractor instances that are suitable for the given age, sorted by extractor name"""
+    return [ie() for ie in list_extractor_classes(age_limit)]


 def get_info_extractor(ie_name):
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -40,6 +40,7 @@ from ..utils import (
    age_restricted,
    base_url,
    bug_reports_message,
+    classproperty,
    clean_html,
    determine_ext,
    determine_protocol,
@@ -710,9 +711,9 @@ class InfoExtractor:
        """A string for getting the InfoExtractor with get_info_extractor"""
        return cls.__name__[:-2]

-    @property
-    def IE_NAME(self):
-        return type(self).__name__[:-2]
+    @classproperty
+    def IE_NAME(cls):
+        return cls.__name__[:-2]

    @staticmethod
    def __can_accept_status_code(err, expected_status):
@@ -3624,56 +3625,57 @@ class InfoExtractor:
                self._set_cookie(domain, cookie, value)
                break

-    def get_testcases(self, include_onlymatching=False):
-        t = getattr(self, '_TEST', None)
+    @classmethod
+    def get_testcases(cls, include_onlymatching=False):
+        t = getattr(cls, '_TEST', None)
        if t:
-            assert not hasattr(self, '_TESTS'), \
-                '%s has _TEST and _TESTS' % type(self).__name__
+            assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS'
            tests = [t]
        else:
-            tests = getattr(self, '_TESTS', [])
+            tests = getattr(cls, '_TESTS', [])
        for t in tests:
            if not include_onlymatching and t.get('only_matching', False):
                continue
-            t['name'] = type(self).__name__[:-len('IE')]
+            t['name'] = cls.ie_key()
            yield t

-    def is_suitable(self, age_limit):
+    @classmethod
+    def is_suitable(cls, age_limit):
        """ Test whether the extractor is generally suitable for the given
        age limit (i.e. pornographic sites are not, all others usually are) """

        any_restricted = False
-        for tc in self.get_testcases(include_onlymatching=False):
+        for tc in cls.get_testcases(include_onlymatching=False):
            if tc.get('playlist', []):
                tc = tc['playlist'][0]
-            is_restricted = age_restricted(
-                tc.get('info_dict', {}).get('age_limit'), age_limit)
+            is_restricted = age_restricted(tc.get('info_dict', {}).get('age_limit'), age_limit)
            if not is_restricted:
                return True
            any_restricted = any_restricted or is_restricted
        return not any_restricted

-    def description(self, *, markdown=True, search_examples=None):
+    @classmethod
+    def description(cls, *, markdown=True, search_examples=None):
        """Description of the extractor"""
        desc = ''
-        if self._NETRC_MACHINE:
+        if cls._NETRC_MACHINE:
            if markdown:
-                desc += f' [<abbr title="netrc machine"><em>{self._NETRC_MACHINE}</em></abbr>]'
+                desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]'
            else:
-                desc += f' [{self._NETRC_MACHINE}]'
-        if self.IE_DESC is False:
+                desc += f' [{cls._NETRC_MACHINE}]'
+        if cls.IE_DESC is False:
            desc += ' [HIDDEN]'
-        elif self.IE_DESC:
-            desc += f' {self.IE_DESC}'
-        if self.SEARCH_KEY:
-            desc += f'; "{self.SEARCH_KEY}:" prefix'
+        elif cls.IE_DESC:
+            desc += f' {cls.IE_DESC}'
+        if cls.SEARCH_KEY:
+            desc += f'; "{cls.SEARCH_KEY}:" prefix'
            if search_examples:
                _COUNTS = ('', '5', '10', 'all')
-                desc += f' (Example: "{self.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
-        if not self.working():
+                desc += f' (Example: "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
+        if not cls.working():
            desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'

-        name = f' - **{self.IE_NAME}**' if markdown else self.IE_NAME
+        name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME
        return f'{name}:{desc}' if desc else name

    def extract_subtitles(self, *args, **kwargs):
@@ -3849,6 +3851,6 @@ class SearchInfoExtractor(InfoExtractor):
        """Returns an iterator of search results"""
        raise NotImplementedError('This method must be implemented by subclasses')

-    @property
-    def SEARCH_KEY(self):
-        return self._SEARCH_KEY
+    @classproperty
+    def SEARCH_KEY(cls):
+        return cls._SEARCH_KEY
--- a/yt_dlp/extractor/drtv.py
+++ b/yt_dlp/extractor/drtv.py
@@ -18,6 +18,7 @@ from ..utils import (
    url_or_none,
 )

+
 class DRTVIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
--- a/yt_dlp/extractor/testurl.py
+++ b/yt_dlp/extractor/testurl.py
@@ -8,55 +8,36 @@ class TestURLIE(InfoExtractor):
    """ Allows addressing of the test cases as test:yout.*be_1 """

    IE_DESC = False  # Do not list
-    _VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'
+    _VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$'

    def _real_extract(self, url):
-        from ..extractor import gen_extractors
+        from ..extractor import gen_extractor_classes

-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-        extractor_id = mobj.group('extractor')
-        all_extractors = gen_extractors()
+        extractor_id, num = self._match_valid_url(url).group('extractor', 'num')

        rex = re.compile(extractor_id, flags=re.IGNORECASE)
-        matching_extractors = [
-            e for e in all_extractors if rex.search(e.IE_NAME)]
+        matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]

        if len(matching_extractors) == 0:
-            raise ExtractorError(
-                'No extractors matching %r found' % extractor_id,
-                expected=True)
+            raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
        elif len(matching_extractors) > 1:
-            # Is it obvious which one to pick?
-            try:
+            try:  # Check for exact match
                extractor = next(
                    ie for ie in matching_extractors
                    if ie.IE_NAME.lower() == extractor_id.lower())
            except StopIteration:
                raise ExtractorError(
-                    ('Found multiple matching extractors: %s' %
-                        ' '.join(ie.IE_NAME for ie in matching_extractors)),
+                    'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors),
                    expected=True)
        else:
            extractor = matching_extractors[0]

-        num_str = mobj.group('num')
-        num = int(num_str) if num_str else 0
-
-        testcases = []
-        t = getattr(extractor, '_TEST', None)
-        if t:
-            testcases.append(t)
-        testcases.extend(getattr(extractor, '_TESTS', []))
-
+        testcases = tuple(extractor.get_testcases(True))  # True: keep only_matching tests too
        try:
-            tc = testcases[num]
+            tc = testcases[int(num or 0)]
        except IndexError:
            raise ExtractorError(
-                ('Test case %d not found, got only %d tests' %
-                    (num, len(testcases))),
-                expected=True)
+                f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True)

-        self.to_screen('Test URL: %s' % tc['url'])
-
-        return self.url_result(tc['url'], video_id=video_id)
+        self.to_screen(f'Test URL: {tc["url"]}')
+        return self.url_result(tc['url'])
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -31,6 +31,7 @@ from ..utils import (
    NO_DEFAULT,
    ExtractorError,
    bug_reports_message,
+    classproperty,
    clean_html,
    datetime_from_str,
    dict_get,
@@ -5781,16 +5782,17 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
 class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
-    Subclasses must define the _FEED_NAME property.
+    Subclasses must re-define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
+    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        YoutubeBaseInfoExtractor._check_login_required(self)

-    @property
+    @classproperty
    def IE_NAME(self):
-        return 'youtube:%s' % self._FEED_NAME
+        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        return self.url_result(