mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-11-01 15:10:45 +00:00
[extractor] Use classmethod/property where possible
and refactor lazy extractors accordingly. This reduces the need to create extractor instances
This commit is contained in:
@@ -37,11 +37,17 @@ def gen_extractors():
|
||||
return [klass() for klass in gen_extractor_classes()]
|
||||
|
||||
|
||||
def list_extractors(age_limit):
|
||||
def list_extractor_classes(age_limit=None):
|
||||
"""Return a list of extractors that are suitable for the given age, sorted by extractor name"""
|
||||
return sorted(filter(
|
||||
lambda ie: ie.is_suitable(age_limit),
|
||||
gen_extractors()), key=lambda ie: ie.IE_NAME.lower())
|
||||
yield from sorted(filter(
|
||||
lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405
|
||||
gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
|
||||
yield GenericIE # noqa: F405
|
||||
|
||||
|
||||
def list_extractors(age_limit=None):
|
||||
"""Return a list of extractor instances that are suitable for the given age, sorted by extractor name"""
|
||||
return [ie() for ie in list_extractor_classes(age_limit)]
|
||||
|
||||
|
||||
def get_info_extractor(ie_name):
|
||||
|
||||
@@ -40,6 +40,7 @@ from ..utils import (
|
||||
age_restricted,
|
||||
base_url,
|
||||
bug_reports_message,
|
||||
classproperty,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
@@ -710,9 +711,9 @@ class InfoExtractor:
|
||||
"""A string for getting the InfoExtractor with get_info_extractor"""
|
||||
return cls.__name__[:-2]
|
||||
|
||||
@property
|
||||
def IE_NAME(self):
|
||||
return type(self).__name__[:-2]
|
||||
@classproperty
|
||||
def IE_NAME(cls):
|
||||
return cls.__name__[:-2]
|
||||
|
||||
@staticmethod
|
||||
def __can_accept_status_code(err, expected_status):
|
||||
@@ -3624,56 +3625,57 @@ class InfoExtractor:
|
||||
self._set_cookie(domain, cookie, value)
|
||||
break
|
||||
|
||||
def get_testcases(self, include_onlymatching=False):
|
||||
t = getattr(self, '_TEST', None)
|
||||
@classmethod
|
||||
def get_testcases(cls, include_onlymatching=False):
|
||||
t = getattr(cls, '_TEST', None)
|
||||
if t:
|
||||
assert not hasattr(self, '_TESTS'), \
|
||||
'%s has _TEST and _TESTS' % type(self).__name__
|
||||
assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS'
|
||||
tests = [t]
|
||||
else:
|
||||
tests = getattr(self, '_TESTS', [])
|
||||
tests = getattr(cls, '_TESTS', [])
|
||||
for t in tests:
|
||||
if not include_onlymatching and t.get('only_matching', False):
|
||||
continue
|
||||
t['name'] = type(self).__name__[:-len('IE')]
|
||||
t['name'] = cls.ie_key()
|
||||
yield t
|
||||
|
||||
def is_suitable(self, age_limit):
|
||||
@classmethod
|
||||
def is_suitable(cls, age_limit):
|
||||
""" Test whether the extractor is generally suitable for the given
|
||||
age limit (i.e. pornographic sites are not, all others usually are) """
|
||||
|
||||
any_restricted = False
|
||||
for tc in self.get_testcases(include_onlymatching=False):
|
||||
for tc in cls.get_testcases(include_onlymatching=False):
|
||||
if tc.get('playlist', []):
|
||||
tc = tc['playlist'][0]
|
||||
is_restricted = age_restricted(
|
||||
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||
is_restricted = age_restricted(tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||
if not is_restricted:
|
||||
return True
|
||||
any_restricted = any_restricted or is_restricted
|
||||
return not any_restricted
|
||||
|
||||
def description(self, *, markdown=True, search_examples=None):
|
||||
@classmethod
|
||||
def description(cls, *, markdown=True, search_examples=None):
|
||||
"""Description of the extractor"""
|
||||
desc = ''
|
||||
if self._NETRC_MACHINE:
|
||||
if cls._NETRC_MACHINE:
|
||||
if markdown:
|
||||
desc += f' [<abbr title="netrc machine"><em>{self._NETRC_MACHINE}</em></abbr>]'
|
||||
desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]'
|
||||
else:
|
||||
desc += f' [{self._NETRC_MACHINE}]'
|
||||
if self.IE_DESC is False:
|
||||
desc += f' [{cls._NETRC_MACHINE}]'
|
||||
if cls.IE_DESC is False:
|
||||
desc += ' [HIDDEN]'
|
||||
elif self.IE_DESC:
|
||||
desc += f' {self.IE_DESC}'
|
||||
if self.SEARCH_KEY:
|
||||
desc += f'; "{self.SEARCH_KEY}:" prefix'
|
||||
elif cls.IE_DESC:
|
||||
desc += f' {cls.IE_DESC}'
|
||||
if cls.SEARCH_KEY:
|
||||
desc += f'; "{cls.SEARCH_KEY}:" prefix'
|
||||
if search_examples:
|
||||
_COUNTS = ('', '5', '10', 'all')
|
||||
desc += f' (Example: "{self.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
|
||||
if not self.working():
|
||||
desc += f' (Example: "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
|
||||
if not cls.working():
|
||||
desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
|
||||
|
||||
name = f' - **{self.IE_NAME}**' if markdown else self.IE_NAME
|
||||
name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME
|
||||
return f'{name}:{desc}' if desc else name
|
||||
|
||||
def extract_subtitles(self, *args, **kwargs):
|
||||
@@ -3849,6 +3851,6 @@ class SearchInfoExtractor(InfoExtractor):
|
||||
"""Returns an iterator of search results"""
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
@property
|
||||
def SEARCH_KEY(self):
|
||||
return self._SEARCH_KEY
|
||||
@classproperty
|
||||
def SEARCH_KEY(cls):
|
||||
return cls._SEARCH_KEY
|
||||
|
||||
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
|
||||
@@ -8,55 +8,36 @@ class TestURLIE(InfoExtractor):
|
||||
""" Allows addressing of the test cases as test:yout.*be_1 """
|
||||
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'
|
||||
_VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$'
|
||||
|
||||
def _real_extract(self, url):
|
||||
from ..extractor import gen_extractors
|
||||
from ..extractor import gen_extractor_classes
|
||||
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
extractor_id = mobj.group('extractor')
|
||||
all_extractors = gen_extractors()
|
||||
extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
|
||||
|
||||
rex = re.compile(extractor_id, flags=re.IGNORECASE)
|
||||
matching_extractors = [
|
||||
e for e in all_extractors if rex.search(e.IE_NAME)]
|
||||
matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]
|
||||
|
||||
if len(matching_extractors) == 0:
|
||||
raise ExtractorError(
|
||||
'No extractors matching %r found' % extractor_id,
|
||||
expected=True)
|
||||
raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
|
||||
elif len(matching_extractors) > 1:
|
||||
# Is it obvious which one to pick?
|
||||
try:
|
||||
try: # Check for exact match
|
||||
extractor = next(
|
||||
ie for ie in matching_extractors
|
||||
if ie.IE_NAME.lower() == extractor_id.lower())
|
||||
except StopIteration:
|
||||
raise ExtractorError(
|
||||
('Found multiple matching extractors: %s' %
|
||||
' '.join(ie.IE_NAME for ie in matching_extractors)),
|
||||
'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors),
|
||||
expected=True)
|
||||
else:
|
||||
extractor = matching_extractors[0]
|
||||
|
||||
num_str = mobj.group('num')
|
||||
num = int(num_str) if num_str else 0
|
||||
|
||||
testcases = []
|
||||
t = getattr(extractor, '_TEST', None)
|
||||
if t:
|
||||
testcases.append(t)
|
||||
testcases.extend(getattr(extractor, '_TESTS', []))
|
||||
|
||||
testcases = tuple(extractor.get_testcases(True))
|
||||
try:
|
||||
tc = testcases[num]
|
||||
tc = testcases[int(num or 0)]
|
||||
except IndexError:
|
||||
raise ExtractorError(
|
||||
('Test case %d not found, got only %d tests' %
|
||||
(num, len(testcases))),
|
||||
expected=True)
|
||||
f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True)
|
||||
|
||||
self.to_screen('Test URL: %s' % tc['url'])
|
||||
|
||||
return self.url_result(tc['url'], video_id=video_id)
|
||||
self.to_screen(f'Test URL: {tc["url"]}')
|
||||
return self.url_result(tc['url'])
|
||||
|
||||
@@ -31,6 +31,7 @@ from ..utils import (
|
||||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
bug_reports_message,
|
||||
classproperty,
|
||||
clean_html,
|
||||
datetime_from_str,
|
||||
dict_get,
|
||||
@@ -5781,16 +5782,17 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
|
||||
class YoutubeFeedsInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
Base class for feed extractors
|
||||
Subclasses must define the _FEED_NAME property.
|
||||
Subclasses must re-define the _FEED_NAME property.
|
||||
"""
|
||||
_LOGIN_REQUIRED = True
|
||||
_FEED_NAME = 'feeds'
|
||||
|
||||
def _real_initialize(self):
|
||||
YoutubeBaseInfoExtractor._check_login_required(self)
|
||||
|
||||
@property
|
||||
@classproperty
|
||||
def IE_NAME(self):
|
||||
return 'youtube:%s' % self._FEED_NAME
|
||||
return f'youtube:{self._FEED_NAME}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(
|
||||
|
||||
Reference in New Issue
Block a user