[extractors] Use new framework for existing embeds (#4307)

`Brightcove` is difficult to migrate because it's subclasses may depend
on the signature of the current functions. So it is left as-is for now

Note: Tests have not been migrated
This commit is contained in:
pukkandan
2022-08-01 06:53:25 +05:30
parent 1e8fe57e5c
commit bfd973ece3
138 changed files with 499 additions and 1909 deletions

View File

@@ -123,6 +123,13 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|theplatform:)(?P<id>[^/\?&]+)'''
_EMBED_REGEX = [
r'''(?x)
<meta\s+
property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2''',
r'(?s)<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//player\.theplatform\.com/p/.+?)\1'
]
_TESTS = [{
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
@@ -192,22 +199,11 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
}]
@classmethod
def _extract_urls(cls, webpage):
m = re.search(
r'''(?x)
<meta\s+
property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
''', webpage)
if m:
return [m.group('url')]
def _extract_embed_urls(cls, url, webpage):
# Are whitespaces ignored in URLs?
# https://github.com/ytdl-org/youtube-dl/issues/12044
matches = re.findall(
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
if matches:
return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
for embed_url in super()._extract_embed_urls(url, webpage):
yield re.sub(r'\s', '', embed_url)
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):