[extractor/JWPlatform] Fix extractor (#5112)

Fix bitrate and filesize extraction, and support embeds with unquoted URLs.

Related: #5106 

Authored by: coletdjnz
This commit is contained in:
coletdjnz
2022-10-04 08:37:48 +13:00
committed by GitHub
parent 8671f995cc
commit d3a3d7f0cc
3 changed files with 32 additions and 14 deletions

View File

@@ -22,13 +22,42 @@ class JWPlatformIE(InfoExtractor):
'only_matching': True,
}]
# Fixtures for third-party pages that embed a JWPlatform player. Each entry
# pairs a page 'url' with an 'info_dict' of metadata for the embedded media.
# NOTE(review): presumably these are the values the test harness expects the
# extractor to produce for that page -- confirm against the test runner.
_WEBPAGE_TESTS = [{
# JWPlatform iframe
'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
'info_dict': {
'id': 'AG26UQXM',
'ext': 'mp4',
'upload_date': '20160719',
'timestamp': 1468923808,
'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
'description': '',
'duration': 294.0,
},
}, {
# Player URL in the embedding tag is not surrounded by quotes
'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
'info_dict': {
'id': 'R10NQdhY',
'title': 'Playgirl',
'ext': 'mp4',
'upload_date': '20220624',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
'timestamp': 1656064800,
'description': 'BRD 1966, Will Tremper',
'duration': 5146.0,
},
# NOTE(review): looks like this limits the run to the generic and
# jwplatform extractors only -- confirm the semantics of 'allowed_extractors'.
'params': {'allowed_extractors': ['generic', 'jwplatform']},
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
# <input value=URL> is used by hyland.com
# if we find <script>/<iframe> matches, don't look for <input>
ret = re.findall(
r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
webpage)
if ret:
return ret