Merge branch 'master' into youtube-mix-fix

This commit is contained in:
insaneracist
2020-10-31 02:40:11 -07:00
28 changed files with 579 additions and 154 deletions

View File

@@ -1857,13 +1857,13 @@ class YoutubeDL(object):
self.report_error('Cannot write annotations file: ' + annofn)
return
def dl(name, info):
def dl(name, info, subtitle=False):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)
return fd.download(name, info, subtitle)
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
@@ -1872,7 +1872,7 @@ class YoutubeDL(object):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key'])
# ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
@@ -1891,6 +1891,8 @@ class YoutubeDL(object):
return
else:
try:
dl(sub_filename, sub_info, subtitle=True)
'''
if self.params.get('sleep_interval_subtitles', False):
dl(sub_filename, sub_info)
else:
@@ -1898,6 +1900,7 @@ class YoutubeDL(object):
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
'''
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))

View File

@@ -326,7 +326,7 @@ class FileDownloader(object):
"""Report it was impossible to resume download."""
self.to_screen('[download] Unable to resume')
def download(self, filename, info_dict):
def download(self, filename, info_dict, subtitle=False):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
@@ -353,16 +353,22 @@ class FileDownloader(object):
})
return True
min_sleep_interval = self.params.get('sleep_interval')
if min_sleep_interval:
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
if subtitle is False:
min_sleep_interval = self.params.get('sleep_interval')
if min_sleep_interval:
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
self.to_screen(
'[download] Sleeping %s seconds...' % (
int(sleep_interval) if sleep_interval.is_integer()
else '%.2f' % sleep_interval))
time.sleep(sleep_interval)
else:
sleep_interval_sub = self.params.get('sleep_interval_subtitles')
self.to_screen(
'[download] Sleeping %s seconds...' % (
int(sleep_interval) if sleep_interval.is_integer()
else '%.2f' % sleep_interval))
time.sleep(sleep_interval)
int(sleep_interval_sub)))
time.sleep(sleep_interval_sub)
return self.real_download(filename, info_dict)
def real_download(self, filename, info_dict):

View File

@@ -1438,6 +1438,13 @@ class AdobePassIE(InfoExtractor):
provider_redirect_page, 'oauth redirect')
self._download_webpage(
oauth_redirect_url, video_id, 'Confirming auto login')
elif 'automatically signed in with' in provider_redirect_page:
# Seems like comcast is rolling up new way of automatically signing customers
oauth_redirect_url = self._html_search_regex(
r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
'oauth redirect (signed)')
# Just need to process the request. No useful data comes back
self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
else:
if '<form name="signin"' in provider_redirect_page:
provider_login_page_res = provider_redirect_page_res

View File

@@ -471,12 +471,17 @@ class BrightcoveNewIE(AdobePassIE):
title = json_data['name'].strip()
formats = []
sources_num = len(json_data.get('sources'))
key_systems_present = 0
for source in json_data.get('sources', []):
container = source.get('container')
ext = mimetype2ext(source.get('type'))
src = source.get('src')
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
# https://apis.support.brightcove.com/playback/references/playback-api-video-fields-reference.html
if source.get('key_systems'):
key_systems_present += 1
continue
elif ext == 'ism' or container == 'WVM':
continue
elif ext == 'm3u8' or container == 'M2TS':
if not src:
@@ -533,6 +538,10 @@ class BrightcoveNewIE(AdobePassIE):
'format_id': build_format_id('rtmp'),
})
formats.append(f)
if sources_num == key_systems_present:
raise ExtractorError('This video is DRM protected', expected=True)
if not formats:
# for sonyliv.com DRM protected videos
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')

View File

@@ -751,6 +751,7 @@ from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
from .nitter import NitterIE
from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE
from .noco import NocoIE
@@ -1037,6 +1038,10 @@ from .sky import (
SkyNewsIE,
SkySportsIE,
)
from .skyitalia import (
SkyArteItaliaIE,
SkyItaliaIE,
)
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE

View File

@@ -289,7 +289,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
return mgid
def _extract_mgid(self, webpage, url, data_zone=None):
def _extract_mgid(self, webpage, url, title=None, data_zone=None):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid}
@@ -300,7 +300,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
except RegexNotFoundError:
mgid = None
title = self._match_id(url)
if not title:
title = url_basename(url)
try:
window_data = self._parse_json(self._search_regex(
@@ -336,7 +337,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
def _real_extract(self, url):
title = url_basename(url)
webpage = self._download_webpage(url, title)
mgid = self._extract_mgid(webpage, url)
mgid = self._extract_mgid(webpage, url, title=title)
videos_info = self._get_videos_info(mgid, url=url)
return videos_info

View File

@@ -13,17 +13,16 @@ from ..utils import (
class NetzkinoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P<id>[^/]+)'
_TEST = {
'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
_TESTS = [{
'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond',
'md5': '92a3f8b76f8d7220acce5377ea5d4873',
'info_dict': {
'id': 'rakete-zum-mond',
'ext': 'mp4',
'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
'comments': 'mincount:3',
'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
'title': 'Rakete zum Mond \u2013 Jules Verne',
'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60',
'upload_date': '20120813',
'thumbnail': r're:https?://.*\.jpg$',
'timestamp': 1344858571,
@@ -32,17 +31,30 @@ class NetzkinoIE(InfoExtractor):
'params': {
'skip_download': 'Download only works from Germany',
}
}
}, {
'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2',
'md5': 'c7728b2dadd04ff6727814847a51ef03',
'info_dict': {
'id': 'dr-jekyll-mrs-hyde-2',
'ext': 'mp4',
'title': 'Dr. Jekyll & Mrs. Hyde 2',
'description': 'md5:c2e9626ebd02de0a794b95407045d186',
'upload_date': '20190130',
'thumbnail': r're:https?://.*\.jpg$',
'timestamp': 1548849437,
'age_limit': 18,
},
'params': {
'skip_download': 'Download only works from Germany',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
category_id = mobj.group('category')
video_id = mobj.group('id')
api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
api_info = self._download_json(api_url, video_id)
info = next(
p for p in api_info['posts'] if p['slug'] == video_id)
api_url = 'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/%s.json?d=www' % video_id
info = self._download_json(api_url, video_id)
custom_fields = info['custom_fields']
production_js = self._download_webpage(
@@ -67,23 +79,12 @@ class NetzkinoIE(InfoExtractor):
} for key, tpl in templates.items()]
self._sort_formats(formats)
comments = [{
'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
'id': c['id'],
'author': c['name'],
'html': c['content'],
'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
} for c in info.get('comments', [])]
return {
'id': video_id,
'formats': formats,
'comments': comments,
'title': info['title'],
'age_limit': int_or_none(custom_fields.get('FSK')[0]),
'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
'description': clean_html(info.get('content')),
'thumbnail': info.get('thumbnail'),
'playlist_title': api_info.get('title'),
'playlist_id': category_id,
}

View File

@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
parse_duration,
@@ -20,22 +21,22 @@ class NewgroundsIE(InfoExtractor):
'info_dict': {
'id': '549479',
'ext': 'mp3',
'title': 'B7 - BusMode',
'title': 'Burn7 - B7 - BusMode',
'uploader': 'Burn7',
'timestamp': 1378878540,
'upload_date': '20130911',
'duration': 143,
},
}, {
'url': 'https://www.newgrounds.com/portal/view/673111',
'md5': '3394735822aab2478c31b1004fe5e5bc',
'url': 'https://www.newgrounds.com/portal/view/1',
'md5': 'fbfb40e2dc765a7e830cb251d370d981',
'info_dict': {
'id': '673111',
'id': '1',
'ext': 'mp4',
'title': 'Dancin',
'uploader': 'Squirrelman82',
'timestamp': 1460256780,
'upload_date': '20160410',
'title': 'Brian-Beaton - Scrotum 1',
'uploader': 'Brian-Beaton',
'timestamp': 955064100,
'upload_date': '20000406',
},
}, {
# source format unavailable, additional mp4 formats
@@ -43,7 +44,7 @@ class NewgroundsIE(InfoExtractor):
'info_dict': {
'id': '689400',
'ext': 'mp4',
'title': 'ZTV News Episode 8',
'title': 'Bennettthesage - ZTV News Episode 8',
'uploader': 'BennettTheSage',
'timestamp': 1487965140,
'upload_date': '20170224',
@@ -55,42 +56,73 @@ class NewgroundsIE(InfoExtractor):
def _real_extract(self, url):
media_id = self._match_id(url)
formats = []
uploader = None
webpage = self._download_webpage(url, media_id)
title = self._html_search_regex(
r'<title>([^>]+)</title>', webpage, 'title')
media_url = self._parse_json(self._search_regex(
r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
media_url_string = self._search_regex(
r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None, fatal=False)
formats = [{
'url': media_url,
'format_id': 'source',
'quality': 1,
}]
if media_url_string:
media_url = self._parse_json(media_url_string, media_id)
formats = [{
'url': media_url,
'format_id': 'source',
'quality': 1,
}]
max_resolution = int_or_none(self._search_regex(
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
default=None))
if max_resolution:
url_base = media_url.rpartition('.')[0]
for resolution in (360, 720, 1080):
if resolution > max_resolution:
break
formats.append({
'url': '%s.%dp.mp4' % (url_base, resolution),
'format_id': '%dp' % resolution,
'height': resolution,
})
max_resolution = int_or_none(self._search_regex(
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
default=None))
if max_resolution:
url_base = media_url.rpartition('.')[0]
for resolution in (360, 720, 1080):
if resolution > max_resolution:
break
formats.append({
'url': '%s.%dp.mp4' % (url_base, resolution),
'format_id': '%dp' % resolution,
'height': resolution,
})
else:
video_id = int_or_none(self._search_regex(
r'data-movie-id=\\"([0-9]+)\\"', webpage, ''))
if not video_id:
raise ExtractorError('Could not extract media data')
url_video_data = 'https://www.newgrounds.com/portal/video/%s' % video_id
headers = {
'Accept': 'application/json',
'Referer': url,
'X-Requested-With': 'XMLHttpRequest'
}
json_video = self._download_json(url_video_data, video_id, headers=headers, fatal=False)
if not json_video:
raise ExtractorError('Could not fetch media data')
uploader = json_video.get('author')
title = json_video.get('title')
media_formats = json_video.get('sources', [])
for media_format in media_formats:
media_sources = media_formats[media_format]
for source in media_sources:
formats.append({
'format_id': media_format,
'quality': int_or_none(media_format[:-1]),
'url': source.get('src')
})
self._check_formats(formats, media_id)
self._sort_formats(formats)
uploader = self._html_search_regex(
(r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
fatal=False)
if not uploader:
uploader = self._html_search_regex(
(r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
fatal=False)
timestamp = unified_timestamp(self._html_search_regex(
(r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
@@ -109,6 +141,9 @@ class NewgroundsIE(InfoExtractor):
if '<dd>Song' in webpage:
formats[0]['vcodec'] = 'none'
if uploader:
title = "%s - %s" % (uploader, title)
return {
'id': media_id,
'title': title,

View File

@@ -0,0 +1,167 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
parse_count,
unified_strdate,
unified_timestamp,
remove_end,
determine_ext,
)
import re
class NitterIE(InfoExtractor):
# Taken from https://github.com/zedeus/nitter/wiki/Instances
INSTANCES = ('nitter.net',
'nitter.snopyta.org',
'nitter.42l.fr',
'nitter.nixnet.services',
'nitter.13ad.de',
'nitter.pussthecat.org',
'nitter.mastodont.cat',
'nitter.dark.fail',
'nitter.tedomum.net',
'nitter.cattube.org',
'nitter.fdn.fr',
'nitter.1d4.us',
'nitter.kavin.rocks',
'tweet.lambda.dance',
'nitter.cc',
'nitter.weaponizedhumiliation.com',
'3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion')
_INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
_VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
current_instance = INSTANCES[0] # the test and official instance
_TESTS = [
{
# GIF (wrapped in mp4)
'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m',
'info_dict': {
'id': '1314279897502629888',
'ext': 'mp4',
'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Firefox 🔥',
'uploader_id': 'firefox',
'uploader_url': 'https://' + current_instance + '/firefox',
'upload_date': '20201008',
'timestamp': 1602183720,
},
}, { # normal video
'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m',
'info_dict': {
'id': '1299715685392756737',
'ext': 'mp4',
'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...',
'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Le Doc',
'uploader_id': 'Le___Doc',
'uploader_url': 'https://' + current_instance + '/Le___Doc',
'upload_date': '20200829',
'timestamp': 1598711341,
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
},
}, { # video embed in a "Streaming Political Ads" box
'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m',
'info_dict': {
'id': '1321147074491092994',
'ext': 'mp4',
'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Mozilla',
'uploader_id': 'mozilla',
'uploader_url': 'https://' + current_instance + '/mozilla',
'upload_date': '20201027',
'timestamp': 1603820982
},
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
parsed_url = compat_urlparse.urlparse(url)
base_url = parsed_url.scheme + '://' + parsed_url.netloc
self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
webpage = self._download_webpage(url, video_id)
video_url = base_url + self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
ext = determine_ext(video_url)
if ext == 'unknown_video':
formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
else:
formats = [{
'url': video_url,
'ext': ext
}]
title = (
self._og_search_description(webpage).replace('\n', ' ')
or self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title'))
description = title
mobj = re.match(self._VALID_URL, url)
uploader_id = (
mobj.group('uploader_id')
or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False))
if uploader_id:
uploader_url = base_url + '/' + uploader_id
uploader = self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
if uploader:
title = uploader + ' - ' + title
view_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
like_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
repost_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
comment_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
thumbnail = base_url + (self._html_search_meta('og:image', webpage, 'thumbnail url')
or self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
thumbnail = remove_end(thumbnail, '%3Asmall') # if parsed with regex, it should contain this
thumbnails = []
thumbnail_ids = ('thumb', 'small', 'large', 'medium', 'orig')
for id in thumbnail_ids:
thumbnails.append({
'id': id,
'url': thumbnail + '%3A' + id,
})
date = self._html_search_regex(r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)
upload_date = unified_strdate(date)
timestamp = unified_timestamp(date)
return {
'id': video_id,
'title': title,
'description': description,
'uploader': uploader,
'timestamp': timestamp,
'uploader_id': uploader_id,
'uploader_url': uploader_url,
'view_count': view_count,
'like_count': like_count,
'repost_count': repost_count,
'comment_count': comment_count,
'formats': formats,
'thumbnails': thumbnails,
'thumbnail': thumbnail,
'upload_date': upload_date,
}

View File

@@ -0,0 +1,119 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class SkyItaliaBaseIE(InfoExtractor):
_GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}'
_RES = {
'low': [426, 240],
'med': [640, 360],
'high': [854, 480],
'hd': [1280, 720]
}
def _extract_video_id(self, url):
webpage = self._download_webpage(url, 'skyitalia')
video_id = self._html_search_regex(
[r'data-videoid=\"(\d+)\"',
r'http://player\.sky\.it/social\?id=(\d+)\&'],
webpage, 'video_id')
if video_id:
return video_id
raise ExtractorError('Video ID not found.')
def _get_formats(self, video_id, token):
data_url = self._GET_VIDEO_DATA.replace('{id}', video_id)
data_url = data_url.replace('{token}', token)
video_data = self._parse_json(
self._download_webpage(data_url, video_id),
video_id)
formats = []
for q, r in self._RES.items():
key = 'web_%s_url' % q
if key not in video_data:
continue
formats.append({
'url': video_data.get(key),
'format_id': q,
'width': r[0],
'height': r[1]
})
self._sort_formats(formats)
title = video_data.get('title')
thumb = video_data.get('thumb')
return {
'id': video_id,
'title': title,
'thumbnail': thumb,
'formats': formats
}
def _real_extract(self, url):
video_id = self._match_id(url)
if video_id == 'None':
video_id = self._extract_video_id(url)
return self._get_formats(video_id, self._TOKEN)
class SkyItaliaIE(SkyItaliaBaseIE):
IE_NAME = 'sky.it'
_VALID_URL = r'''(?x)https?://
(?P<ie>sport|tg24|video)
\.sky\.it/(?:.+?)
(?P<id>[0-9]{6})?
(?:$|\?)'''
_TESTS = [{
'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162',
'md5': '9c03b590b06e5952d8051f0e02b0feca',
'info_dict': {
'id': '616162',
'ext': 'mp4',
'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
}
}, {
'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta',
'md5': '9c03b590b06e5952d8051f0e02b0feca',
'info_dict': {
'id': '616162',
'ext': 'mp4',
'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
}
}, {
'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi',
'md5': 'caa25e62dadb529bc5e0b078da99f854',
'info_dict': {
'id': '615904',
'ext': 'mp4',
'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti',
'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg',
}
}, {
'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api',
'only_matching': True,
}]
_TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk'
class SkyArteItaliaIE(SkyItaliaBaseIE):
IE_NAME = 'arte.sky.it'
_VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P<id>[0-9]{6})?$'
_TEST = {
'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/',
'md5': '2f22513a89f45142f2746f878d690647',
'info_dict': {
'id': '612888',
'ext': 'mp4',
'title': 'I maestri del cinema Federico Felini',
'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg',
}
}
_TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd'

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
js_to_json,
orderedSet,
@@ -33,27 +34,11 @@ class XTubeIE(InfoExtractor):
'title': 'strange erotica',
'description': 'contains:an ET kind of thing',
'uploader': 'greenshowers',
'duration': 450,
'duration': 449,
'view_count': int,
'comment_count': int,
'age_limit': 18,
}
}, {
# FLV videos with duplicated formats
'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
'md5': 'a406963eb349dd43692ec54631efd88b',
'info_dict': {
'id': '9299752',
'display_id': 'A-Super-Run-Part-1-YT',
'ext': 'flv',
'title': 'A Super Run - Part 1 (YT)',
'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
'uploader': 'tshirtguy59',
'duration': 579,
'view_count': int,
'comment_count': int,
'age_limit': 18,
},
}, {
# new URL schema
'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
@@ -89,16 +74,24 @@ class XTubeIE(InfoExtractor):
title, thumbnail, duration = [None] * 3
config = self._parse_json(self._search_regex(
r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
default='{}'), video_id, transform_source=js_to_json, fatal=False)
if config:
config = config.get('mainRoll')
if isinstance(config, dict):
title = config.get('title')
thumbnail = config.get('poster')
duration = int_or_none(config.get('duration'))
sources = config.get('sources') or config.get('format')
json_config_string = self._search_regex(
r'playerConf=({.+?}),loaderConf',
webpage, 'config', default=None)
if not json_config_string:
raise ExtractorError("Could not extract video player data")
json_config_string = json_config_string.replace("!0", "true").replace("!1", "false")
config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False)
if not config:
raise ExtractorError("Could not extract video player data")
config = config.get('mainRoll')
if isinstance(config, dict):
title = config.get('title')
thumbnail = config.get('poster')
duration = int_or_none(config.get('duration'))
sources = config.get('sources') or config.get('format')
if not isinstance(sources, dict):
sources = self._parse_json(self._search_regex(

View File

@@ -1375,14 +1375,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': ext,
})
sub_lang_list[lang] = sub_formats
""" if has_live_chat_replay:
if has_live_chat_replay:
sub_lang_list['live_chat'] = [
{
'video_id': video_id,
'ext': 'json',
'protocol': 'youtube_live_chat_replay',
},
] """
]
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
@@ -1406,6 +1406,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _get_music_metadata_from_yt_initial(self, yt_initial):
music_metadata = []
key_map = {
'Album': 'album',
'Artist': 'artist',
'Song': 'track'
}
contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
if type(contents) is list:
for content in contents:
music_track = {}
if type(content) is not dict:
continue
videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
if type(videoSecondaryInfoRenderer) is not dict:
continue
rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
if type(rows) is not list:
continue
for row in rows:
metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
if type(metadataRowRenderer) is not dict:
continue
key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
if type(key) is not str or type(value) is not str:
continue
if key in key_map:
if key_map[key] in music_track:
# we've started on a new track
music_metadata.append(music_track)
music_track = {}
music_track[key_map[key]] = value
if len(music_track.keys()):
music_metadata.append(music_track)
return music_metadata
def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
@@ -2328,6 +2366,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if release_year:
release_year = int(release_year)
yt_initial = self._get_yt_initial_data(video_id, video_webpage)
if yt_initial:
music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
if len(music_metadata):
album = music_metadata[0].get('album')
artist = music_metadata[0].get('artist')
track = music_metadata[0].get('track')
m_episode = re.search(
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
video_webpage)

View File

@@ -582,7 +582,7 @@ def parseOpts(overrideArguments=None):
'along with --min-sleep-interval.'))
workarounds.add_option(
'--sleep-subtitles',
dest='sleep_interval_subtitles', action='store_true', default=False,
dest='sleep_interval_subtitles', action='store_true', default=0,
help='Enforce sleep interval on subtitles as well')
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

View File

@@ -37,10 +37,26 @@ def update_self(to_screen, verbose, opener):
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
def sha256sum():
h = hashlib.sha256()
b = bytearray(128 * 1024)
mv = memoryview(b)
with open(os.path.realpath(sys.executable), 'rb', buffering=0) as f:
for n in iter(lambda: f.readinto(mv), 0):
h.update(mv[:n])
return h.hexdigest()
to_screen('Current Build Hash %s' % sha256sum())
if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'):
to_screen('It looks like you installed youtube-dlc with a package manager, pip, setup.py or a tarball. Please use that to update.')
return
# compiled file.exe can find itself by
# to_screen(os.path.basename(sys.executable))
# and path to py or exe
# to_screen(os.path.realpath(sys.executable))
# Check if there is a new version
try:
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
@@ -48,6 +64,7 @@ def update_self(to_screen, verbose, opener):
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
if newversion == __version__:
to_screen('youtube-dlc is up-to-date (' + __version__ + ')')
@@ -61,6 +78,7 @@ def update_self(to_screen, verbose, opener):
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
if 'signature' not in versions_info:
to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
@@ -109,6 +127,7 @@ def update_self(to_screen, verbose, opener):
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
newcontent_hash = hashlib.sha256(newcontent).hexdigest()
@@ -155,6 +174,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
if verbose:
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest')
return
newcontent_hash = hashlib.sha256(newcontent).hexdigest()

View File

@@ -2320,8 +2320,8 @@ def bug_reports_message():
if ytdl_is_updateable():
update_cmd = 'type youtube-dlc -U to update'
else:
update_cmd = 'see https://yt-dl.org/update on how to update'
msg = '; please report this issue on https://yt-dl.org/bug .'
update_cmd = 'see https://github.com/blackjack4494/yt-dlc on how to update'
msg = '; please report this issue on https://github.com/blackjack4494/yt-dlc .'
msg += ' Make sure you are using the latest version; %s.' % update_cmd
msg += ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.'
return msg