Update to ytdl-2021.01.24.1

This commit is contained in:
Remita Amine
2021-01-16 18:12:05 +01:00
committed by pukkandan
parent f74980cbae
commit a820dc722e
23 changed files with 987 additions and 412 deletions

View File

@@ -181,9 +181,12 @@ class YoutubeDL(object):
allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names.
trim_file_name: Limit length of filename (extension excluded).
ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names
trim_file_name: Limit length of filename (extension excluded)
ignoreerrors: Do not stop on download errors
(Default True when running youtube-dlc,
but False when directly accessing YoutubeDL class)
force_generic_extractor: Force downloader to use the generic extractor
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
@@ -741,7 +744,7 @@ class YoutubeDL(object):
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict)))
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
@@ -761,8 +764,8 @@ class YoutubeDL(object):
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
# string 'NA' is returned for missing fields. We will patch output
# template for missing fields to meet string presentation type.
# string NA placeholder is returned for missing fields. We will patch
# output template for missing fields to meet string presentation type.
for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:

View File

@@ -373,6 +373,7 @@ def _real_main(argv=None):
'listformats': opts.listformats,
'listformats_table': opts.listformats_table,
'outtmpl': outtmpl,
'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
'paths': opts.paths,
'autonumber_size': opts.autonumber_size,
'autonumber_start': opts.autonumber_start,

View File

@@ -256,7 +256,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
'title': 'Ancient Aliens',
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
},
'playlist_mincount': 168,
'playlist_mincount': 150,
}]
_RESOURCE = 'series'
_ITEMS_KEY = 'episodes'

View File

@@ -1,13 +1,16 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
class AlJazeeraIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
'info_dict': {
'id': '3792260579001',
'ext': 'mp4',
@@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
'add_ie': ['BrightcoveNew'],
'skip': 'Not accessible from Travis CI server',
}, {
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
'only_matching': True,
}, {
'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _real_extract(self, url):
program_name = self._match_id(url)
webpage = self._download_webpage(url, program_name)
brightcove_id = self._search_regex(
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
post_type, name = re.match(self._VALID_URL, url).groups()
post_type = {
'features': 'post',
'program': 'episode',
'videos': 'video',
}[post_type.split('/')[0]]
video = self._download_json(
'https://www.aljazeera.com/graphql', name, query={
'operationName': 'SingleArticleQuery',
'variables': json.dumps({
'name': name,
'postType': post_type,
}),
}, headers={
'wp-site': 'aje',
})['data']['article']['video']
video_id = video['id']
account_id = video.get('accountId') or '665003303001'
player_id = video.get('playerId') or 'BkeSH5BDb'
return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
'BrightcoveNew', video_id)

View File

@@ -1,13 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
try_get,
unified_strdate,
unified_timestamp,
)
@@ -22,8 +25,8 @@ class AmericasTestKitchenIE(InfoExtractor):
'ext': 'mp4',
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
'thumbnail': r're:^https?://',
'timestamp': 1523664000,
'upload_date': '20180414',
'timestamp': 1523318400,
'upload_date': '20180410',
'release_date': '20180410',
'series': "America's Test Kitchen",
'season_number': 18,
@@ -33,6 +36,27 @@ class AmericasTestKitchenIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
'md5': '06451608c57651e985a498e69cec17e5',
'info_dict': {
'id': '5fbe8c61bda2010001c6763b',
'title': 'Simple Chicken Dinner',
'ext': 'mp4',
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
'thumbnail': r're:^https?://',
'timestamp': 1610755200,
'upload_date': '20210116',
'release_date': '20210116',
'series': "America's Test Kitchen",
'season_number': 21,
'episode': 'Simple Chicken Dinner',
'episode_number': 3,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
@@ -60,7 +84,76 @@ class AmericasTestKitchenIE(InfoExtractor):
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
'ie_key': 'Zype',
'description': clean_html(video.get('description')),
'timestamp': unified_timestamp(video.get('publishDate')),
'release_date': unified_strdate(video.get('publishDate')),
'episode_number': int_or_none(episode.get('number')),
'season_number': int_or_none(episode.get('season')),
'series': try_get(episode, lambda x: x['show']['title']),
'episode': episode.get('title'),
}
class AmericasTestKitchenSeasonIE(InfoExtractor):
    # Playlist extractor for the "browse season" pages of America's Test
    # Kitchen and Cook's Country. The episode list is fetched from the
    # Algolia search index and every hit is delegated to
    # AmericasTestKitchenIE.
    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
    _TESTS = [{
        # ATK Season
        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
        'info_dict': {
            'id': 'season_1',
            'title': 'Season 1',
        },
        'playlist_count': 13,
    }, {
        # Cooks Country Season
        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
        'info_dict': {
            'id': 'season_12',
            'title': 'Season 12',
        },
        'playlist_count': 13,
    }]

    def _real_extract(self, url):
        """Return a playlist result holding one URL entry per episode hit."""
        mobj = re.match(self._VALID_URL, url)
        show_name = mobj.group('show')
        season_number = int(mobj.group('id'))

        # Short site code used in the index name and in the facet/attribute names
        if show_name == 'americastestkitchen':
            slug = 'atk'
        else:
            slug = 'cco'

        season = 'Season %d' % season_number
        episode_number_key = 'search_%s_episode_number' % slug

        facet_filters = json.dumps([
            'search_season_list:' + season,
            'search_document_klass:episode',
            'search_show_slug:' + slug,
        ])
        request_headers = {
            'Origin': 'https://www.%s.com' % show_name,
            'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
            'X-Algolia-Application-Id': 'Y1FNZXUI30',
        }
        request_query = {
            'facetFilters': facet_filters,
            'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
            'attributesToHighlight': '',
            'hitsPerPage': 1000,
        }
        season_search = self._download_json(
            'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
            season, headers=request_headers, query=request_query)

        def entries():
            hits = season_search.get('hits') or []
            for hit in hits:
                path = hit.get('search_url')
                # Hits without a page path cannot be turned into an episode URL
                if path:
                    yield {
                        '_type': 'url',
                        'url': 'https://www.%s.com%s' % (show_name, path),
                        'id': try_get(hit, lambda e: e['objectID'].split('_')[-1]),
                        'title': hit.get('title'),
                        'description': hit.get('description'),
                        'timestamp': unified_timestamp(hit.get('search_document_date')),
                        'season_number': season_number,
                        'episode_number': int_or_none(hit.get(episode_number_key)),
                        'ie_key': AmericasTestKitchenIE.ie_key(),
                    }

        playlist_id = 'season_%d' % season_number
        return self.playlist_result(entries(), playlist_id, season)

View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .yahoo import YahooIE
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
@@ -15,9 +15,9 @@ from ..utils import (
)
class AolIE(InfoExtractor):
class AolIE(YahooIE):
IE_NAME = 'aol.com'
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
_TESTS = [{
# video with 5min ID
@@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
}, {
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
'only_matching': True,
}, {
# Yahoo video
'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
if '-' in video_id:
return self._extract_yahoo_video(video_id, 'us')
response = self._download_json(
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,

View File

@@ -226,13 +226,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
if doc.tag == 'rss':
return GenericIE()._extract_rss(url, video_id, doc)
title = self._html_search_regex(
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>',
r'<title[^>]*>(.*?)</title>'],
webpage, 'title')
description = self._html_search_meta(
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
'dcterms.abstract', webpage, 'description', default=None)
if description is None:
description = self._html_search_meta(
@@ -289,18 +289,18 @@ class ARDMediathekIE(ARDMediathekBaseIE):
class ARDIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
_TESTS = [{
# available till 14.02.2019
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
# available till 7.01.2022
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
'info_dict': {
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
'id': '102',
'display_id': 'maischberger-die-woche',
'id': '100',
'ext': 'mp4',
'duration': 4435.0,
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
'upload_date': '20180214',
'duration': 3687.0,
'title': 'maischberger. die woche vom 7. Januar 2021',
'upload_date': '20210107',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
@@ -355,17 +355,17 @@ class ARDIE(InfoExtractor):
class ARDBetaMediathekIE(ARDMediathekBaseIE):
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
'info_dict': {
'display_id': 'die-robuste-roswita',
'id': '70153354',
'id': '78566716',
'title': 'Die robuste Roswita',
'description': r're:^Der Mord.*trüber ist als die Ilm.',
'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
'duration': 5316,
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
'timestamp': 1577047500,
'upload_date': '20191222',
'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
'timestamp': 1596658200,
'upload_date': '20200805',
'ext': 'mp4',
},
}, {

View File

@@ -1,142 +1,51 @@
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
from .common import InfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
/(?P<title>.*)'''
_VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
'md5': 'b8acb347177c680ff18a292aa2166f80',
'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
'ext': 'mp4',
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
'timestamp': 1376798400,
'upload_date': '20130818',
'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
'timestamp': 1598670000,
'upload_date': '20200829',
},
}, {
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
'only_matching': True,
}]
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(?:full-episodes|shows(?=/[^/]+/full-episodes))
/(?P<id>[^?]+)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
'info_dict': {
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
'title': 'November 28, 2016 - Ryan Speedo Green',
},
'playlist_count': 4,
}, {
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
mgid = self._extract_mgid(webpage, url, data_zone='t2_lc_promo1')
videos_info = self._get_videos_info(mgid)
return videos_info
class ToshIE(MTVServicesInfoExtractor):
IE_DESC = 'Tosh.0'
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
_TESTS = [{
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
'info_dict': {
'description': 'Tosh asked fans to share their summer plans.',
'title': 'Twitter Users Share Summer Plans',
},
'playlist': [{
'md5': 'f269e88114c1805bb6d7653fecea9e06',
'info_dict': {
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
'ext': 'mp4',
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
'description': 'Tosh asked fans to share their summer plans.',
'thumbnail': r're:^https?://.*\.jpg',
# It's really reported to be published on year 2077
'upload_date': '20770610',
'timestamp': 3390510600,
'subtitles': {
'en': 'mincount:3',
},
},
}]
}, {
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
'only_matching': True,
}]
class ComedyCentralTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
_TESTS = [{
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
'info_dict': {
'id': 'local_playlist-f99b626bdfe13568579a',
'ext': 'flv',
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
'ext': 'mp4',
'title': 'Josh Investigates',
'description': 'Steht uns das Ende der Welt bevor?',
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
'only_matching': True,
}, {
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
'only_matching': True,
}]
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
_GEO_COUNTRIES = ['DE']
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
mrss_url = self._search_regex(
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'mrss url', group='url')
return self._get_videos_info_from_url(mrss_url, video_id)
class ComedyCentralShortnameIE(InfoExtractor):
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
_TESTS = [{
'url': ':tds',
'only_matching': True,
}, {
'url': ':thedailyshow',
'only_matching': True,
}, {
'url': ':theopposition',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
shortcut_map = {
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
def _get_feed_query(self, uri):
return {
'accountOverride': 'intl.mtvi.com',
'arcEp': 'web.cc.tv',
'ep': 'b9032c3a',
'imageEp': 'web.cc.tv',
'mgid': uri,
}
return self.url_result(shortcut_map[video_id])

View File

@@ -50,7 +50,10 @@ from .animelab import (
AnimeLabIE,
AnimeLabShowsIE,
)
from .americastestkitchen import AmericasTestKitchenIE
from .americastestkitchen import (
AmericasTestKitchenIE,
AmericasTestKitchenSeasonIE,
)
from .animeondemand import AnimeOnDemandIE
from .anvato import AnvatoIE
from .aol import AolIE
@@ -244,11 +247,8 @@ from .cnn import (
)
from .coub import CoubIE
from .comedycentral import (
ComedyCentralFullEpisodesIE,
ComedyCentralIE,
ComedyCentralShortnameIE,
ComedyCentralTVIE,
ToshIE,
)
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .commonprotocols import (
@@ -682,6 +682,11 @@ from .mildom import (
MildomVodIE,
MildomUserVodIE,
)
from .minds import (
MindsIE,
MindsChannelIE,
MindsGroupIE,
)
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .miomio import MioMioIE
@@ -1162,6 +1167,10 @@ from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .spotify import (
SpotifyIE,
SpotifyShowIE,
)
from .spreaker import (
SpreakerIE,
SpreakerPageIE,
@@ -1270,7 +1279,10 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .trovolive import TrovoLiveIE
from .trovo import (
TrovoIE,
TrovoVodIE,
)
from .trunews import TruNewsIE
from .trutv import TruTVIE
from .tube8 import Tube8IE

View File

@@ -11,7 +11,7 @@ from ..utils import (
class FranceCultureIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = {
_TESTS = [{
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
'info_dict': {
'id': 'rendez-vous-au-pays-des-geeks',
@@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
'title': 'Rendez-vous au pays des geeks',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140301',
'timestamp': 1393642916,
'timestamp': 1393700400,
'vcodec': 'none',
}
}
}, {
# no thumbnail
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -36,19 +40,19 @@ class FranceCultureIE(InfoExtractor):
</h1>|
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
).*?
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
''',
webpage, 'video data'))
video_url = video_data['data-asset-source']
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
video_url = video_data.get('data-url') or video_data['data-asset-source']
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
description = self._html_search_regex(
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
webpage, 'description', default=None)
thumbnail = self._search_regex(
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
webpage, 'thumbnail', default=None)
uploader = self._html_search_regex(
r'(?s)<span class="author">(.*?)</span>',
webpage, 'uploader', default=None)
@@ -64,6 +68,6 @@ class FranceCultureIE(InfoExtractor):
'ext': ext,
'vcodec': 'none' if ext == 'mp3' else None,
'uploader': uploader,
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
'duration': int_or_none(video_data.get('data-duration')),
}

View File

@@ -5,7 +5,10 @@ import functools
import json
from .common import InfoExtractor
from ..compat import compat_str
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
determine_ext,
ExtractorError,
@@ -131,6 +134,9 @@ class LBRYIE(LBRYBaseIE):
}, {
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
'only_matching': True,
}, {
'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -139,6 +145,7 @@ class LBRYIE(LBRYBaseIE):
display_id = display_id.split('/', 2)[-1].replace('/', ':')
else:
display_id = display_id.replace(':', '#')
display_id = compat_urllib_parse_unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
result_value = result['value']

View File

@@ -0,0 +1,196 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    clean_html,
    ExtractorError,
    int_or_none,
    str_or_none,
    strip_or_none,
)
class MindsBaseIE(InfoExtractor):
    # Shared plumbing for the minds.com extractors: the common URL prefix
    # and a helper for calling the site's JSON API.
    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'

    def _call_api(self, path, video_id, resource, query=None):
        """Download and return the JSON document at /api/<path>.

        `resource` is only used in the progress note. The XSRF-TOKEN cookie
        value, if one is stored for the API URL, is echoed back in the
        X-XSRF-TOKEN header; otherwise an empty header value is sent.
        """
        api_url = 'https://www.minds.com/api/' + path
        xsrf_cookie = self._get_cookies(api_url).get('XSRF-TOKEN')
        request_headers = {
            'Referer': 'https://www.minds.com/',
            'X-XSRF-TOKEN': xsrf_cookie.value if xsrf_cookie else '',
        }
        note = 'Downloading %s JSON metadata' % resource
        return self._download_json(
            api_url, video_id, note, headers=request_headers, query=query)
class MindsIE(MindsBaseIE):
    # Extractor for a single minds.com entity (media, newsfeed or archive
    # URL). Activities that do not embed a native video are delegated to
    # whatever extractor handles their `perma_url`.
    IE_NAME = 'minds'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.minds.com/media/100000000000086822',
        'md5': '215a658184a419764852239d4970b045',
        'info_dict': {
            'id': '100000000000086822',
            'ext': 'mp4',
            'title': 'Minds intro sequence',
            'thumbnail': r're:https?://.+\.png',
            'uploader_id': 'ottman',
            'upload_date': '20130524',
            'timestamp': 1369404826,
            'uploader': 'Bill Ottman',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['animation'],
            'comment_count': int,
            'license': 'attribution-cc',
        },
    }, {
        # entity.type == 'activity' and empty title
        'url': 'https://www.minds.com/newsfeed/798025111988506624',
        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
        'info_dict': {
            'id': '798022190320226304',
            'ext': 'mp4',
            'title': '798022190320226304',
            'uploader': 'ColinFlaherty',
            'upload_date': '20180111',
            'timestamp': 1515639316,
            'uploader_id': 'ColinFlaherty',
        },
    }, {
        'url': 'https://www.minds.com/archive/view/715172106794442752',
        'only_matching': True,
    }, {
        # youtube perma_url
        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the entity behind `url` and return its info dict.

        Raises ExtractorError when a non-activity entity is not a video.
        """
        entity_id = self._match_id(url)
        entity = self._call_api(
            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
        if entity.get('type') == 'activity':
            if entity.get('custom_type') == 'video':
                video_id = entity['entity_guid']
            else:
                # Not a native video: hand the linked URL to another extractor
                return self.url_result(entity['perma_url'])
        else:
            # Validate remote data explicitly; an `assert` would be stripped
            # under `python -O` and gives an unhelpful error otherwise.
            subtype = entity.get('subtype')
            if subtype != 'video':
                raise ExtractorError(
                    'Unsupported entity subtype: %s' % subtype, expected=True)
            video_id = entity_id
        # 1080p and webm formats available only on the sources array
        video = self._call_api(
            'v2/media/video/' + video_id, video_id, 'video')

        formats = []
        for source in (video.get('sources') or []):
            src = source.get('src')
            if not src:
                continue
            formats.append({
                'format_id': source.get('label'),
                'height': int_or_none(source.get('size')),
                'url': src,
            })
        self._sort_formats(formats)

        # Prefer the entity attached to the video response for metadata
        entity = video.get('entity') or entity
        owner = entity.get('ownerObj') or {}
        uploader_id = owner.get('username')

        # A single tag may be delivered as a bare string; normalise to a list
        tags = entity.get('tags')
        if tags and isinstance(tags, compat_str):
            tags = [tags]

        thumbnail = None
        poster = video.get('poster') or entity.get('thumbnail_src')
        if poster:
            # Request the poster and report the URL of the response, which
            # may differ from the requested one (e.g. after a redirect).
            urlh = self._request_webpage(poster, video_id, fatal=False)
            if urlh:
                thumbnail = urlh.geturl()

        return {
            'id': video_id,
            'title': entity.get('title') or video_id,
            'formats': formats,
            'description': clean_html(entity.get('description')) or None,
            'license': str_or_none(entity.get('license')),
            'timestamp': int_or_none(entity.get('time_created')),
            'uploader': strip_or_none(owner.get('name')),
            'uploader_id': uploader_id,
            'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
            'view_count': int_or_none(entity.get('play:count')),
            'like_count': int_or_none(entity.get('thumbs:up:count')),
            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
            'tags': tags,
            'comment_count': int_or_none(entity.get('comments:count')),
            'thumbnail': thumbnail,
        }
class MindsFeedBaseIE(MindsBaseIE):
    # Base for playlist-style extractors (channels, groups). Subclasses
    # must define `_FEED_PATH` (API path segment) and `_FEED_TYPE` (key of
    # the feed object in the API response).
    _PAGE_SIZE = 150

    def _entries(self, feed_id):
        """Yield a url_result for every video in the feed, page by page."""
        query = {'limit': self._PAGE_SIZE, 'sync': 1}
        i = 1
        while True:
            data = self._call_api(
                'v2/feeds/container/%s/videos' % feed_id,
                feed_id, 'page %s' % i, query)
            entities = data.get('entities') or []
            for entity in entities:
                guid = entity.get('guid')
                if not guid:
                    continue
                yield self.url_result(
                    'https://www.minds.com/newsfeed/' + guid,
                    MindsIE.ie_key(), guid)
            # A missing 'load-next' cursor means there is no further page;
            # use .get() so this ends pagination instead of raising KeyError
            # after all entries of the last page were already yielded.
            query['from_timestamp'] = data.get('load-next')
            # Stop when there is no cursor or the page came back short
            if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
                break
            i += 1

    def _real_extract(self, url):
        """Look up the feed and return a playlist over its videos."""
        feed_id = self._match_id(url)
        feed = self._call_api(
            'v1/%s/%s' % (self._FEED_PATH, feed_id),
            feed_id, self._FEED_TYPE)[self._FEED_TYPE]

        return self.playlist_result(
            self._entries(feed['guid']), feed_id,
            strip_or_none(feed.get('name')),
            feed.get('briefdescription'))
class MindsChannelIE(MindsFeedBaseIE):
    # Channel (user) pages: any top-level path that is not one of the
    # reserved site sections excluded by the negative lookahead below.
    _FEED_PATH = 'channel'
    _FEED_TYPE = 'channel'
    IE_NAME = 'minds:%s' % _FEED_TYPE
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.minds.com/ottman',
        'info_dict': {
            'id': 'ottman',
            'title': 'Bill Ottman',
            'description': 'Co-creator & CEO @minds',
        },
        'playlist_mincount': 54,
    }
class MindsGroupIE(MindsFeedBaseIE):
    # Group profile pages, identified by their numeric group id.
    _FEED_PATH = 'groups/group'
    _FEED_TYPE = 'group'
    IE_NAME = 'minds:%s' % _FEED_TYPE
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
        'info_dict': {
            'id': '785582576369672204',
            'title': 'Cooking Videos',
        },
        'playlist_mincount': 1,
    }

View File

@@ -255,6 +255,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
@staticmethod
def _extract_child_with_type(parent, t):
return next(c for c in parent['children'] if c.get('type') == t)
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
if url == '':
return
@@ -332,6 +336,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
if not mgid:
mgid = self._extract_triforce_mgid(webpage, data_zone)
if not mgid:
data = self._parse_json(self._search_regex(
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
main_container = self._extract_child_with_type(data, 'MainContainer')
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
mgid = video_player['props']['media']['video']['config']['uri']
return mgid
def _real_extract(self, url):
@@ -403,18 +414,6 @@ class MTVIE(MTVServicesInfoExtractor):
'only_matching': True,
}]
@staticmethod
def extract_child_with_type(parent, t):
children = parent['children']
return next(c for c in children if c.get('type') == t)
def _extract_mgid(self, webpage):
data = self._parse_json(self._search_regex(
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
main_container = self.extract_child_with_type(data, 'MainContainer')
video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
return video_player['props']['media']['video']['config']['uri']
class MTVJapanIE(MTVServicesInfoExtractor):
IE_NAME = 'mtvjapan'

View File

@@ -1,104 +1,125 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import str_to_int
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
try_get,
url_or_none,
)
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
_TEST = {
'url': 'https://9gag.com/gag/ae5Ag7B',
'info_dict': {
'id': 'kXzwOKyGlSA',
'id': 'ae5Ag7B',
'ext': 'mp4',
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
'uploader': 'CompilationChannel',
'upload_date': '20131110',
'view_count': int,
},
'add_ie': ['Youtube'],
}, {
'url': 'http://9gag.com/tv/p/aKolP3',
'info_dict': {
'id': 'aKolP3',
'ext': 'mp4',
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
'uploader_id': 'rickmereki',
'uploader': 'Rick Mereki',
'upload_date': '20110803',
'view_count': int,
},
'add_ie': ['Vimeo'],
}, {
'url': 'http://9gag.com/tv/p/KklwM',
'only_matching': True,
}, {
'url': 'http://9gag.tv/p/Kk2X5',
'only_matching': True,
}, {
'url': 'http://9gag.com/tv/embed/a5Dmvl',
'only_matching': True,
}]
_EXTERNAL_VIDEO_PROVIDER = {
'1': {
'url': '%s',
'ie_key': 'Youtube',
},
'2': {
'url': 'http://player.vimeo.com/video/%s',
'ie_key': 'Vimeo',
},
'3': {
'url': 'http://instagram.com/p/%s',
'ie_key': 'Instagram',
},
'4': {
'url': 'http://vine.co/v/%s',
'ie_key': 'Vine',
},
'title': 'Capybara Agility Training',
'upload_date': '20191108',
'timestamp': 1573237208,
'categories': ['Awesome'],
'tags': ['Weimaraner', 'American Pit Bull Terrier'],
'duration': 44,
'like_count': int,
'dislike_count': int,
'comment_count': int,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
post_id = self._match_id(url)
post = self._download_json(
'https://9gag.com/v1/post', post_id, query={
'id': post_id
})['data']['post']
webpage = self._download_webpage(url, display_id)
if post.get('type') != 'Animated':
raise ExtractorError(
'The given url does not contain a video',
expected=True)
post_view = self._parse_json(
self._search_regex(
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
webpage, 'post view'),
display_id)
title = post['title']
ie_key = None
source_url = post_view.get('sourceUrl')
if not source_url:
external_video_id = post_view['videoExternalId']
external_video_provider = post_view['videoExternalProvider']
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
title = post_view['title']
description = post_view.get('description')
view_count = str_to_int(post_view.get('externalView'))
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
duration = None
formats = []
thumbnails = []
for key, image in (post.get('images') or {}).items():
image_url = url_or_none(image.get('url'))
if not image_url:
continue
ext = determine_ext(image_url)
image_id = key.strip('image')
common = {
'url': image_url,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
}
if ext in ('jpg', 'png'):
webp_url = image.get('webpUrl')
if webp_url:
t = common.copy()
t.update({
'id': image_id + '-webp',
'url': webp_url,
})
thumbnails.append(t)
common.update({
'id': image_id,
'ext': ext,
})
thumbnails.append(common)
elif ext in ('webm', 'mp4'):
if not duration:
duration = int_or_none(image.get('duration'))
common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
for vcodec in ('vp8', 'vp9', 'h265'):
c_url = image.get(vcodec + 'Url')
if not c_url:
continue
c_f = common.copy()
c_f.update({
'format_id': image_id + '-' + vcodec,
'url': c_url,
'vcodec': vcodec,
})
formats.append(c_f)
common.update({
'ext': ext,
'format_id': image_id,
})
formats.append(common)
self._sort_formats(formats)
section = try_get(post, lambda x: x['postSection']['name'])
tags = None
post_tags = post.get('tags')
if post_tags:
tags = []
for tag in post_tags:
tag_key = tag.get('key')
if not tag_key:
continue
tags.append(tag_key)
get_count = lambda x: int_or_none(post.get(x + 'Count'))
return {
'_type': 'url_transparent',
'url': source_url,
'ie_key': ie_key,
'id': video_id,
'display_id': display_id,
'id': post_id,
'title': title,
'description': description,
'view_count': view_count,
'thumbnail': thumbnail,
'timestamp': int_or_none(post.get('creationTs')),
'duration': duration,
'formats': formats,
'thumbnails': thumbnails,
'like_count': get_count('upVote'),
'dislike_count': get_count('downVote'),
'comment_count': get_count('comments'),
'age_limit': 18 if post.get('nsfw') == 1 else None,
'categories': [section] if section else None,
'tags': tags,
}

View File

@@ -6,30 +6,40 @@ import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
extract_attributes,
get_element_by_class,
urlencode_postdata,
)
class NJPWWorldIE(InfoExtractor):
_VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
_VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
IE_DESC = '新日本プロレスワールド'
_NETRC_MACHINE = 'njpwworld'
_TEST = {
_TESTS = [{
'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
'info_dict': {
'id': 's_series_00155_1_9',
'ext': 'mp4',
'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
'tags': list,
},
'params': {
'skip_download': True, # AES-encrypted m3u8
},
'skip': 'Requires login',
}
}, {
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
'info_dict': {
'id': 's_series_00563_16_bs',
'ext': 'mp4',
'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
},
'params': {
'skip_download': True,
},
}]
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
@@ -64,35 +74,27 @@ class NJPWWorldIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
formats = []
for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
player = extract_attributes(mobj.group(0))
player_path = player.get('href')
if not player_path:
continue
kind = self._search_regex(
r'(low|high)$', player.get('class') or '', 'kind',
default='low')
for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
player_path = '/intent?id=%s&type=url' % vid
player_url = compat_urlparse.urljoin(url, player_path)
player_page = self._download_webpage(
player_url, video_id, note='Downloading player page')
entries = self._parse_html5_media_entries(
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
m3u8_entry_protocol='m3u8_native')
kind_formats = entries[0]['formats']
for f in kind_formats:
f['quality'] = 2 if kind == 'high' else 1
formats.extend(kind_formats)
formats.append({
'url': player_url,
'format_id': kind,
'ext': 'mp4',
'protocol': 'm3u8',
'quality': 2 if kind == 'high' else 1,
})
self._sort_formats(formats)
post_content = get_element_by_class('post-content', webpage)
tag_block = get_element_by_class('tag-block', webpage)
tags = re.findall(
r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
) if post_content else None
r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
) if tag_block else None
return {
'id': video_id,
'title': self._og_search_title(webpage),
'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
'formats': formats,
'tags': tags,
}

View File

@@ -20,19 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']
def _extract_mgid(self, webpage, url):
mgid = None
if not mgid:
mgid = self._extract_triforce_mgid(webpage)
if not mgid:
mgid = self._extract_new_triforce_mgid(webpage, url)
return mgid
# TODO Remove - Reason: Outdated Site
class ParamountNetworkIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
@@ -56,16 +43,6 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
def _get_feed_query(self, uri):
return {
'arcEp': 'paramountnetwork.com',
'imageEp': 'paramountnetwork.com',
'mgid': uri,
}
def _extract_mgid(self, webpage, url):
root_data = self._parse_json(self._search_regex(
r'window\.__DATA__\s*=\s*({.+})',
webpage, 'data'), None)
def find_sub_data(data, data_type):
return next(c for c in data['children'] if c.get('type') == data_type)
c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
return c['props']['media']['video']['config']['uri']

View File

@@ -0,0 +1,156 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
clean_podcast_url,
float_or_none,
int_or_none,
strip_or_none,
try_get,
unified_strdate,
)
class SpotifyBaseIE(InfoExtractor):
    """Common plumbing for the Spotify extractors.

    Obtains an access token from open.spotify.com, wraps the persisted-query
    endpoint on api-partner.spotify.com and turns an episode object returned
    by that endpoint into an info dict.
    """
    _ACCESS_TOKEN = None
    # sha256 hashes of the persisted queries, keyed by operation name
    _OPERATION_HASHES = {
        'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
        'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
        'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
    }
    # '%s' is filled in by subclasses with the URL path segment they handle
    _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'

    def _real_initialize(self):
        """Fetch an access token and store it on the instance for _call_api."""
        token_response = self._download_json(
            'https://open.spotify.com/get_access_token', None)
        self._ACCESS_TOKEN = token_response['accessToken']

    def _call_api(self, operation, video_id, variables):
        """Run the persisted query *operation* and return the 'data' member
        of the JSON response.

        *variables* is serialized to JSON and sent along with the hash
        registered for *operation* in _OPERATION_HASHES.
        """
        query = {
            'operationName': 'query' + operation,
            'variables': json.dumps(variables),
            'extensions': json.dumps({
                'persistedQuery': {
                    'sha256Hash': self._OPERATION_HASHES[operation],
                },
            })
        }
        headers = {'authorization': 'Bearer ' + self._ACCESS_TOKEN}
        response = self._download_json(
            'https://api-partner.spotify.com/pathfinder/v1/query',
            video_id, query=query, headers=headers)
        return response['data']

    def _extract_episode(self, episode, series):
        """Build an info dict from an *episode* object.

        *series* is passed through unchanged as the 'series' field.
        'id' and 'name' are mandatory in *episode*; everything else is optional.
        """
        episode_id = episode['id']
        title = episode['name'].strip()

        formats = []

        # Preview audio, rewritten to the anon-podcast host
        preview = episode.get('audioPreview') or {}
        preview_url = preview.get('url')
        if preview_url:
            preview_format = {
                'url': preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
                'vcodec': 'none',
            }
            preview_format_name = preview.get('format')
            if preview_format_name:
                preview_format['format_id'] = preview_format_name
                # Three-character tag, optional uppercase word, then digits:
                # the tag becomes the extension, the digits the audio bitrate
                match = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', preview_format_name)
                if match:
                    ext_tag, bitrate = match.groups()
                    preview_format['abr'] = int(bitrate)
                    preview_format['ext'] = ext_tag.lower()
            formats.append(preview_format)

        # Only items flagged as externally hosted are added as formats
        audio_items = try_get(episode, lambda x: x['audio']['items']) or []
        for audio_item in audio_items:
            audio_url = audio_item.get('url')
            if not audio_url or not audio_item.get('externallyHosted'):
                continue
            formats.append({
                'url': clean_podcast_url(audio_url),
                'vcodec': 'none',
            })

        thumbnails = []
        cover_sources = try_get(episode, lambda x: x['coverArt']['sources']) or []
        for cover in cover_sources:
            cover_url = cover.get('url')
            if not cover_url:
                continue
            thumbnails.append({
                'url': cover_url,
                'width': int_or_none(cover.get('width')),
                'height': int_or_none(cover.get('height')),
            })

        description = strip_or_none(episode.get('description'))
        # totalMilliseconds is scaled down by 1000
        duration = float_or_none(
            try_get(episode, lambda x: x['duration']['totalMilliseconds']), 1000)
        release_date = unified_strdate(
            try_get(episode, lambda x: x['releaseDate']['isoString']))

        return {
            'id': episode_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': description,
            'duration': duration,
            'release_date': release_date,
            'series': series,
        }
class SpotifyIE(SpotifyBaseIE):
    """Extractor for single episode pages on open.spotify.com."""
    IE_NAME = 'spotify'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
    _TEST = {
        'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
        'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
        'info_dict': {
            'id': '4Z7GAJ50bgctf6uclHlWKo',
            'ext': 'mp3',
            'title': 'From the archive: Why time management is ruining our lives',
            'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
            'duration': 2083.605,
            'release_date': '20201217',
            'series': "The Guardian's Audio Long Reads",
        }
    }

    def _real_extract(self, url):
        """Query the 'Episode' operation for the id in *url* and convert the
        result; the series name is taken from the episode's podcast, if any."""
        episode_id = self._match_id(url)
        variables = {
            'uri': 'spotify:episode:' + episode_id
        }
        episode = self._call_api('Episode', episode_id, variables)['episode']
        series = try_get(episode, lambda x: x['podcast']['name'])
        return self._extract_episode(episode, series)
class SpotifyShowIE(SpotifyBaseIE):
    """Extractor for show pages on open.spotify.com; yields a playlist of episodes."""
    IE_NAME = 'spotify:show'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
    _TEST = {
        'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
        'info_dict': {
            'id': '4PM9Ke6l66IRNpottHKV9M',
            'title': 'The Story from the Guardian',
            'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
        },
        'playlist_mincount': 36,
    }

    def _real_extract(self, url):
        """Request the show's episodes in a single 'ShowEpisodes' call and
        return them as a playlist named after the podcast."""
        show_id = self._match_id(url)
        variables = {
            'limit': 1000000000,
            'offset': 0,
            'uri': 'spotify:show:' + show_id,
        }
        podcast = self._call_api('ShowEpisodes', show_id, variables)['podcast']
        podcast_name = podcast.get('name')

        entries = []
        episode_items = try_get(podcast, lambda x: x['episodes']['items']) or []
        for episode_item in episode_items:
            episode = episode_item.get('episode')
            # items without an episode object are ignored
            if episode:
                entries.append(self._extract_episode(episode, podcast_name))

        return self.playlist_result(
            entries, show_id, podcast_name, podcast.get('description'))

View File

@@ -0,0 +1,193 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
str_or_none,
try_get,
)
class TrovoBaseIE(InfoExtractor):
    """Shared pieces of the trovo.live extractors."""
    _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'

    def _extract_streamer_info(self, data):
        """Map the optional 'streamerInfo' member of *data* to uploader fields.

        'uploader_url' is only built when a userName is present.
        """
        streamer = data.get('streamerInfo') or {}
        user_name = streamer.get('userName')
        uploader_url = None
        if user_name:
            uploader_url = 'https://trovo.live/' + user_name
        return {
            'uploader': streamer.get('nickName'),
            'uploader_id': str_or_none(streamer.get('uid')),
            'uploader_url': uploader_url,
        }
class TrovoIE(TrovoBaseIE):
    """Extractor for trovo.live channel pages (anything that is not a clip or video URL)."""
    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'

    def _real_extract(self, url):
        """Look up the live info of the user named in *url* and return the
        running stream; raises an expected ExtractorError when isLive is 0."""
        username = self._match_id(url)

        # The lines of the literal are sent verbatim, hence no indentation inside it
        live_query = '''{
getLiveInfo(params: {userName: "%s"}) {
isLive
programInfo {
coverUrl
id
streamInfo {
desc
playUrl
}
title
}
streamerInfo {
nickName
uid
userName
}
}
}''' % username
        response = self._download_json(
            'https://gql.trovo.live/', username, query={
                'query': live_query,
            })
        live_info = response['data']['getLiveInfo']

        if live_info.get('isLive') == 0:
            raise ExtractorError('%s is offline' % username, expected=True)

        program = live_info['programInfo']
        program_id = program['id']
        title = self._live_title(program['title'])

        formats = []
        for stream in (program.get('streamInfo') or []):
            stream_url = stream.get('playUrl')
            if not stream_url:
                continue
            quality_label = stream.get('desc')
            # the label minus its last character is parsed as the height
            height = None
            if quality_label:
                height = int_or_none(quality_label[:-1])
            formats.append({
                'format_id': quality_label,
                'height': height,
                'url': stream_url,
            })
        self._sort_formats(formats)

        result = {
            'id': program_id,
            'title': title,
            'formats': formats,
            'thumbnail': program.get('coverUrl'),
            'is_live': True,
        }
        result.update(self._extract_streamer_info(live_info))
        return result
class TrovoVodIE(TrovoBaseIE):
    """Extractor for trovo.live /clip/ and /video/ pages."""
    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
        'info_dict': {
            'id': 'ltv-100095501_100095501_1609596043',
            'ext': 'mp4',
            'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
            'uploader': 'Exsl',
            'timestamp': 1609640305,
            'upload_date': '20210103',
            'uploader_id': '100095501',
            'duration': 43977,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'comments': 'mincount:8',
            'categories': ['Grand Theft Auto V'],
        },
    }, {
        'url': 'https://trovo.live/clip/lc-5285890810184026005',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch VOD details and the comment list in one batched POST and
        assemble the info dict (formats, counters, comments, uploader)."""
        vid = self._match_id(url)

        # The lines of both literals are sent verbatim, hence no indentation inside them
        vod_query = '''{
batchGetVodDetailInfo(params: {vids: ["%s"]}) {
VodDetailInfos
}
}''' % vid
        comments_query = '''{
getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
commentList {
author {
nickName
uid
}
commentID
content
createdAt
parentID
}
}
}''' % vid
        payload = json.dumps([{
            'query': vod_query,
        }, {
            'query': comments_query,
        }]).encode()
        resp = self._download_json(
            'https://gql.trovo.live/', vid, data=payload, headers={
                'Content-Type': 'application/json',
            })

        # resp[0] answers the VOD query and is mandatory
        vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
        vod_info = vod_detail_info['vodInfo']
        title = vod_info['title']

        language = vod_info.get('languageName')
        formats = []
        for play_info in (vod_info.get('playInfos') or []):
            play_url = play_info.get('playUrl')
            if not play_url:
                continue
            quality_label = play_info.get('desc')
            # the label minus its last character is parsed as the height
            height = None
            if quality_label:
                height = int_or_none(quality_label[:-1])
            formats.append({
                'ext': 'mp4',
                'filesize': int_or_none(play_info.get('fileSize')),
                'format_id': quality_label,
                'height': height,
                'language': language,
                'protocol': 'm3u8_native',
                'tbr': int_or_none(play_info.get('bitrate')),
                'url': play_url,
            })
        self._sort_formats(formats)

        category = vod_info.get('categoryName')

        def get_count(kind):
            # counters are stored under '<kind>Num'
            return int_or_none(vod_info.get(kind + 'Num'))

        # resp[1] answers the comment query; a missing or malformed list means no comments
        raw_comments = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
        comments = []
        for raw_comment in raw_comments:
            text = raw_comment.get('content')
            if not text:
                continue
            author = raw_comment.get('author') or {}
            parent_id = raw_comment.get('parentID')
            # a parentID of 0 is reported as 'root'
            if parent_id == 0:
                parent = 'root'
            else:
                parent = str_or_none(parent_id)
            comments.append({
                'author': author.get('nickName'),
                'author_id': str_or_none(author.get('uid')),
                'id': str_or_none(raw_comment.get('commentID')),
                'text': text,
                'timestamp': int_or_none(raw_comment.get('createdAt')),
                'parent': parent,
            })

        result = {
            'id': vid,
            'title': title,
            'formats': formats,
            'thumbnail': vod_info.get('coverUrl'),
            'timestamp': int_or_none(vod_info.get('publishTs')),
            'duration': int_or_none(vod_info.get('duration')),
            'view_count': get_count('watch'),
            'like_count': get_count('like'),
            'comment_count': get_count('comment'),
            'comments': comments,
            'categories': [category] if category else None,
        }
        result.update(self._extract_streamer_info(vod_detail_info))
        return result

View File

@@ -1,12 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
unified_strdate,
HEADRequest,
int_or_none,
@@ -46,15 +43,6 @@ class WatIE(InfoExtractor):
},
]
_FORMATS = (
(200, 416, 234),
(400, 480, 270),
(600, 640, 360),
(1200, 640, 360),
(1800, 960, 540),
(2500, 1280, 720),
)
def _real_extract(self, url):
video_id = self._match_id(url)
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
@@ -97,46 +85,20 @@ class WatIE(InfoExtractor):
return red_url
return None
def remove_bitrate_limit(manifest_url):
return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
formats = []
try:
alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
manifest_urls = self._download_json(
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
m3u8_url = manifest_urls.get('hls')
if m3u8_url:
m3u8_url = remove_bitrate_limit(m3u8_url)
for m3u8_alt_url in alt_urls(m3u8_url):
formats.extend(self._extract_m3u8_formats(
m3u8_alt_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
mpd_url = manifest_urls.get('mpd')
if mpd_url:
mpd_url = remove_bitrate_limit(mpd_url)
for mpd_alt_url in alt_urls(mpd_url):
formats.extend(self._extract_mpd_formats(
mpd_alt_url, video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats)
except ExtractorError:
abr = 64
for vbr, width, height in self._FORMATS:
tbr = vbr + abr
format_id = 'http-%s' % tbr
fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
if self._is_valid_url(fmt_url, video_id, format_id):
formats.append({
'format_id': format_id,
'url': fmt_url,
'vbr': vbr,
'abr': abr,
'width': width,
'height': height,
})
manifest_urls = self._download_json(
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
m3u8_url = manifest_urls.get('hls')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
mpd_url = manifest_urls.get('mpd')
if mpd_url:
formats.extend(self._extract_mpd_formats(
mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats)
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
upload_date = unified_strdate(date_diffusion) if date_diffusion else None

View File

@@ -177,46 +177,9 @@ class YahooIE(InfoExtractor):
'only_matching': True,
}]
def _real_extract(self, url):
url, country, display_id = re.match(self._VALID_URL, url).groups()
if not country:
country = 'us'
else:
country = country.split('-')[0]
api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
content = self._download_json(
api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
display_id, 'Downloading content JSON metadata', fatal=i == 1)
if content:
item = content['items'][0]
break
if item.get('type') != 'video':
entries = []
cover = item.get('cover') or {}
if cover.get('type') == 'yvideo':
cover_url = cover.get('url')
if cover_url:
entries.append(self.url_result(
cover_url, 'Yahoo', cover.get('uuid')))
for e in item.get('body', []):
if e.get('type') == 'videoIframe':
iframe_url = e.get('url')
if not iframe_url:
continue
entries.append(self.url_result(iframe_url))
return self.playlist_result(
entries, item.get('uuid'),
item.get('title'), item.get('summary'))
video_id = item['uuid']
def _extract_yahoo_video(self, video_id, country):
video = self._download_json(
api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
video_id, 'Downloading video JSON metadata')[0]
title = video['title']
@@ -298,7 +261,6 @@ class YahooIE(InfoExtractor):
'id': video_id,
'title': self._live_title(title) if is_live else title,
'formats': formats,
'display_id': display_id,
'thumbnails': thumbnails,
'description': clean_html(video.get('description')),
'timestamp': parse_iso8601(video.get('publish_time')),
@@ -311,6 +273,44 @@ class YahooIE(InfoExtractor):
'episode_number': int_or_none(series_info.get('episode_number')),
}
def _real_extract(self, url):
    """Resolve a Yahoo page through the caas article endpoint.

    Items of type 'video' are handed to _extract_yahoo_video; anything else
    becomes a playlist made of the cover video and any 'videoIframe' body
    elements.
    """
    url, country, display_id = re.match(self._VALID_URL, url).groups()
    # only the part before the first '-' is used as host prefix; default is 'us'
    country = country.split('-')[0] if country else 'us'

    response = self._download_json(
        'https://%s.yahoo.com/caas/content/article' % country, display_id,
        'Downloading content JSON metadata', query={
            'url': url
        })
    item = response['items'][0]['data']['partnerData']

    if item.get('type') == 'video':
        info = self._extract_yahoo_video(item['uuid'], country)
        info['display_id'] = display_id
        return info

    entries = []

    cover = item.get('cover') or {}
    if cover.get('type') == 'yvideo':
        cover_url = cover.get('url')
        if cover_url:
            entries.append(self.url_result(
                cover_url, 'Yahoo', cover.get('uuid')))

    for element in (item.get('body') or []):
        if element.get('type') != 'videoIframe':
            continue
        iframe_url = element.get('url')
        if iframe_url:
            entries.append(self.url_result(iframe_url))

    return self.playlist_result(
        entries, item.get('uuid'),
        item.get('title'), item.get('summary'))
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search'

View File

@@ -842,6 +842,10 @@ def parseOpts(overrideArguments=None):
'-o', '--output',
dest='outtmpl', metavar='TEMPLATE',
help='Output filename template, see "OUTPUT TEMPLATE" for details')
filesystem.add_option(
'--output-na-placeholder',
dest='outtmpl_na_placeholder', metavar='PLACEHOLDER', default='NA',
help=('Placeholder value for unavailable meta fields in output filename template (default is "%default")'))
filesystem.add_option(
'--autonumber-size',
dest='autonumber_size', metavar='NUMBER', type=int,
@@ -997,7 +1001,7 @@ def parseOpts(overrideArguments=None):
postproc.add_option(
'-x', '--extract-audio',
action='store_true', dest='extractaudio', default=False,
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
help='Convert video files to audio-only files (requires ffmpeg/avconv and ffprobe/avprobe)')
postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')