mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-09-27 01:00:16 +00:00
Update to ytdl-2021.01.24.1
This commit is contained in:
@@ -181,9 +181,12 @@ class YoutubeDL(object):
|
||||
allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
|
||||
allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
|
||||
outtmpl: Template for output names.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names.
|
||||
trim_file_name: Limit length of filename (extension excluded).
|
||||
ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
|
||||
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
trim_file_name: Limit length of filename (extension excluded)
|
||||
ignoreerrors: Do not stop on download errors
|
||||
(Default True when running youtube-dlc,
|
||||
but False when directly accessing YoutubeDL class)
|
||||
force_generic_extractor: Force downloader to use the generic extractor
|
||||
overwrites: Overwrite all video and metadata files if True,
|
||||
overwrite only non-video files if None
|
||||
@@ -741,7 +744,7 @@ class YoutubeDL(object):
|
||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||
for k, v in template_dict.items()
|
||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
|
||||
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
|
||||
@@ -761,8 +764,8 @@ class YoutubeDL(object):
|
||||
|
||||
# Missing numeric fields used together with integer presentation types
|
||||
# in format specification will break the argument substitution since
|
||||
# string 'NA' is returned for missing fields. We will patch output
|
||||
# template for missing fields to meet string presentation type.
|
||||
# string NA placeholder is returned for missing fields. We will patch
|
||||
# output template for missing fields to meet string presentation type.
|
||||
for numeric_field in self._NUMERIC_FIELDS:
|
||||
if numeric_field not in template_dict:
|
||||
# As of [1] format syntax is:
|
||||
|
@@ -373,6 +373,7 @@ def _real_main(argv=None):
|
||||
'listformats': opts.listformats,
|
||||
'listformats_table': opts.listformats_table,
|
||||
'outtmpl': outtmpl,
|
||||
'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
|
||||
'paths': opts.paths,
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'autonumber_start': opts.autonumber_start,
|
||||
|
@@ -256,7 +256,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
|
||||
'title': 'Ancient Aliens',
|
||||
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||
},
|
||||
'playlist_mincount': 168,
|
||||
'playlist_mincount': 150,
|
||||
}]
|
||||
_RESOURCE = 'series'
|
||||
_ITEMS_KEY = 'episodes'
|
||||
|
@@ -1,13 +1,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
|
||||
'info_dict': {
|
||||
'id': '3792260579001',
|
||||
'ext': 'mp4',
|
||||
@@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}, {
|
||||
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
|
||||
'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
program_name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, program_name)
|
||||
brightcove_id = self._search_regex(
|
||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
post_type, name = re.match(self._VALID_URL, url).groups()
|
||||
post_type = {
|
||||
'features': 'post',
|
||||
'program': 'episode',
|
||||
'videos': 'video',
|
||||
}[post_type.split('/')[0]]
|
||||
video = self._download_json(
|
||||
'https://www.aljazeera.com/graphql', name, query={
|
||||
'operationName': 'SingleArticleQuery',
|
||||
'variables': json.dumps({
|
||||
'name': name,
|
||||
'postType': post_type,
|
||||
}),
|
||||
}, headers={
|
||||
'wp-site': 'aje',
|
||||
})['data']['article']['video']
|
||||
video_id = video['id']
|
||||
account_id = video.get('accountId') or '665003303001'
|
||||
player_id = video.get('playerId') or 'BkeSH5BDb'
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||
'BrightcoveNew', video_id)
|
||||
|
@@ -1,13 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,8 +25,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523664000,
|
||||
'upload_date': '20180414',
|
||||
'timestamp': 1523318400,
|
||||
'upload_date': '20180410',
|
||||
'release_date': '20180410',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 18,
|
||||
@@ -33,6 +36,27 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
|
||||
'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
|
||||
'md5': '06451608c57651e985a498e69cec17e5',
|
||||
'info_dict': {
|
||||
'id': '5fbe8c61bda2010001c6763b',
|
||||
'title': 'Simple Chicken Dinner',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1610755200,
|
||||
'upload_date': '20210116',
|
||||
'release_date': '20210116',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 21,
|
||||
'episode': 'Simple Chicken Dinner',
|
||||
'episode_number': 3,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
@@ -60,7 +84,76 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'ie_key': 'Zype',
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||
'release_date': unified_strdate(video.get('publishDate')),
|
||||
'episode_number': int_or_none(episode.get('number')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'series': try_get(episode, lambda x: x['show']['title']),
|
||||
'episode': episode.get('title'),
|
||||
}
|
||||
|
||||
|
||||
class AmericasTestKitchenSeasonIE(InfoExtractor):
    # Playlist extractor for ".../episodes/browse/season_<N>" pages on
    # americastestkitchen.com and cookscountry.com. The episode list is read
    # from an Algolia search index and each hit is delegated to
    # AmericasTestKitchenIE.
    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
    _TESTS = [{
        # ATK Season
        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
        'info_dict': {
            'id': 'season_1',
            'title': 'Season 1',
        },
        'playlist_count': 13,
    }, {
        # Cooks Country Season
        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
        'info_dict': {
            'id': 'season_12',
            'title': 'Season 12',
        },
        'playlist_count': 13,
    }]

    def _real_extract(self, url):
        """Return a playlist result holding one URL entry per episode hit.

        The show and season number are taken from the URL; the season's
        episodes are then looked up in the show's search index.
        """
        show_name, raw_season = re.match(self._VALID_URL, url).groups()
        season_number = int(raw_season)

        # Short site code used in the index name, the facet filter and the
        # per-show episode-number attribute.
        if show_name == 'americastestkitchen':
            slug = 'atk'
        else:
            slug = 'cco'

        season = 'Season %d' % season_number
        episode_number_key = 'search_%s_episode_number' % slug
        site_root = 'https://www.%s.com' % show_name

        index_url = 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug
        request_headers = {
            'Origin': 'https://www.%s.com' % show_name,
            'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
            'X-Algolia-Application-Id': 'Y1FNZXUI30',
        }
        facet_filters = [
            'search_season_list:' + season,
            'search_document_klass:episode',
            'search_show_slug:' + slug,
        ]
        search_query = {
            'facetFilters': json.dumps(facet_filters),
            'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
            'attributesToHighlight': '',
            'hitsPerPage': 1000,
        }
        season_search = self._download_json(
            index_url, season, headers=request_headers, query=search_query)

        def entries():
            # Lazily yields one 'url' entry per hit; hits that carry no
            # 'search_url' cannot be linked to an episode page and are skipped.
            hits = season_search.get('hits') or []
            for hit in hits:
                relative_url = hit.get('search_url')
                if relative_url:
                    yield {
                        '_type': 'url',
                        'url': '%s%s' % (site_root, relative_url),
                        # Last '_'-separated component of the hit's objectID.
                        'id': try_get(hit, lambda e: e['objectID'].split('_')[-1]),
                        'title': hit.get('title'),
                        'description': hit.get('description'),
                        'timestamp': unified_timestamp(hit.get('search_document_date')),
                        'season_number': season_number,
                        'episode_number': int_or_none(hit.get(episode_number_key)),
                        'ie_key': AmericasTestKitchenIE.ie_key(),
                    }

        playlist_id = 'season_%d' % season_number
        return self.playlist_result(entries(), playlist_id, season)
|
||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .yahoo import YahooIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -15,9 +15,9 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AolIE(InfoExtractor):
|
||||
class AolIE(YahooIE):
|
||||
IE_NAME = 'aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
@@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Yahoo video
|
||||
'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if '-' in video_id:
|
||||
return self._extract_yahoo_video(video_id, 'us')
|
||||
|
||||
response = self._download_json(
|
||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||
|
@@ -226,13 +226,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
title = self._html_search_regex(
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
@@ -289,18 +289,18 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 14.02.2019
|
||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
||||
# available till 7.01.2022
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
||||
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
||||
'info_dict': {
|
||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
||||
'id': '102',
|
||||
'display_id': 'maischberger-die-woche',
|
||||
'id': '100',
|
||||
'ext': 'mp4',
|
||||
'duration': 4435.0,
|
||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
||||
'upload_date': '20180214',
|
||||
'duration': 3687.0,
|
||||
'title': 'maischberger. die woche vom 7. Januar 2021',
|
||||
'upload_date': '20210107',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
@@ -355,17 +355,17 @@ class ARDIE(InfoExtractor):
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': '70153354',
|
||||
'id': '78566716',
|
||||
'title': 'Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
|
||||
'timestamp': 1577047500,
|
||||
'upload_date': '20191222',
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
|
||||
'timestamp': 1596658200,
|
||||
'upload_date': '20200805',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
|
@@ -1,142 +1,51 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
||||
(video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
|
||||
/(?P<title>.*)'''
|
||||
_VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
||||
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
|
||||
'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
|
||||
'md5': 'b8acb347177c680ff18a292aa2166f80',
|
||||
'info_dict': {
|
||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||
'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
|
||||
'ext': 'mp4',
|
||||
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||
'timestamp': 1376798400,
|
||||
'upload_date': '20130818',
|
||||
'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
|
||||
'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
|
||||
'timestamp': 1598670000,
|
||||
'upload_date': '20200829',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||
'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
||||
(?:full-episodes|shows(?=/[^/]+/full-episodes))
|
||||
/(?P<id>[^?]+)'''
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
|
||||
'info_dict': {
|
||||
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
|
||||
'title': 'November 28, 2016 - Ryan Speedo Green',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
mgid = self._extract_mgid(webpage, url, data_zone='t2_lc_promo1')
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
return videos_info
|
||||
|
||||
|
||||
class ToshIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'Tosh.0'
|
||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'info_dict': {
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'title': 'Twitter Users Share Summer Plans',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
||||
'info_dict': {
|
||||
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
# It's really reported to be published on year 2077
|
||||
'upload_date': '20770610',
|
||||
'timestamp': 3390510600,
|
||||
'subtitles': {
|
||||
'en': 'mincount:3',
|
||||
},
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
||||
'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
||||
'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-f99b626bdfe13568579a',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
||||
'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
|
||||
'ext': 'mp4',
|
||||
'title': 'Josh Investigates',
|
||||
'description': 'Steht uns das Ende der Welt bevor?',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mrss_url = self._search_regex(
|
||||
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'mrss url', group='url')
|
||||
|
||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||
|
||||
|
||||
class ComedyCentralShortnameIE(InfoExtractor):
|
||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
|
||||
_TESTS = [{
|
||||
'url': ':tds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':thedailyshow',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':theopposition',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
shortcut_map = {
|
||||
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'accountOverride': 'intl.mtvi.com',
|
||||
'arcEp': 'web.cc.tv',
|
||||
'ep': 'b9032c3a',
|
||||
'imageEp': 'web.cc.tv',
|
||||
'mgid': uri,
|
||||
}
|
||||
return self.url_result(shortcut_map[video_id])
|
||||
|
@@ -50,7 +50,10 @@ from .animelab import (
|
||||
AnimeLabIE,
|
||||
AnimeLabShowsIE,
|
||||
)
|
||||
from .americastestkitchen import AmericasTestKitchenIE
|
||||
from .americastestkitchen import (
|
||||
AmericasTestKitchenIE,
|
||||
AmericasTestKitchenSeasonIE,
|
||||
)
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anvato import AnvatoIE
|
||||
from .aol import AolIE
|
||||
@@ -244,11 +247,8 @@ from .cnn import (
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralFullEpisodesIE,
|
||||
ComedyCentralIE,
|
||||
ComedyCentralShortnameIE,
|
||||
ComedyCentralTVIE,
|
||||
ToshIE,
|
||||
)
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import (
|
||||
@@ -682,6 +682,11 @@ from .mildom import (
|
||||
MildomVodIE,
|
||||
MildomUserVodIE,
|
||||
)
|
||||
from .minds import (
|
||||
MindsIE,
|
||||
MindsChannelIE,
|
||||
MindsGroupIE,
|
||||
)
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
@@ -1162,6 +1167,10 @@ from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .spotify import (
|
||||
SpotifyIE,
|
||||
SpotifyShowIE,
|
||||
)
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerPageIE,
|
||||
@@ -1270,7 +1279,10 @@ from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trovolive import TrovoLiveIE
|
||||
from .trovo import (
|
||||
TrovoIE,
|
||||
TrovoVodIE,
|
||||
)
|
||||
from .trunews import TruNewsIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class FranceCultureIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
||||
'info_dict': {
|
||||
'id': 'rendez-vous-au-pays-des-geeks',
|
||||
@@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
|
||||
'title': 'Rendez-vous au pays des geeks',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140301',
|
||||
'timestamp': 1393642916,
|
||||
'timestamp': 1393700400,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# no thumbnail
|
||||
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -36,19 +40,19 @@ class FranceCultureIE(InfoExtractor):
|
||||
</h1>|
|
||||
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
||||
).*?
|
||||
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
|
||||
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
|
||||
''',
|
||||
webpage, 'video data'))
|
||||
|
||||
video_url = video_data['data-asset-source']
|
||||
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
|
||||
video_url = video_data.get('data-url') or video_data['data-asset-source']
|
||||
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
||||
webpage, 'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
webpage, 'thumbnail', default=None)
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<span class="author">(.*?)</span>',
|
||||
webpage, 'uploader', default=None)
|
||||
@@ -64,6 +68,6 @@ class FranceCultureIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if ext == 'mp3' else None,
|
||||
'uploader': uploader,
|
||||
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
|
||||
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
|
||||
'duration': int_or_none(video_data.get('data-duration')),
|
||||
}
|
||||
|
@@ -5,7 +5,10 @@ import functools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -131,6 +134,9 @@ class LBRYIE(LBRYBaseIE):
|
||||
}, {
|
||||
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -139,6 +145,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
||||
else:
|
||||
display_id = display_id.replace(':', '#')
|
||||
display_id = compat_urllib_parse_unquote(display_id)
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._resolve_url(uri, display_id, 'stream')
|
||||
result_value = result['value']
|
||||
|
196
youtube_dlc/extractor/minds.py
Normal file
196
youtube_dlc/extractor/minds.py
Normal file
@@ -0,0 +1,196 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MindsBaseIE(InfoExtractor):
    # Shared base for the minds.com extractors: holds the common URL prefix
    # and a helper for calling the site's JSON API.
    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'

    def _call_api(self, path, video_id, resource, query=None):
        """Download and return JSON from ``https://www.minds.com/api/<path>``.

        ``resource`` is only used in the progress note. The value of the
        'XSRF-TOKEN' cookie, when one is stored for the API URL, is sent back
        in the 'X-XSRF-TOKEN' header; otherwise an empty string is sent.
        """
        endpoint = 'https://www.minds.com/api/' + path
        xsrf_cookie = self._get_cookies(endpoint).get('XSRF-TOKEN')
        if xsrf_cookie:
            xsrf_value = xsrf_cookie.value
        else:
            xsrf_value = ''
        request_headers = {
            'Referer': 'https://www.minds.com/',
            'X-XSRF-TOKEN': xsrf_value,
        }
        note = 'Downloading %s JSON metadata' % resource
        return self._download_json(
            endpoint, video_id, note, headers=request_headers, query=query)
|
||||
|
||||
|
||||
class MindsIE(MindsBaseIE):
    """Extractor for single minds.com media, newsfeed and archive entries."""

    IE_NAME = 'minds'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.minds.com/media/100000000000086822',
        'md5': '215a658184a419764852239d4970b045',
        'info_dict': {
            'id': '100000000000086822',
            'ext': 'mp4',
            'title': 'Minds intro sequence',
            'thumbnail': r're:https?://.+\.png',
            'uploader_id': 'ottman',
            'upload_date': '20130524',
            'timestamp': 1369404826,
            'uploader': 'Bill Ottman',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['animation'],
            'comment_count': int,
            'license': 'attribution-cc',
        },
    }, {
        # entity.type == 'activity' and empty title
        'url': 'https://www.minds.com/newsfeed/798025111988506624',
        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
        'info_dict': {
            'id': '798022190320226304',
            'ext': 'mp4',
            'title': '798022190320226304',
            'uploader': 'ColinFlaherty',
            'upload_date': '20180111',
            'timestamp': 1515639316,
            'uploader_id': 'ColinFlaherty',
        },
    }, {
        'url': 'https://www.minds.com/archive/view/715172106794442752',
        'only_matching': True,
    }, {
        # youtube perma_url
        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the entity referenced by ``url``.

        Activity entities whose custom type is not 'video' are delegated to
        whichever extractor handles their ``perma_url``.  Non-activity
        entities must have the 'video' subtype.

        Raises:
            ExtractorError: if a non-activity entity is not a video.
        """
        # Imported locally so this method does not depend on the module-level
        # import list.
        from ..utils import ExtractorError

        entity_id = self._match_id(url)
        entity = self._call_api(
            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
        if entity.get('type') == 'activity':
            if entity.get('custom_type') == 'video':
                video_id = entity['entity_guid']
            else:
                return self.url_result(entity['perma_url'])
        else:
            # An explicit check instead of ``assert``: assertions are removed
            # when Python runs with -O, and a bare AssertionError/KeyError is
            # not reported like a regular extraction failure.
            if entity.get('subtype') != 'video':
                raise ExtractorError(
                    'Entity %s is not a video' % entity_id, expected=True)
            video_id = entity_id
        # 1080p and webm formats available only on the sources array
        video = self._call_api(
            'v2/media/video/' + video_id, video_id, 'video')

        formats = []
        for source in (video.get('sources') or []):
            src = source.get('src')
            if not src:
                continue
            formats.append({
                'format_id': source.get('label'),
                'height': int_or_none(source.get('size')),
                'url': src,
            })
        self._sort_formats(formats)

        # Prefer the entity embedded in the video response, falling back to
        # the one fetched above.
        entity = video.get('entity') or entity
        owner = entity.get('ownerObj') or {}
        uploader_id = owner.get('username')

        # A single tag given as a plain string is wrapped into a list.
        tags = entity.get('tags')
        if tags and isinstance(tags, compat_str):
            tags = [tags]

        thumbnail = None
        poster = video.get('poster') or entity.get('thumbnail_src')
        if poster:
            # Request the poster (non-fatally) and keep the URL reported by
            # the response object rather than the one listed in the metadata.
            urlh = self._request_webpage(poster, video_id, fatal=False)
            if urlh:
                thumbnail = urlh.geturl()

        return {
            'id': video_id,
            'title': entity.get('title') or video_id,
            'formats': formats,
            'description': clean_html(entity.get('description')) or None,
            'license': str_or_none(entity.get('license')),
            'timestamp': int_or_none(entity.get('time_created')),
            'uploader': strip_or_none(owner.get('name')),
            'uploader_id': uploader_id,
            'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
            'view_count': int_or_none(entity.get('play:count')),
            'like_count': int_or_none(entity.get('thumbs:up:count')),
            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
            'tags': tags,
            'comment_count': int_or_none(entity.get('comments:count')),
            'thumbnail': thumbnail,
        }
|
||||
|
||||
|
||||
class MindsFeedBaseIE(MindsBaseIE):
    """Base class for minds.com feed playlists.

    Subclasses provide ``_FEED_PATH`` (API path segment) and ``_FEED_TYPE``
    (both the progress-note label and the key of the feed object in the API
    response).
    """

    _PAGE_SIZE = 150

    def _entries(self, feed_id):
        """Yield a url_result for every feed entity that has a guid.

        Pages are requested until the API returns a falsy 'load-next' value
        or a page holding fewer than ``_PAGE_SIZE`` entities.
        """
        params = {'limit': self._PAGE_SIZE, 'sync': 1}
        api_path = 'v2/feeds/container/%s/videos' % feed_id
        page_num = 0
        has_more = True
        while has_more:
            page_num += 1
            page = self._call_api(
                api_path, feed_id, 'page %s' % page_num, params)
            items = page.get('entities') or []
            for item in items:
                guid = item.get('guid')
                if guid:
                    yield self.url_result(
                        'https://www.minds.com/newsfeed/' + guid,
                        MindsIE.ie_key(), guid)
            # The cursor is stored in the shared query dict for the next call.
            cursor = page['load-next']
            params['from_timestamp'] = cursor
            has_more = bool(cursor) and len(items) == self._PAGE_SIZE

    def _real_extract(self, url):
        """Build a playlist from the feed identified by ``url``."""
        feed_id = self._match_id(url)
        feed_type = self._FEED_TYPE
        api_path = 'v1/%s/%s' % (self._FEED_PATH, feed_id)
        feed = self._call_api(api_path, feed_id, feed_type)[feed_type]

        entries = self._entries(feed['guid'])
        title = strip_or_none(feed.get('name'))
        description = feed.get('briefdescription')
        return self.playlist_result(entries, feed_id, title, description)
|
||||
|
||||
|
||||
class MindsChannelIE(MindsFeedBaseIE):
    """Playlist extractor for the videos feed of a minds.com channel."""

    _FEED_TYPE = 'channel'
    _FEED_PATH = 'channel'
    IE_NAME = 'minds:' + _FEED_TYPE
    # The negative lookahead excludes the path prefixes listed in it, so only
    # other first path segments are treated as channel names.
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.minds.com/ottman',
        'info_dict': {
            'id': 'ottman',
            'title': 'Bill Ottman',
            'description': 'Co-creator & CEO @minds',
        },
        'playlist_mincount': 54,
    }
|
||||
|
||||
|
||||
class MindsGroupIE(MindsFeedBaseIE):
    """Playlist extractor for the videos feed of a minds.com group."""

    _FEED_TYPE = 'group'
    _FEED_PATH = 'groups/group'
    IE_NAME = 'minds:' + _FEED_TYPE
    # Groups are addressed by their numeric profile id.
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
        'info_dict': {
            'id': '785582576369672204',
            'title': 'Cooking Videos',
        },
        'playlist_mincount': 1,
    }
|
@@ -255,6 +255,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
|
||||
|
||||
@staticmethod
|
||||
def _extract_child_with_type(parent, t):
|
||||
return next(c for c in parent['children'] if c.get('type') == t)
|
||||
|
||||
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
|
||||
if url == '':
|
||||
return
|
||||
@@ -332,6 +336,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if not mgid:
|
||||
mgid = self._extract_triforce_mgid(webpage, data_zone)
|
||||
|
||||
if not mgid:
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
|
||||
mgid = video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
return mgid
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -403,18 +414,6 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def extract_child_with_type(parent, t):
|
||||
children = parent['children']
|
||||
return next(c for c in children if c.get('type') == t)
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self.extract_child_with_type(data, 'MainContainer')
|
||||
video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
|
||||
return video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
|
||||
class MTVJapanIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtvjapan'
|
||||
|
@@ -1,104 +1,125 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NineGagIE(InfoExtractor):
|
||||
IE_NAME = '9gag'
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
||||
_TEST = {
|
||||
'url': 'https://9gag.com/gag/ae5Ag7B',
|
||||
'info_dict': {
|
||||
'id': 'kXzwOKyGlSA',
|
||||
'id': 'ae5Ag7B',
|
||||
'ext': 'mp4',
|
||||
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
||||
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
||||
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
|
||||
'uploader': 'CompilationChannel',
|
||||
'upload_date': '20131110',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/p/aKolP3',
|
||||
'info_dict': {
|
||||
'id': 'aKolP3',
|
||||
'ext': 'mp4',
|
||||
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
|
||||
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
|
||||
'uploader_id': 'rickmereki',
|
||||
'uploader': 'Rick Mereki',
|
||||
'upload_date': '20110803',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/p/KklwM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.tv/p/Kk2X5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/embed/a5Dmvl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_EXTERNAL_VIDEO_PROVIDER = {
|
||||
'1': {
|
||||
'url': '%s',
|
||||
'ie_key': 'Youtube',
|
||||
},
|
||||
'2': {
|
||||
'url': 'http://player.vimeo.com/video/%s',
|
||||
'ie_key': 'Vimeo',
|
||||
},
|
||||
'3': {
|
||||
'url': 'http://instagram.com/p/%s',
|
||||
'ie_key': 'Instagram',
|
||||
},
|
||||
'4': {
|
||||
'url': 'http://vine.co/v/%s',
|
||||
'ie_key': 'Vine',
|
||||
},
|
||||
'title': 'Capybara Agility Training',
|
||||
'upload_date': '20191108',
|
||||
'timestamp': 1573237208,
|
||||
'categories': ['Awesome'],
|
||||
'tags': ['Weimaraner', 'American Pit Bull Terrier'],
|
||||
'duration': 44,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
post_id = self._match_id(url)
|
||||
post = self._download_json(
|
||||
'https://9gag.com/v1/post', post_id, query={
|
||||
'id': post_id
|
||||
})['data']['post']
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if post.get('type') != 'Animated':
|
||||
raise ExtractorError(
|
||||
'The given url does not contain a video',
|
||||
expected=True)
|
||||
|
||||
post_view = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
|
||||
webpage, 'post view'),
|
||||
display_id)
|
||||
title = post['title']
|
||||
|
||||
ie_key = None
|
||||
source_url = post_view.get('sourceUrl')
|
||||
if not source_url:
|
||||
external_video_id = post_view['videoExternalId']
|
||||
external_video_provider = post_view['videoExternalProvider']
|
||||
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
|
||||
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
|
||||
title = post_view['title']
|
||||
description = post_view.get('description')
|
||||
view_count = str_to_int(post_view.get('externalView'))
|
||||
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
||||
duration = None
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for key, image in (post.get('images') or {}).items():
|
||||
image_url = url_or_none(image.get('url'))
|
||||
if not image_url:
|
||||
continue
|
||||
ext = determine_ext(image_url)
|
||||
image_id = key.strip('image')
|
||||
common = {
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
}
|
||||
if ext in ('jpg', 'png'):
|
||||
webp_url = image.get('webpUrl')
|
||||
if webp_url:
|
||||
t = common.copy()
|
||||
t.update({
|
||||
'id': image_id + '-webp',
|
||||
'url': webp_url,
|
||||
})
|
||||
thumbnails.append(t)
|
||||
common.update({
|
||||
'id': image_id,
|
||||
'ext': ext,
|
||||
})
|
||||
thumbnails.append(common)
|
||||
elif ext in ('webm', 'mp4'):
|
||||
if not duration:
|
||||
duration = int_or_none(image.get('duration'))
|
||||
common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
|
||||
for vcodec in ('vp8', 'vp9', 'h265'):
|
||||
c_url = image.get(vcodec + 'Url')
|
||||
if not c_url:
|
||||
continue
|
||||
c_f = common.copy()
|
||||
c_f.update({
|
||||
'format_id': image_id + '-' + vcodec,
|
||||
'url': c_url,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
formats.append(c_f)
|
||||
common.update({
|
||||
'ext': ext,
|
||||
'format_id': image_id,
|
||||
})
|
||||
formats.append(common)
|
||||
self._sort_formats(formats)
|
||||
|
||||
section = try_get(post, lambda x: x['postSection']['name'])
|
||||
|
||||
tags = None
|
||||
post_tags = post.get('tags')
|
||||
if post_tags:
|
||||
tags = []
|
||||
for tag in post_tags:
|
||||
tag_key = tag.get('key')
|
||||
if not tag_key:
|
||||
continue
|
||||
tags.append(tag_key)
|
||||
|
||||
get_count = lambda x: int_or_none(post.get(x + 'Count'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': source_url,
|
||||
'ie_key': ie_key,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'id': post_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'view_count': view_count,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': int_or_none(post.get('creationTs')),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'like_count': get_count('upVote'),
|
||||
'dislike_count': get_count('downVote'),
|
||||
'comment_count': get_count('comments'),
|
||||
'age_limit': 18 if post.get('nsfw') == 1 else None,
|
||||
'categories': [section] if section else None,
|
||||
'tags': tags,
|
||||
}
|
||||
|
@@ -6,30 +6,40 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class NJPWWorldIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
|
||||
IE_DESC = '新日本プロレスワールド'
|
||||
_NETRC_MACHINE = 'njpwworld'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
|
||||
'info_dict': {
|
||||
'id': 's_series_00155_1_9',
|
||||
'ext': 'mp4',
|
||||
'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
|
||||
'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # AES-encrypted m3u8
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
|
||||
'info_dict': {
|
||||
'id': 's_series_00563_16_bs',
|
||||
'ext': 'mp4',
|
||||
'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
|
||||
'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
|
||||
|
||||
@@ -64,35 +74,27 @@ class NJPWWorldIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = []
|
||||
for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
|
||||
player = extract_attributes(mobj.group(0))
|
||||
player_path = player.get('href')
|
||||
if not player_path:
|
||||
continue
|
||||
kind = self._search_regex(
|
||||
r'(low|high)$', player.get('class') or '', 'kind',
|
||||
default='low')
|
||||
for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
|
||||
player_path = '/intent?id=%s&type=url' % vid
|
||||
player_url = compat_urlparse.urljoin(url, player_path)
|
||||
player_page = self._download_webpage(
|
||||
player_url, video_id, note='Downloading player page')
|
||||
entries = self._parse_html5_media_entries(
|
||||
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
|
||||
m3u8_entry_protocol='m3u8_native')
|
||||
kind_formats = entries[0]['formats']
|
||||
for f in kind_formats:
|
||||
f['quality'] = 2 if kind == 'high' else 1
|
||||
formats.extend(kind_formats)
|
||||
formats.append({
|
||||
'url': player_url,
|
||||
'format_id': kind,
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
'quality': 2 if kind == 'high' else 1,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
post_content = get_element_by_class('post-content', webpage)
|
||||
tag_block = get_element_by_class('tag-block', webpage)
|
||||
tags = re.findall(
|
||||
r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
|
||||
) if post_content else None
|
||||
r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
|
||||
) if tag_block else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'tags': tags,
|
||||
}
|
||||
|
@@ -20,19 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
|
||||
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _extract_mgid(self, webpage, url):
|
||||
mgid = None
|
||||
|
||||
if not mgid:
|
||||
mgid = self._extract_triforce_mgid(webpage)
|
||||
|
||||
if not mgid:
|
||||
mgid = self._extract_new_triforce_mgid(webpage, url)
|
||||
|
||||
return mgid
|
||||
|
||||
# TODO Remove - Reason: Outdated Site
|
||||
|
||||
|
||||
class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
|
||||
@@ -56,16 +43,6 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'arcEp': 'paramountnetwork.com',
|
||||
'imageEp': 'paramountnetwork.com',
|
||||
'mgid': uri,
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage, url):
|
||||
root_data = self._parse_json(self._search_regex(
|
||||
r'window\.__DATA__\s*=\s*({.+})',
|
||||
webpage, 'data'), None)
|
||||
|
||||
def find_sub_data(data, data_type):
|
||||
return next(c for c in data['children'] if c.get('type') == data_type)
|
||||
|
||||
c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
|
||||
return c['props']['media']['video']['config']['uri']
|
||||
|
156
youtube_dlc/extractor/spotify.py
Normal file
156
youtube_dlc/extractor/spotify.py
Normal file
@@ -0,0 +1,156 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SpotifyBaseIE(InfoExtractor):
    """Shared token handling, API helper and episode parsing for Spotify."""

    _ACCESS_TOKEN = None
    # Persisted-query hashes, keyed by operation name (without 'query').
    _OPERATION_HASHES = {
        'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
        'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
        'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
    }
    _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'

    def _real_initialize(self):
        """Fetch an access token and store it on the instance."""
        token_info = self._download_json(
            'https://open.spotify.com/get_access_token', None)
        self._ACCESS_TOKEN = token_info['accessToken']

    def _call_api(self, operation, video_id, variables):
        """Run the persisted query ``operation`` and return its 'data' object.

        ``variables`` is serialised to JSON and sent as a query parameter
        together with the operation's hash from ``_OPERATION_HASHES``.
        """
        serialized_variables = json.dumps(variables)
        serialized_extensions = json.dumps({
            'persistedQuery': {
                'sha256Hash': self._OPERATION_HASHES[operation],
            },
        })
        auth_headers = {'authorization': 'Bearer ' + self._ACCESS_TOKEN}
        response = self._download_json(
            'https://api-partner.spotify.com/pathfinder/v1/query', video_id,
            query={
                'operationName': 'query' + operation,
                'variables': serialized_variables,
                'extensions': serialized_extensions,
            }, headers=auth_headers)
        return response['data']

    def _extract_episode(self, episode, series):
        """Convert an API episode object into an info dict.

        ``series`` is passed through unchanged as the 'series' field.
        """
        episode_id = episode['id']
        title = episode['name'].strip()

        formats = []

        # Preview audio: the preview host prefix in the URL is swapped for
        # the anon-podcast host.
        preview = episode.get('audioPreview') or {}
        preview_url = preview.get('url')
        if preview_url:
            preview_format = {
                'url': preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
                'vcodec': 'none',
            }
            format_name = preview.get('format')
            if format_name:
                preview_format['format_id'] = format_name
                # Format names matching this pattern carry a three-character
                # code used as the extension and a number used as the bitrate.
                parsed = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', format_name)
                if parsed:
                    codec, bitrate = parsed.groups()
                    preview_format.update({
                        'abr': int(bitrate),
                        'ext': codec.lower(),
                    })
            formats.append(preview_format)

        # Only externally hosted audio items with a URL are added.
        audio_items = try_get(episode, lambda x: x['audio']['items']) or []
        for audio_item in audio_items:
            audio_url = audio_item.get('url')
            if audio_url and audio_item.get('externallyHosted'):
                formats.append({
                    'url': clean_podcast_url(audio_url),
                    'vcodec': 'none',
                })

        thumbnails = []
        cover_sources = try_get(episode, lambda x: x['coverArt']['sources']) or []
        for cover in cover_sources:
            cover_url = cover.get('url')
            if cover_url:
                thumbnails.append({
                    'url': cover_url,
                    'width': int_or_none(cover.get('width')),
                    'height': int_or_none(cover.get('height')),
                })

        duration_ms = try_get(
            episode, lambda x: x['duration']['totalMilliseconds'])
        release_iso = try_get(
            episode, lambda x: x['releaseDate']['isoString'])

        return {
            'id': episode_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': strip_or_none(episode.get('description')),
            'duration': float_or_none(duration_ms, 1000),
            'release_date': unified_strdate(release_iso),
            'series': series,
        }
|
||||
|
||||
|
||||
class SpotifyIE(SpotifyBaseIE):
    """Extractor for a single open.spotify.com episode URL."""

    IE_NAME = 'spotify'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
    _TEST = {
        'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
        'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
        'info_dict': {
            'id': '4Z7GAJ50bgctf6uclHlWKo',
            'ext': 'mp3',
            'title': 'From the archive: Why time management is ruining our lives',
            'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
            'duration': 2083.605,
            'release_date': '20201217',
            'series': "The Guardian's Audio Long Reads",
        }
    }

    def _real_extract(self, url):
        """Query the 'Episode' operation and convert the returned episode."""
        episode_id = self._match_id(url)
        variables = {'uri': 'spotify:episode:' + episode_id}
        episode = self._call_api('Episode', episode_id, variables)['episode']
        # The series name comes from the episode's podcast object, if any.
        series = try_get(episode, lambda x: x['podcast']['name'])
        return self._extract_episode(episode, series)
|
||||
|
||||
|
||||
class SpotifyShowIE(SpotifyBaseIE):
    """Playlist extractor for an open.spotify.com show URL."""

    IE_NAME = 'spotify:show'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
    _TEST = {
        'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
        'info_dict': {
            'id': '4PM9Ke6l66IRNpottHKV9M',
            'title': 'The Story from the Guardian',
            'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
        },
        'playlist_mincount': 36,
    }

    def _real_extract(self, url):
        """Query the 'ShowEpisodes' operation and return a playlist.

        Items without an 'episode' object are skipped.
        """
        show_id = self._match_id(url)
        variables = {
            'limit': 1000000000,
            'offset': 0,
            'uri': 'spotify:show:' + show_id,
        }
        podcast = self._call_api('ShowEpisodes', show_id, variables)['podcast']
        podcast_name = podcast.get('name')

        entries = []
        episode_items = try_get(podcast, lambda x: x['episodes']['items']) or []
        for episode_item in episode_items:
            episode = episode_item.get('episode')
            if episode:
                entries.append(self._extract_episode(episode, podcast_name))

        description = podcast.get('description')
        return self.playlist_result(
            entries, show_id, podcast_name, description)
|
193
youtube_dlc/extractor/trovo.py
Normal file
193
youtube_dlc/extractor/trovo.py
Normal file
@@ -0,0 +1,193 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class TrovoBaseIE(InfoExtractor):
    """Shared URL prefix and uploader parsing for the trovo.live extractors."""

    _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'

    def _extract_streamer_info(self, data):
        """Return the uploader fields found in ``data['streamerInfo']``.

        A missing or empty 'streamerInfo' yields None for every field.
        """
        streamer = data.get('streamerInfo') or {}
        user_name = streamer.get('userName')
        if user_name:
            profile_url = 'https://trovo.live/' + user_name
        else:
            profile_url = None
        return {
            'uploader': streamer.get('nickName'),
            'uploader_id': str_or_none(streamer.get('uid')),
            'uploader_url': profile_url,
        }
|
||||
|
||||
|
||||
class TrovoIE(TrovoBaseIE):
    """Extractor for trovo.live live streams, addressed by user name."""

    # Paths starting with 'clip/' or 'video/' are excluded by the lookahead.
    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'

    def _real_extract(self, url):
        """Return the info dict for the stream of the user named in ``url``.

        Raises:
            ExtractorError: if the API reports ``isLive == 0``.
        """
        username = self._match_id(url)
        live_query = '''{
getLiveInfo(params: {userName: "%s"}) {
isLive
programInfo {
coverUrl
id
streamInfo {
desc
playUrl
}
title
}
streamerInfo {
nickName
uid
userName
}
}
}''' % username
        response = self._download_json(
            'https://gql.trovo.live/', username, query={'query': live_query})
        live_info = response['data']['getLiveInfo']
        if live_info.get('isLive') == 0:
            raise ExtractorError('%s is offline' % username, expected=True)

        program_info = live_info['programInfo']
        program_id = program_info['id']
        title = self._live_title(program_info['title'])

        formats = []
        for stream in (program_info.get('streamInfo') or []):
            play_url = stream.get('playUrl')
            if play_url:
                label = stream.get('desc')
                # The height is the label without its last character.
                height = int_or_none(label[:-1]) if label else None
                formats.append({
                    'format_id': label,
                    'height': height,
                    'url': play_url,
                })
        self._sort_formats(formats)

        result = {
            'id': program_id,
            'title': title,
            'formats': formats,
            'thumbnail': program_info.get('coverUrl'),
            'is_live': True,
        }
        result.update(self._extract_streamer_info(live_info))
        return result
|
||||
|
||||
|
||||
class TrovoVodIE(TrovoBaseIE):
    """Extractor for trovo.live clips and recorded videos."""

    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
        'info_dict': {
            'id': 'ltv-100095501_100095501_1609596043',
            'ext': 'mp4',
            'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
            'uploader': 'Exsl',
            'timestamp': 1609640305,
            'upload_date': '20210103',
            'uploader_id': '100095501',
            'duration': 43977,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'comments': 'mincount:8',
            'categories': ['Grand Theft Auto V'],
        },
    }, {
        'url': 'https://trovo.live/clip/lc-5285890810184026005',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict, including comments, for the VOD in ``url``.

        Both the VOD details and the comment list are requested in a single
        POST carrying two GraphQL queries.
        """
        vid = self._match_id(url)
        detail_query = '''{
batchGetVodDetailInfo(params: {vids: ["%s"]}) {
VodDetailInfos
}
}''' % vid
        comments_query = '''{
getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
commentList {
author {
nickName
uid
}
commentID
content
createdAt
parentID
}
}
}''' % vid
        payload = json.dumps([
            {'query': detail_query},
            {'query': comments_query},
        ]).encode()
        resp = self._download_json(
            'https://gql.trovo.live/', vid, data=payload,
            headers={'Content-Type': 'application/json'})

        # The first response element answers the detail query.
        vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
        vod_info = vod_detail_info['vodInfo']
        title = vod_info['title']

        language = vod_info.get('languageName')
        formats = []
        for play_info in (vod_info.get('playInfos') or []):
            play_url = play_info.get('playUrl')
            if play_url:
                label = play_info.get('desc')
                # The height is the label without its last character.
                height = int_or_none(label[:-1]) if label else None
                formats.append({
                    'ext': 'mp4',
                    'filesize': int_or_none(play_info.get('fileSize')),
                    'format_id': label,
                    'height': height,
                    'language': language,
                    'protocol': 'm3u8_native',
                    'tbr': int_or_none(play_info.get('bitrate')),
                    'url': play_url,
                })
        self._sort_formats(formats)

        category = vod_info.get('categoryName')

        def count_of(kind):
            # Counters are stored under '<kind>Num' keys of the VOD info.
            return int_or_none(vod_info.get(kind + 'Num'))

        # The second response element answers the comment query; anything but
        # a list there results in no comments.
        raw_comments = try_get(
            resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
        comments = []
        for raw in raw_comments:
            text = raw.get('content')
            if text:
                author = raw.get('author') or {}
                parent_id = raw.get('parentID')
                # A parent id of 0 is reported as 'root'.
                parent = 'root' if parent_id == 0 else str_or_none(parent_id)
                comments.append({
                    'author': author.get('nickName'),
                    'author_id': str_or_none(author.get('uid')),
                    'id': str_or_none(raw.get('commentID')),
                    'text': text,
                    'timestamp': int_or_none(raw.get('createdAt')),
                    'parent': parent,
                })

        result = {
            'id': vid,
            'title': title,
            'formats': formats,
            'thumbnail': vod_info.get('coverUrl'),
            'timestamp': int_or_none(vod_info.get('publishTs')),
            'duration': int_or_none(vod_info.get('duration')),
            'view_count': count_of('watch'),
            'like_count': count_of('like'),
            'comment_count': count_of('comment'),
            'comments': comments,
            'categories': [category] if category else None,
        }
        result.update(self._extract_streamer_info(vod_detail_info))
        return result
|
@@ -1,12 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
@@ -46,15 +43,6 @@ class WatIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
_FORMATS = (
|
||||
(200, 416, 234),
|
||||
(400, 480, 270),
|
||||
(600, 640, 360),
|
||||
(1200, 640, 360),
|
||||
(1800, 960, 540),
|
||||
(2500, 1280, 720),
|
||||
)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
|
||||
@@ -97,46 +85,20 @@ class WatIE(InfoExtractor):
|
||||
return red_url
|
||||
return None
|
||||
|
||||
def remove_bitrate_limit(manifest_url):
|
||||
return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
|
||||
|
||||
formats = []
|
||||
try:
|
||||
alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
|
||||
manifest_urls = self._download_json(
|
||||
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
|
||||
m3u8_url = manifest_urls.get('hls')
|
||||
if m3u8_url:
|
||||
m3u8_url = remove_bitrate_limit(m3u8_url)
|
||||
for m3u8_alt_url in alt_urls(m3u8_url):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_alt_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
mpd_url = manifest_urls.get('mpd')
|
||||
if mpd_url:
|
||||
mpd_url = remove_bitrate_limit(mpd_url)
|
||||
for mpd_alt_url in alt_urls(mpd_url):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_alt_url, video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
except ExtractorError:
|
||||
abr = 64
|
||||
for vbr, width, height in self._FORMATS:
|
||||
tbr = vbr + abr
|
||||
format_id = 'http-%s' % tbr
|
||||
fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
|
||||
if self._is_valid_url(fmt_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': fmt_url,
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
manifest_urls = self._download_json(
|
||||
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
|
||||
m3u8_url = manifest_urls.get('hls')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
mpd_url = manifest_urls.get('mpd')
|
||||
if mpd_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
|
||||
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
|
||||
|
@@ -177,46 +177,9 @@ class YahooIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, country, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not country:
|
||||
country = 'us'
|
||||
else:
|
||||
country = country.split('-')[0]
|
||||
api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
|
||||
|
||||
for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
|
||||
content = self._download_json(
|
||||
api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
|
||||
display_id, 'Downloading content JSON metadata', fatal=i == 1)
|
||||
if content:
|
||||
item = content['items'][0]
|
||||
break
|
||||
|
||||
if item.get('type') != 'video':
|
||||
entries = []
|
||||
|
||||
cover = item.get('cover') or {}
|
||||
if cover.get('type') == 'yvideo':
|
||||
cover_url = cover.get('url')
|
||||
if cover_url:
|
||||
entries.append(self.url_result(
|
||||
cover_url, 'Yahoo', cover.get('uuid')))
|
||||
|
||||
for e in item.get('body', []):
|
||||
if e.get('type') == 'videoIframe':
|
||||
iframe_url = e.get('url')
|
||||
if not iframe_url:
|
||||
continue
|
||||
entries.append(self.url_result(iframe_url))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, item.get('uuid'),
|
||||
item.get('title'), item.get('summary'))
|
||||
|
||||
video_id = item['uuid']
|
||||
def _extract_yahoo_video(self, video_id, country):
|
||||
video = self._download_json(
|
||||
api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
|
||||
'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
|
||||
video_id, 'Downloading video JSON metadata')[0]
|
||||
title = video['title']
|
||||
|
||||
@@ -298,7 +261,6 @@ class YahooIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'formats': formats,
|
||||
'display_id': display_id,
|
||||
'thumbnails': thumbnails,
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': parse_iso8601(video.get('publish_time')),
|
||||
@@ -311,6 +273,44 @@ class YahooIE(InfoExtractor):
|
||||
'episode_number': int_or_none(series_info.get('episode_number')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Extract a Yahoo article page.

    The URL is split with ``_VALID_URL`` into the canonical URL, an
    optional country prefix and the display id.  The article's
    ``partnerData`` is then fetched from the ``caas/content/article``
    endpoint of the matching country host.

    Returns the info dict produced by ``_extract_yahoo_video`` (with
    ``display_id`` added) when the item type is ``'video'``; otherwise a
    playlist result built from the cover video (if any) followed by every
    ``videoIframe`` body element that carries a URL.
    """
    url, country, display_id = re.match(self._VALID_URL, url).groups()
    # Default to the US host; otherwise keep only the part before the first '-'.
    country = country.split('-')[0] if country else 'us'

    article = self._download_json(
        'https://%s.yahoo.com/caas/content/article' % country, display_id,
        'Downloading content JSON metadata', query={
            'url': url
        })
    item = article['items'][0]['data']['partnerData']

    # Plain video article: delegate to the single-video extractor.
    if item.get('type') == 'video':
        info = self._extract_yahoo_video(item['uuid'], country)
        info['display_id'] = display_id
        return info

    # Anything else is treated as a container of embedded videos.
    entries = []

    cover = item.get('cover') or {}
    cover_url = cover.get('url') if cover.get('type') == 'yvideo' else None
    if cover_url:
        entries.append(self.url_result(
            cover_url, 'Yahoo', cover.get('uuid')))

    for element in item.get('body') or []:
        if element.get('type') != 'videoIframe':
            continue
        iframe_url = element.get('url')
        if iframe_url:
            entries.append(self.url_result(iframe_url))

    return self.playlist_result(
        entries, item.get('uuid'),
        item.get('title'), item.get('summary'))
|
||||
|
||||
|
||||
class YahooSearchIE(SearchInfoExtractor):
|
||||
IE_DESC = 'Yahoo screen search'
|
||||
|
@@ -842,6 +842,10 @@ def parseOpts(overrideArguments=None):
|
||||
'-o', '--output',
|
||||
dest='outtmpl', metavar='TEMPLATE',
|
||||
help='Output filename template, see "OUTPUT TEMPLATE" for details')
|
||||
filesystem.add_option(
|
||||
'--output-na-placeholder',
|
||||
dest='outtmpl_na_placeholder', metavar='PLACEHOLDER', default='NA',
|
||||
help=('Placeholder value for unavailable meta fields in output filename template (default is "%default")'))
|
||||
filesystem.add_option(
|
||||
'--autonumber-size',
|
||||
dest='autonumber_size', metavar='NUMBER', type=int,
|
||||
@@ -997,7 +1001,7 @@ def parseOpts(overrideArguments=None):
|
||||
postproc.add_option(
|
||||
'-x', '--extract-audio',
|
||||
action='store_true', dest='extractaudio', default=False,
|
||||
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
|
||||
help='Convert video files to audio-only files (requires ffmpeg/avconv and ffprobe/avprobe)')
|
||||
postproc.add_option(
|
||||
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
||||
help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
|
||||
|
Reference in New Issue
Block a user