[extractor/common, downloader/ism] Extract SSTR subtitle tracks

_parse_ism_formats was extended into _parse_ism_formats_and_subtitles;
all direct users were updated, though _extract_ism_formats was left
as a compatibility wrapper.

The SSTR downloader was also modified in order to prepare for muxing
subtitle streams, although no support for any subtitle codecs was
added in this commit.
This commit is contained in:
Felix S
2021-04-18 12:47:42 +02:00
parent 171e59edd4
commit fd76a14259
2 changed files with 98 additions and 45 deletions

View File

@@ -2750,26 +2750,38 @@ class InfoExtractor(object):
else:
# Assuming direct URL to unfragmented media.
f['url'] = base_url
formats.append(f)
if content_type in ('video', 'audio'):
formats.append(f)
elif content_type == 'text':
subtitles.setdefault(lang or 'und', []).append(f)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats, subtitles
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
def _extract_ism_formats(self, *args, **kwargs):
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
if subs:
self.report_warning(bug_reports_message(
"Ignoring subtitle tracks found in the ISM manifest; "
"if any subtitle tracks are missing,"
))
return fmts
def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle(
ism_url, video_id,
note=note or 'Downloading ISM manifest',
errnote=errnote or 'Failed to download ISM manifest',
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
return []
return [], {}
ism_doc, urlh = res
if ism_doc is None:
return []
return [], {}
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
"""
Parse formats from ISM manifest.
References:
@@ -2777,22 +2789,24 @@ class InfoExtractor(object):
https://msdn.microsoft.com/en-us/library/ff469518.aspx
"""
if ism_doc.get('IsLive') == 'TRUE':
return []
return [], {}
if (not self._downloader.params.get('allow_unplayable_formats')
and ism_doc.find('Protection') is not None):
return []
return [], {}
duration = int(ism_doc.attrib['Duration'])
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
formats = []
subtitles = {}
for stream in ism_doc.findall('StreamIndex'):
stream_type = stream.get('Type')
if stream_type not in ('video', 'audio'):
if stream_type not in ('video', 'audio', 'text'):
continue
url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name')
stream_language = stream.get('Language', 'und')
for track in stream.findall('QualityLevel'):
fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
# TODO: add support for WVC1 and WMAP
@@ -2839,33 +2853,52 @@ class InfoExtractor(object):
format_id.append(stream_name)
format_id.append(compat_str(tbr))
formats.append({
'format_id': '-'.join(format_id),
'url': ism_url,
'manifest_url': ism_url,
'ext': 'ismv' if stream_type == 'video' else 'isma',
'width': width,
'height': height,
'tbr': tbr,
'asr': sampling_rate,
'vcodec': 'none' if stream_type == 'audio' else fourcc,
'acodec': 'none' if stream_type == 'video' else fourcc,
'protocol': 'ism',
'fragments': fragments,
'_download_params': {
'duration': duration,
'timescale': stream_timescale,
'width': width or 0,
'height': height or 0,
'fourcc': fourcc,
'codec_private_data': track.get('CodecPrivateData'),
'sampling_rate': sampling_rate,
'channels': int_or_none(track.get('Channels', 2)),
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
},
})
return formats
if stream_type == 'text':
subtitles.setdefault(stream_language, []).append({
'ext': 'ismt',
'protocol': 'ism',
'url': ism_url,
'manifest_url': ism_url,
'fragments': fragments,
'_download_params': {
'stream_type': stream_type,
'duration': duration,
'timescale': stream_timescale,
'fourcc': fourcc,
'language': stream_language,
'codec_private_data': track.get('CodecPrivateData'),
}
})
elif stream_type in ('video', 'audio'):
formats.append({
'format_id': '-'.join(format_id),
'url': ism_url,
'manifest_url': ism_url,
'ext': 'ismv' if stream_type == 'video' else 'isma',
'width': width,
'height': height,
'tbr': tbr,
'asr': sampling_rate,
'vcodec': 'none' if stream_type == 'audio' else fourcc,
'acodec': 'none' if stream_type == 'video' else fourcc,
'protocol': 'ism',
'fragments': fragments,
'_download_params': {
'stream_type': stream_type,
'duration': duration,
'timescale': stream_timescale,
'width': width or 0,
'height': height or 0,
'fourcc': fourcc,
'language': stream_language,
'codec_private_data': track.get('CodecPrivateData'),
'sampling_rate': sampling_rate,
'channels': int_or_none(track.get('Channels', 2)),
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
},
})
return formats, subtitles
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
def absolute_url(item_url):