#31 Features from animelover1984/youtube-dl

* Add `--get-comments`
* [youtube] Extract comments
* [bilibili] Added BiliBiliSearchIE, BilibiliChannelIE
* [bilibili] Extract comments
* [bilibili] Better video extraction
* Write playlist data to infojson
* [FFmpegMetadata] Embed infojson inside the video
* [EmbedThumbnail] Try embedding in mp4 using ffprobe and `-disposition`
* [EmbedThumbnail] Treat mka like mkv and mov like mp4
* [EmbedThumbnail] Embed in ogg/opus
* [VideoRemuxer] Conditionally remux video
* [VideoRemuxer] Add `-movflags +faststart` when remuxing from mp4
* [ffmpeg] Print entire stderr in verbose mode when there is an error
* [EmbedSubtitle] Warn when embedding ass in mp4
* [anvato] Use NFLTokenGenerator if possible
This commit is contained in:
pukkandan
2021-01-27 20:32:51 +05:30
committed by GitHub
parent 4ff5e98991
commit 06167fbbd3
12 changed files with 583 additions and 68 deletions

View File

@@ -4,6 +4,15 @@ from __future__ import unicode_literals
import os
import subprocess
import struct
import re
import base64
try:
import mutagen
_has_mutagen = True
except ImportError:
_has_mutagen = False
from .ffmpeg import FFmpegPostProcessor
@@ -11,11 +20,12 @@ from ..utils import (
check_executable,
encodeArgument,
encodeFilename,
error_to_compat_str,
PostProcessingError,
prepend_extension,
process_communicate_or_kill,
replace_extension,
shell_quote,
process_communicate_or_kill,
)
@@ -73,6 +83,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
# Rename back to unescaped for further processing
os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename))
thumbnail_filename = thumbnail_jpg_filename
thumbnail_ext = 'jpg'
success = True
if info['ext'] == 'mp3':
@@ -83,47 +94,92 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
self.to_screen('Adding thumbnail to "%s"' % filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
elif info['ext'] == 'mkv':
options = [
'-c', 'copy', '-map', '0', '-dn', '-attach', thumbnail_filename,
'-metadata:s:t', 'mimetype=image/jpeg', '-metadata:s:t', 'filename=cover.jpg']
elif info['ext'] in ['mkv', 'mka']:
options = ['-c', 'copy', '-map', '0', '-dn']
mimetype = 'image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg')
old_stream, new_stream = self.get_stream_number(
filename, ('tags', 'mimetype'), mimetype)
if old_stream is not None:
options.extend(['-map', '-0:%d' % old_stream])
new_stream -= 1
options.extend([
'-attach', thumbnail_filename,
'-metadata:s:%d' % new_stream, 'mimetype=%s' % mimetype,
'-metadata:s:%d' % new_stream, 'filename=cover.%s' % thumbnail_ext])
self.to_screen('Adding thumbnail to "%s"' % filename)
self.run_ffmpeg_multiple_files([filename], temp_filename, options)
self.run_ffmpeg(filename, temp_filename, options)
elif info['ext'] in ['m4a', 'mp4']:
if not check_executable('AtomicParsley', ['-v']):
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
elif info['ext'] in ['m4a', 'mp4', 'mov']:
try:
options = ['-c', 'copy', '-map', '0', '-dn', '-map', '1']
cmd = [encodeFilename('AtomicParsley', True),
encodeFilename(filename, True),
encodeArgument('--artwork'),
encodeFilename(thumbnail_filename, True),
encodeArgument('-o'),
encodeFilename(temp_filename, True)]
cmd += [encodeArgument(o) for o in self._configuration_args(exe='AtomicParsley')]
old_stream, new_stream = self.get_stream_number(
filename, ('disposition', 'attached_pic'), 1)
if old_stream is not None:
options.extend(['-map', '-0:%d' % old_stream])
new_stream -= 1
options.extend(['-disposition:%s' % new_stream, 'attached_pic'])
self.to_screen('Adding thumbnail to "%s"' % filename)
self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
self.to_screen('Adding thumbnail to "%s"' % filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = process_communicate_or_kill(p)
except PostProcessingError as err:
self.report_warning('unable to embed using ffprobe & ffmpeg; %s' % error_to_compat_str(err))
if not check_executable('AtomicParsley', ['-v']):
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
if p.returncode != 0:
msg = stderr.decode('utf-8', 'replace').strip()
raise EmbedThumbnailPPError(msg)
# for formats that don't support thumbnails (like 3gp) AtomicParsley
# won't create to the temporary file
if b'No changes' in stdout:
self.report_warning('The file format doesn\'t support embedding a thumbnail')
success = False
cmd = [encodeFilename('AtomicParsley', True),
encodeFilename(filename, True),
encodeArgument('--artwork'),
encodeFilename(thumbnail_filename, True),
encodeArgument('-o'),
encodeFilename(temp_filename, True)]
cmd += [encodeArgument(o) for o in self._configuration_args(exe='AtomicParsley')]
self.to_screen('Adding thumbnail to "%s"' % filename)
self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = process_communicate_or_kill(p)
if p.returncode != 0:
msg = stderr.decode('utf-8', 'replace').strip()
raise EmbedThumbnailPPError(msg)
# for formats that don't support thumbnails (like 3gp) AtomicParsley
# won't create to the temporary file
if b'No changes' in stdout:
self.report_warning('The file format doesn\'t support embedding a thumbnail')
success = False
elif info['ext'] in ['ogg', 'opus']:
if not _has_mutagen:
raise EmbedThumbnailPPError('module mutagen was not found. Please install.')
size_regex = r',\s*(?P<w>\d+)x(?P<h>\d+)\s*[,\[]'
size_result = self.run_ffmpeg_multiple_files([thumbnail_filename], '', ['-hide_banner'])
mobj = re.search(size_regex, size_result)
width, height = int(mobj.group('w')), int(mobj.group('h'))
mimetype = ('image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg')).encode('ascii')
# https://xiph.org/flac/format.html#metadata_block_picture
data = bytearray()
data += struct.pack('>II', 3, len(mimetype))
data += mimetype
data += struct.pack('>IIIIII', 0, width, height, 8, 0, os.stat(thumbnail_filename).st_size) # 32 if png else 24
fin = open(thumbnail_filename, "rb")
data += fin.read()
fin.close()
temp_filename = filename
f = mutagen.File(temp_filename)
f.tags['METADATA_BLOCK_PICTURE'] = base64.b64encode(data).decode('ascii')
f.save()
else:
raise EmbedThumbnailPPError('Only mp3, mkv, m4a and mp4 are supported for thumbnail embedding for now.')
raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus, m4a/mp4/mov')
if success:
if success and temp_filename != filename:
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
files_to_delete = [] if self._already_have_thumbnail else [thumbnail_filename]
return files_to_delete, info

View File

@@ -5,6 +5,7 @@ import os
import subprocess
import time
import re
import json
from .common import AudioConversionError, PostProcessor
@@ -20,8 +21,9 @@ from ..utils import (
subtitles_filename,
dfxp2srt,
ISO639Utils,
replace_extension,
process_communicate_or_kill,
replace_extension,
traverse_dict,
)
@@ -201,6 +203,37 @@ class FFmpegPostProcessor(PostProcessor):
return mobj.group(1)
return None
def get_metadata_object(self, path, opts=None):
    """Probe *path* with ffprobe and return its parsed JSON report.

    ffprobe is invoked with ``-hide_banner -show_format -show_streams
    -print_format json``, followed by any extra arguments in *opts* and
    finally the input (after passing through
    ``self._ffmpeg_filename_argument``).

    path -- the input to inspect
    opts -- optional sequence of additional ffprobe arguments, appended
            to the command line as given

    Returns the object decoded from ffprobe's standard output.

    Raises PostProcessingError if the probe program in use is not
    ffprobe, if ffprobe exits with a non-zero status, or if its output
    cannot be decoded as JSON.
    """
    if self.probe_basename != 'ffprobe':
        if self.probe_available:
            self.report_warning('Only ffprobe is supported for metadata extraction')
        raise PostProcessingError('ffprobe not found. Please install.')
    self.check_version()

    cmd = [
        encodeFilename(self.probe_executable, True),
        encodeArgument('-hide_banner'),
        encodeArgument('-show_format'),
        encodeArgument('-show_streams'),
        encodeArgument('-print_format'),
        encodeArgument('json'),
    ]
    # ``None`` replaces the former shared mutable ``[]`` default
    cmd += opts or []
    cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))

    if self._downloader.params.get('verbose', False):
        self._downloader.to_screen('[debug] ffprobe command line: %s' % shell_quote(cmd))

    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    # Same helper the ffmpeg invocation in this file uses to collect output
    stdout, stderr = process_communicate_or_kill(p)

    # Report a failed probe as a post-processing error instead of letting
    # it surface later as a JSON/KeyError that callers do not expect
    if p.returncode != 0:
        msg = stderr.decode('utf-8', 'replace').strip().split('\n')[-1]
        raise PostProcessingError(msg or 'ffprobe exited with code %d' % p.returncode)

    try:
        return json.loads(stdout.decode('utf-8', 'replace'))
    except ValueError as err:
        raise PostProcessingError('Unable to parse ffprobe output: %s' % err)
def get_stream_number(self, path, keys, value):
    """Locate the first stream of *path* whose metadata matches *value*.

    The stream list comes from ``self.get_metadata_object(path)['streams']``.
    Each stream is looked up with ``traverse_dict(stream, keys,
    casesense=False)`` and the result is compared to *value*.

    Returns a ``(index, count)`` tuple: the position of the first matching
    stream (``None`` when nothing matches) and the total number of streams.
    """
    streams = self.get_metadata_object(path)['streams']

    match_index = None
    for index, stream in enumerate(streams):
        if traverse_dict(stream, keys, casesense=False) == value:
            match_index = index
            break

    return match_index, len(streams)
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
self.check_version()
@@ -227,10 +260,12 @@ class FFmpegPostProcessor(PostProcessor):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = process_communicate_or_kill(p)
if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace')
msg = stderr.strip().split('\n')[-1]
raise FFmpegPostProcessorError(msg)
stderr = stderr.decode('utf-8', 'replace').strip()
if self._downloader.params.get('verbose', False):
self.report_error(stderr)
raise FFmpegPostProcessorError(stderr.split('\n')[-1])
self.try_utime(out_path, oldest_mtime, oldest_mtime)
return stderr.decode('utf-8', 'replace')
def run_ffmpeg(self, path, out_path, opts):
    """Run ffmpeg on the single input *path*, writing to *out_path*.

    Thin wrapper around ``run_ffmpeg_multiple_files`` with a one-element
    input list. Whatever that method returns is passed back to the caller
    rather than being discarded.
    """
    return self.run_ffmpeg_multiple_files([path], out_path, opts)
@@ -240,6 +275,8 @@ class FFmpegPostProcessor(PostProcessor):
# interprets that as a protocol) or can start with '-' (-- is broken in
# ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
# Also leave '-' intact in order not to break streaming to stdout.
if fn.startswith(('http://', 'https://')):
return fn
return 'file:' + fn if fn != '-' else fn
@@ -349,21 +386,35 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
class FFmpegVideoRemuxerPP(FFmpegPostProcessor):
    """Post-processor that remuxes a file into another container with ffmpeg.

    *preferedformat* is a ``/``-separated list of rules. Each rule is either
    a bare target extension (applies to any source) or ``source>target``
    (applies only when the file's extension equals ``source``). The first
    applicable rule wins.
    """

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoRemuxerPP, self).__init__(downloader)
        # Tolerate the declared default of None: it yields a single empty
        # rule, which run() reports as "could not find a mapping"
        self._preferedformats = (preferedformat or '').lower().split('/')

    def run(self, information):
        """Remux ``information['filepath']`` according to the configured rules.

        Returns ``([], information)`` untouched when no rule applies or the
        file already has the target extension. Otherwise returns
        ``([old_path], information)`` with ``filepath``, ``format`` and
        ``ext`` updated to describe the remuxed file.
        """
        path = information['filepath']
        sourceext, targetext = information['ext'].lower(), None

        # Pick the first rule that is unconditional or matches the source
        for pair in self._preferedformats:
            kv = pair.split('>')
            if len(kv) == 1 or kv[0].strip() == sourceext:
                targetext = kv[-1].strip()
                break

        _skip_msg = (
            'could not find a mapping for %s' if not targetext
            else 'already is in target format %s' if sourceext == targetext
            else None)
        if _skip_msg:
            self.to_screen('Not remuxing media file %s - %s' % (path, _skip_msg % sourceext))
            return [], information

        options = ['-c', 'copy', '-map', '0', '-dn']
        if targetext in ['mp4', 'm4a', 'mov']:
            options.extend(['-movflags', '+faststart'])

        # Swap everything after the last '.' for the target extension
        prefix, sep, _ = path.rpartition('.')
        outpath = prefix + sep + targetext
        self.to_screen('Remuxing video from %s to %s; Destination: %s' % (sourceext, targetext, outpath))
        self.run_ffmpeg(path, outpath, options)

        information['filepath'] = outpath
        information['format'] = targetext
        information['ext'] = targetext
        return [path], information
@@ -406,18 +457,22 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
sub_langs = []
sub_filenames = []
webm_vtt_warn = False
mp4_ass_warn = False
for lang, sub_info in subtitles.items():
sub_ext = sub_info['ext']
if sub_ext == 'json':
self.to_screen('JSON subtitles cannot be embedded')
self.report_warning('JSON subtitles cannot be embedded')
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang)
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
else:
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
webm_vtt_warn = True
self.to_screen('Only WebVTT subtitles can be embedded in webm files')
self.report_warning('Only WebVTT subtitles can be embedded in webm files')
if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
mp4_ass_warn = True
self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
if not sub_langs:
return [], information
@@ -441,7 +496,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
temp_filename = prepend_extension(filename, 'temp')
self.to_screen('Embedding subtitles in \'%s\'' % filename)
self.to_screen('Embedding subtitles in "%s"' % filename)
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
@@ -471,7 +526,6 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
# 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
# 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
# 3. https://kodi.wiki/view/Video_file_tagging
# 4. http://atomicparsley.sourceforge.net/mpeg-4files.html
add('title', ('track', 'title'))
add('date', 'upload_date')
@@ -524,6 +578,18 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
in_filenames.append(metadata_filename)
options.extend(['-map_metadata', '1'])
if '__infojson_filepath' in info and info['ext'] in ('mkv', 'mka'):
old_stream, new_stream = self.get_stream_number(
filename, ('tags', 'mimetype'), 'application/json')
if old_stream is not None:
options.extend(['-map', '-0:%d' % old_stream])
new_stream -= 1
options.extend([
'-attach', info['__infojson_filepath'],
'-metadata:s:%d' % new_stream, 'mimetype=application/json'
])
self.to_screen('Adding metadata to \'%s\'' % filename)
self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
if chapters: