Merge branch 'main' into patch-3
commit a7f872e902
@@ -373,6 +373,10 @@ COPY config/root /
 COPY patches/background_task/ \
     /usr/local/lib/python3/dist-packages/background_task/
 
+# patch yt_dlp
+COPY patches/yt_dlp/ \
+    /usr/local/lib/python3/dist-packages/yt_dlp/
+
 # Create a healthcheck
 HEALTHCHECK --interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck.py", "http://127.0.0.1:8080/healthcheck"]
 
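The HEALTHCHECK instruction above runs a small script baked into the image against the local web port (127.0.0.1:8080). As a rough sketch of what such a script typically does (the repository's actual /app/healthcheck.py may differ), using only the Python standard library:

#!/usr/bin/env python3
# Hypothetical healthcheck sketch: fetch the URL passed on the command line
# and exit 0 on HTTP 200, 1 otherwise. Docker marks the container unhealthy
# on a non-zero exit status.
import sys
import urllib.request


def check(url):
    try:
        with urllib.request.urlopen(url, timeout=5) as resp:
            return 0 if resp.status == 200 else 1
    except Exception:
        return 1


if __name__ == '__main__':
    sys.exit(check(sys.argv[1]))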
patches/yt_dlp/postprocessor/modify_chapters.py (new file, 393 lines)
@@ -0,0 +1,393 @@
import copy
import heapq
import itertools
import os
import subprocess

from .common import PostProcessor
from .ffmpeg import (
    FFmpegPostProcessor,
    FFmpegPostProcessorError,
    FFmpegSubtitlesConvertorPP,
)
from .sponsorblock import SponsorBlockPP
from ..utils import (
    Popen,
    PostProcessingError,
    encodeArgument,
    orderedSet,
    prepend_extension,
    shell_quote,
    variadic,
)

_TINY_CHAPTER_DURATION = 1
DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'


class ModifyChaptersPP(FFmpegPostProcessor):
    def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, remove_ranges=None,
                 *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
        FFmpegPostProcessor.__init__(self, downloader)
        self._remove_chapters_patterns = set(remove_chapters_patterns or [])
        self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())
        self._ranges_to_remove = set(remove_ranges or [])
        self._sponsorblock_chapter_title = sponsorblock_chapter_title
        self._force_keyframes = force_keyframes

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        self._fixup_chapters(info)
        # Chapters must be preserved intact when downloading multiple formats of the same video.
        chapters, sponsor_chapters = self._mark_chapters_to_remove(
            copy.deepcopy(info.get('chapters')) or [],
            copy.deepcopy(info.get('sponsorblock_chapters')) or [])
        if not chapters and not sponsor_chapters:
            return [], info

        real_duration = self._get_real_video_duration(info['filepath'])
        if not chapters:
            chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}]

        info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters)
        if not cuts:
            return [], info
        elif not info['chapters']:
            self.report_warning('You have requested to remove the entire video, which is not possible')
            return [], info

        original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time']
        if self._duration_mismatch(real_duration, original_duration, 1):
            if not self._duration_mismatch(real_duration, info['duration']):
                self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
                return [], info
            if not info.get('__real_download'):
                raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. '
                                          'Different chapters may have already been removed')
            else:
                self.write_debug('Expected and actual durations mismatch')

        concat_opts = self._make_concat_opts(cuts, real_duration)
        self.write_debug('Concat spec = {}'.format(', '.join(f'{c.get("inpoint", 0.0)}-{c.get("outpoint", "inf")}' for c in concat_opts)))

        def remove_chapters(file, is_sub):
            return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub)

        in_out_files = [remove_chapters(info['filepath'], False)]
        in_out_files.extend(remove_chapters(in_file, True) for in_file in self._get_supported_subs(info))

        # Renaming should only happen after all files are processed
        files_to_remove = []
        for in_file, out_file in in_out_files:
            mtime = os.stat(in_file).st_mtime
            uncut_file = prepend_extension(in_file, 'uncut')
            os.replace(in_file, uncut_file)
            os.replace(out_file, in_file)
            self.try_utime(in_file, mtime, mtime)
            files_to_remove.append(uncut_file)

        return files_to_remove, info

    def _mark_chapters_to_remove(self, chapters, sponsor_chapters):
        if self._remove_chapters_patterns:
            warn_no_chapter_to_remove = True
            if not chapters:
                self.to_screen('Chapter information is unavailable')
                warn_no_chapter_to_remove = False
            for c in chapters:
                if any(regex.search(c['title']) for regex in self._remove_chapters_patterns):
                    c['remove'] = True
                    warn_no_chapter_to_remove = False
            if warn_no_chapter_to_remove:
                self.to_screen('There are no chapters matching the regex')

        if self._remove_sponsor_segments:
            warn_no_chapter_to_remove = True
            if not sponsor_chapters:
                self.to_screen('SponsorBlock information is unavailable')
                warn_no_chapter_to_remove = False
            for c in sponsor_chapters:
                if c['category'] in self._remove_sponsor_segments:
                    c['remove'] = True
                    warn_no_chapter_to_remove = False
            if warn_no_chapter_to_remove:
                self.to_screen('There are no matching SponsorBlock chapters')

        sponsor_chapters.extend({
            'start_time': start,
            'end_time': end,
            'category': 'manually_removed',
            '_categories': [('manually_removed', start, end, 'Manually removed')],
            'remove': True,
        } for start, end in self._ranges_to_remove)

        return chapters, sponsor_chapters

    def _get_supported_subs(self, info):
        for sub in (info.get('requested_subtitles') or {}).values():
            sub_file = sub.get('filepath')
            # The file might have been removed by --embed-subs
            if not sub_file or not os.path.exists(sub_file):
                continue
            ext = sub['ext']
            if ext not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS:
                self.report_warning(f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync')
                continue
            # TODO: create __real_download for subs?
            yield sub_file

    def _remove_marked_arrange_sponsors(self, chapters):
        # Store cuts separately, since adjacent and overlapping cuts must be merged.
        cuts = []

        def append_cut(c):
            assert 'remove' in c, 'Not a cut is appended to cuts'
            last_to_cut = cuts[-1] if cuts else None
            if last_to_cut and last_to_cut['end_time'] >= c['start_time']:
                last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time'])
            else:
                cuts.append(c)
            return len(cuts) - 1

        def excess_duration(c):
            # Cuts that are completely within the chapter reduce the chapter's duration.
            # Since cuts can overlap, excess duration may be less than the sum of cuts' durations.
            # To avoid that, the chapter stores the index of the first cut within the chapter,
            # instead of storing excess duration. append_cut ensures that subsequent cuts (if any)
            # will be merged with previous ones (if necessary).
            cut_idx, excess = c.pop('cut_idx', len(cuts)), 0
            while cut_idx < len(cuts):
                cut = cuts[cut_idx]
                if cut['start_time'] >= c['end_time']:
                    break
                if cut['end_time'] > c['start_time']:
                    excess += min(cut['end_time'], c['end_time'])
                    excess -= max(cut['start_time'], c['start_time'])
                cut_idx += 1
            return excess

        new_chapters = []

        def append_chapter(c):
            assert 'remove' not in c, 'Cut is appended to chapters'
            length = c['end_time'] - c['start_time'] - excess_duration(c)
            # Chapter is completely covered by cuts or sponsors.
            if length <= 0:
                return
            start = new_chapters[-1]['end_time'] if new_chapters else 0
            c.update(start_time=start, end_time=start + length)
            new_chapters.append(c)

        # Turn into a priority queue, index is a tie breaker.
        # Plain stack sorted by start_time is not enough: after splitting the chapter,
        # the part returned to the stack is not guaranteed to have start_time
        # less than or equal to that of the stack's head.
        chapters = [(c['start_time'], i, c) for i, c in enumerate(chapters)]
        heapq.heapify(chapters)

        _, cur_i, cur_chapter = heapq.heappop(chapters)
        while chapters:
            _, i, c = heapq.heappop(chapters)
            # Non-overlapping chapters or cuts can be appended directly. However,
            # adjacent non-overlapping cuts must be merged, which is handled by append_cut.
            if cur_chapter['end_time'] <= c['start_time']:
                (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
                cur_i, cur_chapter = i, c
                continue

            # Eight possibilities for overlapping chapters: (cut, cut), (cut, sponsor),
            # (cut, normal), (sponsor, cut), (normal, cut), (sponsor, sponsor),
            # (sponsor, normal), and (normal, sponsor). There is no (normal, normal):
            # normal chapters are assumed not to overlap.
            if 'remove' in cur_chapter:
                # (cut, cut): adjust end_time.
                if 'remove' in c:
                    cur_chapter['end_time'] = max(cur_chapter['end_time'], c['end_time'])
                # (cut, sponsor/normal): chop the beginning of the later chapter
                # (if it's not completely hidden by the cut). Push to the priority queue
                # to restore sorting by start_time: with beginning chopped, c may actually
                # start later than the remaining chapters from the queue.
                elif cur_chapter['end_time'] < c['end_time']:
                    c['start_time'] = cur_chapter['end_time']
                    c['_was_cut'] = True
                    heapq.heappush(chapters, (c['start_time'], i, c))
            # (sponsor/normal, cut).
            elif 'remove' in c:
                cur_chapter['_was_cut'] = True
                # Chop the end of the current chapter if the cut is not contained within it.
                # Chopping the end doesn't break start_time sorting, no PQ push is necessary.
                if cur_chapter['end_time'] <= c['end_time']:
                    cur_chapter['end_time'] = c['start_time']
                    append_chapter(cur_chapter)
                    cur_i, cur_chapter = i, c
                    continue
                # Current chapter contains the cut within it. If the current chapter is
                # a sponsor chapter, check whether the categories before and after the cut differ.
                if '_categories' in cur_chapter:
                    after_c = dict(cur_chapter, start_time=c['end_time'], _categories=[])
                    cur_cats = []
                    for cat_start_end in cur_chapter['_categories']:
                        if cat_start_end[1] < c['start_time']:
                            cur_cats.append(cat_start_end)
                        if cat_start_end[2] > c['end_time']:
                            after_c['_categories'].append(cat_start_end)
                    cur_chapter['_categories'] = cur_cats
                    if cur_chapter['_categories'] != after_c['_categories']:
                        # Categories before and after the cut differ: push the after part to PQ.
                        heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
                        cur_chapter['end_time'] = c['start_time']
                        append_chapter(cur_chapter)
                        cur_i, cur_chapter = i, c
                        continue
                # Either sponsor categories before and after the cut are the same or
                # we're dealing with a normal chapter. Just register an outstanding cut:
                # subsequent append_chapter will reduce the duration.
                cur_chapter.setdefault('cut_idx', append_cut(c))
            # (sponsor, normal): if a normal chapter is not completely overlapped,
            # chop the beginning of it and push it to PQ.
            elif '_categories' in cur_chapter and '_categories' not in c:
                if cur_chapter['end_time'] < c['end_time']:
                    c['start_time'] = cur_chapter['end_time']
                    c['_was_cut'] = True
                    heapq.heappush(chapters, (c['start_time'], i, c))
            # (normal, sponsor) and (sponsor, sponsor)
            else:
                assert '_categories' in c, 'Normal chapters overlap'
                cur_chapter['_was_cut'] = True
                c['_was_cut'] = True
                # Push the part after the sponsor to PQ.
                if cur_chapter['end_time'] > c['end_time']:
                    # deepcopy to make categories in after_c and cur_chapter/c refer to different lists.
                    after_c = dict(copy.deepcopy(cur_chapter), start_time=c['end_time'])
                    heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
                # Push the part after the overlap to PQ.
                elif c['end_time'] > cur_chapter['end_time']:
                    after_cur = dict(copy.deepcopy(c), start_time=cur_chapter['end_time'])
                    heapq.heappush(chapters, (after_cur['start_time'], cur_i, after_cur))
                    c['end_time'] = cur_chapter['end_time']
                # (sponsor, sponsor): merge categories in the overlap.
                if '_categories' in cur_chapter:
                    c['_categories'] = cur_chapter['_categories'] + c['_categories']
                # Inherit the cuts that the current chapter has accumulated within it.
                if 'cut_idx' in cur_chapter:
                    c['cut_idx'] = cur_chapter['cut_idx']
                cur_chapter['end_time'] = c['start_time']
                append_chapter(cur_chapter)
                cur_i, cur_chapter = i, c
        (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
        return self._remove_tiny_rename_sponsors(new_chapters), cuts

    def _remove_tiny_rename_sponsors(self, chapters):
        new_chapters = []
        for i, c in enumerate(chapters):
            # Merge with the previous/next if the chapter is tiny.
            # Only tiny chapters resulting from a cut can be skipped.
            # Chapters that were already tiny in the original list will be preserved.
            if (('_was_cut' in c or '_categories' in c)
                    and c['end_time'] - c['start_time'] < _TINY_CHAPTER_DURATION):
                if not new_chapters:
                    # Prepend tiny chapter to the next one if possible.
                    if i < len(chapters) - 1:
                        chapters[i + 1]['start_time'] = c['start_time']
                        continue
                else:
                    old_c = new_chapters[-1]
                    if i < len(chapters) - 1:
                        next_c = chapters[i + 1]
                        # Not a typo: key names in old_c and next_c are really different.
                        prev_is_sponsor = 'categories' in old_c
                        next_is_sponsor = '_categories' in next_c
                        # Preferentially prepend tiny normals to normals and sponsors to sponsors.
                        if (('_categories' not in c and prev_is_sponsor and not next_is_sponsor)
                                or ('_categories' in c and not prev_is_sponsor and next_is_sponsor)):
                            next_c['start_time'] = c['start_time']
                            continue
                    old_c['end_time'] = c['end_time']
                    continue

            c.pop('_was_cut', None)
            cats = c.pop('_categories', None)
            if cats:
                category, _, _, category_name = min(cats, key=lambda c: c[2] - c[1])
                c.update({
                    'category': category,
                    'categories': orderedSet(x[0] for x in cats),
                    'name': category_name,
                    'category_names': orderedSet(x[3] for x in cats),
                })
                c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c.copy())
                # Merge identically named sponsors.
                if (new_chapters and 'categories' in new_chapters[-1]
                        and new_chapters[-1]['title'] == c['title']):
                    new_chapters[-1]['end_time'] = c['end_time']
                    continue
            new_chapters.append(c)
        return new_chapters

    def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False):
        in_file = filename
        out_file = prepend_extension(in_file, 'temp')
        if force_keyframes:
            in_file = self.force_keyframes(in_file, (t for c in ranges_to_cut for t in (c['start_time'], c['end_time'])))
        self.to_screen(f'Removing chapters from {filename}')
        self.concat_files([in_file] * len(concat_opts), out_file, concat_opts)
        if in_file != filename:
            self._delete_downloaded_files(in_file, msg=None)
        return out_file

# override to change the args ordering
|
||||
def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
|
||||
self.check_version()
|
||||
|
||||
oldest_mtime = min(
|
||||
os.stat(path).st_mtime for path, _ in input_path_opts if path)
|
||||
|
||||
cmd = [self.executable, encodeArgument('-y')]
|
||||
# avconv does not have repeat option
|
||||
if self.basename == 'ffmpeg':
|
||||
cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
|
||||
|
||||
def make_args(file, args, name, number):
|
||||
keys = [f'_{name}{number}', f'_{name}']
|
||||
if name == 'o':
|
||||
args += ['-movflags', '+faststart']
|
||||
if number == 1:
|
||||
keys.append('')
|
||||
args = self._configuration_args(self.basename, keys) + args
|
||||
if name == 'i':
|
||||
args.append('-i')
|
||||
return (
|
||||
[encodeArgument(arg) for arg in args]
|
||||
+ [self._ffmpeg_filename_argument(file)])
|
||||
|
||||
for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
|
||||
cmd += itertools.chain.from_iterable(
|
||||
make_args(path, list(opts), arg_type, i + 1)
|
||||
for i, (path, opts) in enumerate(path_opts) if path)
|
||||
|
||||
self.write_debug(f'ffmpeg command line: {shell_quote(cmd)}')
|
||||
_, stderr, returncode = Popen.run(
|
||||
cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
|
||||
if returncode not in variadic(expected_retcodes):
|
||||
self.write_debug(stderr)
|
||||
raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1])
|
||||
for out_path, _ in output_path_opts:
|
||||
if out_path:
|
||||
self.try_utime(out_path, oldest_mtime, oldest_mtime)
|
||||
return stderr
|
||||
|
||||
|
||||
    @staticmethod
    def _make_concat_opts(chapters_to_remove, duration):
        opts = [{}]
        for s in chapters_to_remove:
            # Do not create 0 duration chunk at the beginning.
            if s['start_time'] == 0:
                opts[-1]['inpoint'] = f'{s["end_time"]:.6f}'
                continue
            opts[-1]['outpoint'] = f'{s["start_time"]:.6f}'
            # Do not create 0 duration chunk at the end.
            if s['end_time'] < duration:
                opts.append({'inpoint': f'{s["end_time"]:.6f}'})
        return opts
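As an aside, this patched post-processor is attached the same way as the stock ModifyChapters one, through yt-dlp's documented 'postprocessors' embedding option. A minimal sketch; the URL and the 'sponsor' category are placeholders, not values taken from tubesync:

import yt_dlp

# Mark SponsorBlock segments first, then let ModifyChaptersPP cut the marked
# 'sponsor' ranges out of the downloaded file.
opts = {
    'postprocessors': [
        {'key': 'SponsorBlock', 'categories': ['sponsor']},
        {'key': 'ModifyChapters', 'remove_sponsor_segments': ['sponsor']},
    ],
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=EXAMPLE'])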
@@ -5,83 +5,179 @@ from common.logger import log
from django.conf import settings


class ProgressHookStatus:
progress_hook = {
    'status': dict(),
}

postprocessor_hook = {
    'status': dict(),
}


class BaseStatus:
    status_dict = dict()
    valid = set()

    @classmethod
    def get(cls, key):
        return cls.status_dict.get(key, None)

    @classmethod
    def valid_status(cls, status):
        return status in cls.valid

    def __init__(self, hook_status_dict=None):
        self.media_key = None
        self.task_status = '[Started: 0%]'
        self.task_verbose_name = None
        self._status_dict = hook_status_dict or self.status_dict
        self._registered_keys = set()

    def register(self, *args):
        additions = dict()
        for key in args:
            if key is not None:
                self._registered_keys.add(key)
                additions[key] = self
        self._status_dict.update(additions)

    def cleanup(self):
        for key in self._registered_keys:
            if key in self._status_dict:
                del self._status_dict[key]

    def update_task(self):
        if self.media_key is None:
            return
        from .models import Media
        from .tasks import get_media_download_task

        media = Media.objects.get(key=self.media_key)
        task = get_media_download_task(str(media.pk))
        if task:
            if self.task_verbose_name is None:
                # clean up any previously prepended task_status
                # this happened because of duplicated tasks on my test system
                s = task.verbose_name
                cleaned = s[1+s.find(' Downloading '):]
                self.task_verbose_name = cleaned
            task.verbose_name = f'{self.task_status} {self.task_verbose_name}'
            task.save()

class ProgressHookStatus(BaseStatus):
    status_dict = progress_hook['status']
    valid = frozenset((
        'downloading',
        'finished',
        'error',
    ))

    def __init__(self):
    def __init__(self, *args, status=None, info_dict={}, filename=None, **kwargs):
        super().__init__(self.status_dict)
        self.filename = filename
        self.info = info_dict
        self.status = status
        self.download_progress = 0

class PPHookStatus:
    def next_progress(self):
        if 0 == self.download_progress:
            return 0
        return 1 + self.download_progress

class PPHookStatus(BaseStatus):
    status_dict = postprocessor_hook['status']
    valid = frozenset((
        'started',
        'processing',
        'finished',
    ))

    def __init__(self, *args, status=None, postprocessor=None, info_dict={}, **kwargs):
    def __init__(self, *args, status=None, postprocessor=None, info_dict={}, filename=None, **kwargs):
        super().__init__(self.status_dict)
        self.filename = filename
        self.info = info_dict
        self.media_name = None
        self.name = postprocessor
        self.status = status


def yt_dlp_progress_hook(event):
    hook = progress_hook.get('status', None)
    filename = os.path.basename(event['filename'])
    if hook is None:
        log.error('yt_dlp_progress_hook: failed to get hook status object')
    if not ProgressHookStatus.valid_status(event['status']):
        log.warn(f'[youtube-dl] unknown progress event: {str(event)}')
        return None

    if event['status'] not in ProgressHookStatus.valid:
        log.warn(f'[youtube-dl] unknown event: {str(event)}')
        return None
    key = None
    if 'display_id' in event['info_dict']:
        key = event['info_dict']['display_id']
    elif 'id' in event['info_dict']:
        key = event['info_dict']['id']

    if event.get('downloaded_bytes') is None or event.get('total_bytes') is None:
        return None

    if event['status'] == 'error':
    filename = os.path.basename(event.get('filename', '???'))
    if 'error' == event['status']:
        log.error(f'[youtube-dl] error occured downloading: {filename}')
    elif event['status'] == 'downloading':
        downloaded_bytes = event.get('downloaded_bytes', 0)
        total_bytes = event.get('total_bytes', 0)
    elif 'downloading' == event['status']:
        # get or create the status for filename
        status = ProgressHookStatus.get(filename)
        if status is None:
            status = ProgressHookStatus(**event)
            status.register(key, filename, status.filename)

        downloaded_bytes = event.get('downloaded_bytes', 0) or 0
        total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0
        total_bytes = event.get('total_bytes', 0) or total_bytes_estimate
        fragment_index = event.get('fragment_index', 0) or 0
        fragment_count = event.get('fragment_count', 0) or 0
        eta = event.get('_eta_str', '?').strip()
        percent_done = event.get('_percent_str', '?').strip()
        percent_str = event.get('_percent_str', '?').strip()
        speed = event.get('_speed_str', '?').strip()
        total = event.get('_total_bytes_str', '?').strip()
        if downloaded_bytes > 0 and total_bytes > 0:
            p = round((event['downloaded_bytes'] / event['total_bytes']) * 100)
            if (p % 5 == 0) and p > hook.download_progress:
                hook.download_progress = p
                log.info(f'[youtube-dl] downloading: {filename} - {percent_done} '
                         f'of {total} at {speed}, {eta} remaining')
        else:
            # No progress to monitor, just spam every 10 download messages instead
            hook.download_progress += 1
            if hook.download_progress % 10 == 0:
                log.info(f'[youtube-dl] downloading: {filename} - {percent_done} '
                         f'of {total} at {speed}, {eta} remaining')
    elif event['status'] == 'finished':
        percent = None
        try:
            percent = int(float(percent_str.rstrip('%')))
        except:
            pass
        if fragment_index >= 0 and fragment_count > 0:
            percent = round(100 * fragment_index / fragment_count)
            percent_str = f'{percent}%'
        elif downloaded_bytes >= 0 and total_bytes > 0:
            percent = round(100 * downloaded_bytes / total_bytes)
        if percent and (status.next_progress() < percent) and (0 == percent % 5):
            status.download_progress = percent
            if key:
                status.media_key = key
                status.task_status = f'[downloading: {percent_str}]'
                status.update_task()
            log.info(f'[youtube-dl] downloading: {filename} - {percent_str} '
                     f'of {total} at {speed}, {eta} remaining')
    elif 'finished' == event['status']:
        # update the status for filename to the finished value
        status = ProgressHookStatus.get(filename)
        if status is None:
            status = ProgressHookStatus(**event)
            status.register(key, filename, status.filename)
        status.download_progress = 100

        total_size_str = event.get('_total_bytes_str', '?').strip()
        elapsed_str = event.get('_elapsed_str', '?').strip()
        log.info(f'[youtube-dl] finished downloading: {filename} - '
                 f'{total_size_str} in {elapsed_str}')

        status.cleanup()

def yt_dlp_postprocessor_hook(event):
    if event['status'] not in PPHookStatus.valid:
        log.warn(f'[youtube-dl] unknown event: {str(event)}')
    if not PPHookStatus.valid_status(event['status']):
        log.warn(f'[youtube-dl] unknown postprocessor event: {str(event)}')
        return None

    postprocessor_hook['status'] = PPHookStatus(*event)

    name = key = 'Unknown'
    filename = os.path.basename(event.get('filename', '???'))
    if 'display_id' in event['info_dict']:
        key = event['info_dict']['display_id']
    elif 'id' in event['info_dict']:
        key = event['info_dict']['id']

    status = PPHookStatus(**event)
    status.register(key, filename, status.filename)

    title = None
    if 'fulltitle' in event['info_dict']:
        title = event['info_dict']['fulltitle']
@@ -91,6 +187,8 @@ def yt_dlp_postprocessor_hook(event):
    if title:
        name = f'{key}: {title}'

    status.media_name = name

    if 'started' == event['status']:
        if 'formats' in event['info_dict']:
            del event['info_dict']['formats']
@@ -98,16 +196,23 @@ def yt_dlp_postprocessor_hook(event):
        del event['info_dict']['automatic_captions']
        log.debug(repr(event['info_dict']))

    if 'Unknown' != key:
        status.media_key = key
        status.task_status = f'[{event["postprocessor"]}: {event["status"]}]'
        status.update_task()

    log.info(f'[{event["postprocessor"]}] {event["status"]} for: {name}')
    if 'finished' == event['status']:
        status.cleanup()


progress_hook = {
    'status': ProgressHookStatus(),
progress_hook.update({
    'class': ProgressHookStatus(),
    'function': yt_dlp_progress_hook,
}
})

postprocessor_hook = {
    'status': PPHookStatus(),
postprocessor_hook.update({
    'class': PPHookStatus(),
    'function': yt_dlp_postprocessor_hook,
}
})

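For reference, 'progress_hooks' and 'postprocessor_hooks' are the yt-dlp option names these callables are meant for; the wiring below is an illustrative assumption (tubesync presumably does the equivalent in its download path), and the import path assumes this module is importable as sync.hooks:

import yt_dlp

from sync.hooks import progress_hook, postprocessor_hook

# Hand the hook functions defined above to a YoutubeDL instance so every
# download and post-processing event is routed through them.
opts = {
    'progress_hooks': [progress_hook['function']],
    'postprocessor_hooks': [postprocessor_hook['function']],
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=EXAMPLE'])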
@@ -664,6 +664,11 @@ class Media(models.Model):
            Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date',
            Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date',
        },
        'timestamp': {
            Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp',
            Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp',
            Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp',
        },
        'title': {
            Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title',
            Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title',
@@ -930,7 +935,7 @@ class Media(models.Model):
    def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
        # Trigger an update of derived fields from metadata
        if self.metadata:
            self.title = self.metadata_title
            self.title = self.metadata_title[:200]
            self.duration = self.metadata_duration
        if update_fields is not None and "metadata" in update_fields:
            # If only some fields are being updated, make sure we update title and duration if metadata changes
@@ -944,7 +949,7 @@ class Media(models.Model):

    def get_metadata_field(self, field):
        fields = self.METADATA_FIELDS.get(field, {})
        return fields.get(self.source.source_type, '')
        return fields.get(self.source.source_type, field)

    def iter_formats(self):
        for fmt in self.formats:
@@ -1561,6 +1566,8 @@ class Media(models.Model):
        if self.downloaded and self.media_file:
            old_video_path = Path(self.media_file.path)
            new_video_path = Path(get_media_file_path(self, None))
            if old_video_path == new_video_path:
                return
            if old_video_path.exists() and not new_video_path.exists():
                old_video_path = old_video_path.resolve(strict=True)

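A small illustration of the two behaviour changes in this hunk: get_metadata_field() now falls back to the field name itself instead of an empty string, and save() truncates the derived title to 200 characters. Everything below is a hypothetical stand-in, not the Django model:

# Hypothetical, simplified mirror of Media.get_metadata_field().
METADATA_FIELDS = {'published': {'youtube-channel': 'upload_date'}}


def get_metadata_field(source_type, field):
    fields = METADATA_FIELDS.get(field, {})
    return fields.get(source_type, field)


print(get_metadata_field('youtube-channel', 'published'))   # 'upload_date'
print(get_metadata_field('youtube-channel', 'timestamp'))   # 'timestamp' (previously '')

title = 'x' * 250
print(len(title[:200]))  # 200 characters, matching the slice in save()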
@@ -14,7 +14,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
                    map_task_to_instance, check_source_directory_exists,
                    download_media, rescan_media_server, download_source_images,
                    save_all_media_for_source, rename_all_media_for_source,
                    get_media_metadata_task)
                    get_media_metadata_task, get_media_download_task)
from .utils import delete_file, glob_quote
from .filtering import filter_media

@@ -86,7 +86,7 @@ def source_post_save(sender, instance, created, **kwargs):
        queue=str(instance.pk),
        priority=1,
        verbose_name=verbose_name.format(instance.name),
        remove_existing_tasks=False
        remove_existing_tasks=True
    )
    verbose_name = _('Checking all media for source "{}"')
    save_all_media_for_source(
@@ -156,8 +156,9 @@ def media_post_save(sender, instance, created, **kwargs):
        post_save.disconnect(media_post_save, sender=Media)
        instance.save()
        post_save.connect(media_post_save, sender=Media)
    existing_media_metadata_task = get_media_metadata_task(str(instance.pk))
    # If the media is missing metadata schedule it to be downloaded
    if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
    if not (instance.skip or instance.metadata or existing_media_metadata_task):
        log.info(f'Scheduling task to download metadata for: {instance.url}')
        verbose_name = _('Downloading metadata for "{}"')
        download_media_metadata(
@@ -183,13 +184,13 @@ def media_post_save(sender, instance, created, **kwargs):
            verbose_name=verbose_name.format(instance.name),
            remove_existing_tasks=True
        )
    existing_media_download_task = get_media_download_task(str(instance.pk))
    # If the media has not yet been downloaded schedule it to be downloaded
    if not instance.media_file_exists:
    if not (instance.media_file_exists or existing_media_download_task):
        instance.downloaded = False
        instance.media_file = None
    if (not instance.downloaded and instance.can_download and not instance.skip
            and instance.source.download_media):
        delete_task_by_media('sync.tasks.download_media', (str(instance.pk),))
    if (instance.source.download_media and instance.can_download) and not (
            instance.skip or instance.downloaded or existing_media_download_task):
        verbose_name = _('Downloading media for "{}"')
        download_media(
            str(instance.pk),
@@ -225,6 +226,11 @@ def media_post_delete(sender, instance, **kwargs):
        other_path = video_path.with_suffix(f'.{suffix}').resolve()
        log.info(f'Deleting file for: {instance} path: {other_path!s}')
        delete_file(other_path)
    # subtitles include language code
    subtitle_files = video_path.parent.glob(f'{glob_quote(video_path.with_suffix("").name)}*.vtt')
    for file in subtitle_files:
        log.info(f'Deleting file for: {instance} path: {file}')
        delete_file(file)
    # Jellyfin creates .trickplay directories and posters
    for suffix in frozenset(('.trickplay', '-poster.jpg', '-poster.webp',)):
        # with_suffix insists on suffix beginning with '.' for no good reason

@@ -10,7 +10,7 @@ import math
import uuid
from io import BytesIO
from hashlib import sha1
from datetime import timedelta, datetime
from datetime import datetime, timedelta, timezone as tz
from shutil import copyfile
from PIL import Image
from django.conf import settings
@@ -27,7 +27,6 @@ from common.utils import json_serial
from .models import Source, Media, MediaServer
from .utils import (get_remote_image, resize_image_to_height, delete_file,
                    write_text_file, filter_response)
from .filtering import filter_media
from .youtube import YouTubeError


@@ -202,6 +201,7 @@ def index_source_task(source_id):
    source.last_crawl = timezone.now()
    source.save()
    log.info(f'Found {len(videos)} media items for source: {source}')
    fields = lambda f, m: m.get_metadata_field(f)
    for video in videos:
        # Create or update each video as a Media object
        key = video.get(source.key_field, None)
@@ -213,6 +213,18 @@ def index_source_task(source_id):
        except Media.DoesNotExist:
            media = Media(key=key)
        media.source = source
        media.duration = float(video.get(fields('duration', media), 0)) or None
        media.title = str(video.get(fields('title', media), ''))[:200]
        timestamp = video.get(fields('timestamp', media), None)
        if timestamp is not None:
            try:
                timestamp_float = float(timestamp)
                posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
                published_dt = posix_epoch + timedelta(seconds=timestamp_float)
            except Exception as e:
                log.warn(f'Could not set published for: {source} / {media} with "{e}"')
            else:
                media.published = published_dt
        try:
            media.save()
            log.debug(f'Indexed media: {source} / {media}')

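A quick, self-contained check of the timestamp conversion added above; the epoch value is made up:

from datetime import datetime, timedelta, timezone as tz

# Hypothetical yt-dlp 'timestamp' value (seconds since the POSIX epoch).
timestamp_float = float(1718884800)
posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
published_dt = posix_epoch + timedelta(seconds=timestamp_float)
print(published_dt.isoformat())  # 2024-06-20T12:00:00+00:00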
@@ -203,22 +203,39 @@ def normalize_codec(codec_str):
    return result


def list_of_dictionaries(arg_list, arg_function=lambda x: x):
    assert callable(arg_function)
    if isinstance(arg_list, list):
        def _call_func_with_dict(arg_dict):
            if isinstance(arg_dict, dict):
                return arg_function(arg_dict)
            return arg_dict
        return (True, list(map(_call_func_with_dict, arg_list)),)
    return (False, arg_list,)


def _url_keys(arg_dict, filter_func):
    result = {}
    for key in arg_dict.keys():
        if 'url' in key:
            result.update(
                {key: filter_func(key=key, url=arg_dict[key])}
            )
    if isinstance(arg_dict, dict):
        for key, value in arg_dict.items():
            if 'url' in key:
                result.update(
                    {key: filter_func(key=key, url=value)}
                )
    return result


# expects a dictionary where the value at key is a:
# list of dictionaries
def _drop_url_keys(arg_dict, key, filter_func):
    def _del_url_keys(_arg_dict):
        for url_key, remove in _url_keys(_arg_dict, filter_func).items():
            if remove is True:
                del _arg_dict[url_key]

    assert isinstance(arg_dict, dict)
    if key in arg_dict.keys():
        for val_dict in arg_dict[key]:
            for url_key, remove in _url_keys(val_dict, filter_func).items():
                if remove is True:
                    del val_dict[url_key]
        list_of_dictionaries(arg_dict[key], _del_url_keys)


def filter_response(arg_dict, copy_arg=False):
@@ -260,13 +277,15 @@ def filter_response(arg_dict, copy_arg=False):
        '__needs_testing',
        '__working',
    ))
    for key in frozenset(('formats', 'requested_formats',)):
        _drop_url_keys(response_dict, key, drop_format_url)
    def del_drop_keys(arg_dict):
        for drop_key in drop_keys:
            if drop_key in arg_dict.keys():
                del arg_dict[drop_key]

    for key in ('formats', 'requested_formats',):
        if key in response_dict.keys():
            for format in response_dict[key]:
                for drop_key in drop_keys:
                    if drop_key in format.keys():
                        del format[drop_key]
            _drop_url_keys(response_dict, key, drop_format_url)
            list_of_dictionaries(response_dict[key], del_drop_keys)
    # end of formats cleanup }}}

    # beginning of subtitles cleanup {{{
@@ -282,12 +301,19 @@ def filter_response(arg_dict, copy_arg=False):
        )
    )

    for key in frozenset(('subtitles', 'automatic_captions',)):
    for key in ('subtitles', 'requested_subtitles', 'automatic_captions',):
        if key in response_dict.keys():
            key_dict = response_dict[key]
            for lang_code in key_dict:
                _drop_url_keys(key_dict, lang_code, drop_subtitles_url)
            lang_codes = response_dict[key]
            if isinstance(lang_codes, dict):
                for lang_code in lang_codes.keys():
                    _drop_url_keys(lang_codes, lang_code, drop_subtitles_url)
    # end of subtitles cleanup }}}

    # beginning of heatmap cleanup {{{
    for key in ('heatmap',):
        if key in response_dict.keys():
            del response_dict[key]
    # end of heatmap cleanup }}}

    return response_dict

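To make the helper contracts above concrete, a small hypothetical run of _url_keys() and _drop_url_keys(); the data and the filter lambda are invented, and the import path assumes the module is sync.utils:

from sync.utils import _url_keys, _drop_url_keys

# Invented format entry shaped like a yt-dlp 'formats' item.
info = {'formats': [
    {'format_id': '248',
     'url': 'https://example.invalid/video',
     'manifest_url': 'https://example.invalid/manifest'},
]}

# Keep the plain 'url' key, flag every other key containing 'url' for removal.
drop_format_url = lambda key, url: key != 'url'

print(_url_keys(info['formats'][0], drop_format_url))
# {'url': False, 'manifest_url': True}

_drop_url_keys(info, 'formats', drop_format_url)
print(info['formats'][0])
# {'format_id': '248', 'url': 'https://example.invalid/video'}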
@@ -143,6 +143,7 @@ def get_media_info(url):
        'simulate': True,
        'logger': log,
        'extract_flat': True,
        'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
    })
    response = {}
    with yt_dlp.YoutubeDL(opts) as y:
@@ -224,6 +225,10 @@ def download_media(
        'sponskrub': False,
    })

    pp_opts.exec_cmd.update(
        opts.get('exec_cmd', default_opts.exec_cmd)
    )

    if skip_sponsors:
        # Let yt_dlp convert from human for us.
        pp_opts.sponsorblock_mark = yt_dlp.parse_options(
@@ -242,7 +247,7 @@ def download_media(
        'writesubtitles': write_subtitles,
        'writeautomaticsub': auto_subtitles,
        'subtitleslangs': sub_langs.split(','),
        'writethumbnail': True,
        'writethumbnail': embed_thumbnail,
        'check_formats': False,
        'overwrites': None,
        'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20),
@@ -279,9 +284,11 @@ def download_media(
    codec_options = list()
    ofn = ytopts['outtmpl']
    if 'av1-' in ofn:
        codec_options = ['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35']
        codec_options.extend(['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35'])
    elif 'vp9-' in ofn:
        codec_options = ['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31']
        codec_options.extend(['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31', '-row-mt', '1', '-tile-columns', '2'])
    if '-opus' in ofn:
        codec_options.extend(['-c:a', 'libopus'])
    set_ffmpeg_codec = not (
        ytopts['postprocessor_args'] and
        ytopts['postprocessor_args']['modifychapters+ffmpeg']
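For intuition, the codec branches above combined for a VP9/Opus output name; the template string is invented and only the logic shown in the hunk is exercised:

# Hypothetical value of ytopts['outtmpl'].
ofn = '/downloads/example_vp9-opus.mkv'

codec_options = list()
if 'av1-' in ofn:
    codec_options.extend(['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35'])
elif 'vp9-' in ofn:
    codec_options.extend(['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31', '-row-mt', '1', '-tile-columns', '2'])
if '-opus' in ofn:
    codec_options.extend(['-c:a', 'libopus'])

print(codec_options)
# ['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31', '-row-mt', '1',
#  '-tile-columns', '2', '-c:a', 'libopus']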