Merge branch 'main' into patch-3

tcely 2025-02-11 15:30:14 -05:00 committed by GitHub
commit a7f872e902
8 changed files with 636 additions and 76 deletions

View File

@@ -373,6 +373,10 @@ COPY config/root /
COPY patches/background_task/ \
/usr/local/lib/python3/dist-packages/background_task/
# patch yt_dlp
COPY patches/yt_dlp/ \
/usr/local/lib/python3/dist-packages/yt_dlp/
# Create a healthcheck
HEALTHCHECK --interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck.py", "http://127.0.0.1:8080/healthcheck"]

View File

@@ -0,0 +1,393 @@
import copy
import heapq
import itertools
import os
import subprocess
from .common import PostProcessor
from .ffmpeg import (
FFmpegPostProcessor,
FFmpegPostProcessorError,
FFmpegSubtitlesConvertorPP,
)
from .sponsorblock import SponsorBlockPP
from ..utils import (
Popen,
PostProcessingError,
encodeArgument,
orderedSet,
prepend_extension,
shell_quote,
variadic,
)
_TINY_CHAPTER_DURATION = 1
DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'
class ModifyChaptersPP(FFmpegPostProcessor):
def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, remove_ranges=None,
*, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
FFmpegPostProcessor.__init__(self, downloader)
self._remove_chapters_patterns = set(remove_chapters_patterns or [])
self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())
self._ranges_to_remove = set(remove_ranges or [])
self._sponsorblock_chapter_title = sponsorblock_chapter_title
self._force_keyframes = force_keyframes
@PostProcessor._restrict_to(images=False)
def run(self, info):
self._fixup_chapters(info)
# Chapters must be preserved intact when downloading multiple formats of the same video.
chapters, sponsor_chapters = self._mark_chapters_to_remove(
copy.deepcopy(info.get('chapters')) or [],
copy.deepcopy(info.get('sponsorblock_chapters')) or [])
if not chapters and not sponsor_chapters:
return [], info
real_duration = self._get_real_video_duration(info['filepath'])
if not chapters:
chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}]
info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters)
if not cuts:
return [], info
elif not info['chapters']:
self.report_warning('You have requested to remove the entire video, which is not possible')
return [], info
original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time']
if self._duration_mismatch(real_duration, original_duration, 1):
if not self._duration_mismatch(real_duration, info['duration']):
self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
return [], info
if not info.get('__real_download'):
raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. '
'Different chapters may have already been removed')
else:
self.write_debug('Expected and actual durations mismatch')
concat_opts = self._make_concat_opts(cuts, real_duration)
self.write_debug('Concat spec = {}'.format(', '.join(f'{c.get("inpoint", 0.0)}-{c.get("outpoint", "inf")}' for c in concat_opts)))
def remove_chapters(file, is_sub):
return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub)
in_out_files = [remove_chapters(info['filepath'], False)]
in_out_files.extend(remove_chapters(in_file, True) for in_file in self._get_supported_subs(info))
# Renaming should only happen after all files are processed
files_to_remove = []
for in_file, out_file in in_out_files:
mtime = os.stat(in_file).st_mtime
uncut_file = prepend_extension(in_file, 'uncut')
os.replace(in_file, uncut_file)
os.replace(out_file, in_file)
self.try_utime(in_file, mtime, mtime)
files_to_remove.append(uncut_file)
return files_to_remove, info
def _mark_chapters_to_remove(self, chapters, sponsor_chapters):
if self._remove_chapters_patterns:
warn_no_chapter_to_remove = True
if not chapters:
self.to_screen('Chapter information is unavailable')
warn_no_chapter_to_remove = False
for c in chapters:
if any(regex.search(c['title']) for regex in self._remove_chapters_patterns):
c['remove'] = True
warn_no_chapter_to_remove = False
if warn_no_chapter_to_remove:
self.to_screen('There are no chapters matching the regex')
if self._remove_sponsor_segments:
warn_no_chapter_to_remove = True
if not sponsor_chapters:
self.to_screen('SponsorBlock information is unavailable')
warn_no_chapter_to_remove = False
for c in sponsor_chapters:
if c['category'] in self._remove_sponsor_segments:
c['remove'] = True
warn_no_chapter_to_remove = False
if warn_no_chapter_to_remove:
self.to_screen('There are no matching SponsorBlock chapters')
sponsor_chapters.extend({
'start_time': start,
'end_time': end,
'category': 'manually_removed',
'_categories': [('manually_removed', start, end, 'Manually removed')],
'remove': True,
} for start, end in self._ranges_to_remove)
return chapters, sponsor_chapters
def _get_supported_subs(self, info):
for sub in (info.get('requested_subtitles') or {}).values():
sub_file = sub.get('filepath')
# The file might have been removed by --embed-subs
if not sub_file or not os.path.exists(sub_file):
continue
ext = sub['ext']
if ext not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS:
self.report_warning(f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync')
continue
# TODO: create __real_download for subs?
yield sub_file
def _remove_marked_arrange_sponsors(self, chapters):
# Store cuts separately, since adjacent and overlapping cuts must be merged.
cuts = []
def append_cut(c):
assert 'remove' in c, 'Not a cut is appended to cuts'
last_to_cut = cuts[-1] if cuts else None
if last_to_cut and last_to_cut['end_time'] >= c['start_time']:
last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time'])
else:
cuts.append(c)
return len(cuts) - 1
def excess_duration(c):
# Cuts that are completely within the chapter reduce the chapter's duration.
# Since cuts can overlap, excess duration may be less than the sum of cuts' durations.
# To avoid that, the chapter stores the index of the first cut within the chapter,
# instead of storing excess duration. append_cut ensures that subsequent cuts (if any)
# will be merged with previous ones (if necessary).
cut_idx, excess = c.pop('cut_idx', len(cuts)), 0
while cut_idx < len(cuts):
cut = cuts[cut_idx]
if cut['start_time'] >= c['end_time']:
break
if cut['end_time'] > c['start_time']:
excess += min(cut['end_time'], c['end_time'])
excess -= max(cut['start_time'], c['start_time'])
cut_idx += 1
return excess
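# Illustrative example (not part of the patch): for a chapter spanning
# 10-60 with cuts 15-20 and 55-70 already recorded in `cuts`, the excess
# is (20-15) + (60-55) = 10, since only the portion of each cut that falls
# inside the chapter counts against it.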
new_chapters = []
def append_chapter(c):
assert 'remove' not in c, 'Cut is appended to chapters'
length = c['end_time'] - c['start_time'] - excess_duration(c)
# Chapter is completely covered by cuts or sponsors.
if length <= 0:
return
start = new_chapters[-1]['end_time'] if new_chapters else 0
c.update(start_time=start, end_time=start + length)
new_chapters.append(c)
# Turn into a priority queue, index is a tie breaker.
# Plain stack sorted by start_time is not enough: after splitting the chapter,
# the part returned to the stack is not guaranteed to have start_time
# less than or equal to that of the stack's head.
chapters = [(c['start_time'], i, c) for i, c in enumerate(chapters)]
heapq.heapify(chapters)
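# Illustrative scenario (not part of the patch): if a sponsor chapter covering
# 0-50 is split by a cut at 5-10, the remainder starts at 10, yet a chapter
# starting at 8 may still be waiting in the queue, so the remainder has to be
# re-inserted in start_time order, which a plain pre-sorted stack cannot do.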
_, cur_i, cur_chapter = heapq.heappop(chapters)
while chapters:
_, i, c = heapq.heappop(chapters)
# Non-overlapping chapters or cuts can be appended directly. However,
# adjacent non-overlapping cuts must be merged, which is handled by append_cut.
if cur_chapter['end_time'] <= c['start_time']:
(append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
cur_i, cur_chapter = i, c
continue
# Eight possibilities for overlapping chapters: (cut, cut), (cut, sponsor),
# (cut, normal), (sponsor, cut), (normal, cut), (sponsor, sponsor),
# (sponsor, normal), and (normal, sponsor). There is no (normal, normal):
# normal chapters are assumed not to overlap.
if 'remove' in cur_chapter:
# (cut, cut): adjust end_time.
if 'remove' in c:
cur_chapter['end_time'] = max(cur_chapter['end_time'], c['end_time'])
# (cut, sponsor/normal): chop the beginning of the later chapter
# (if it's not completely hidden by the cut). Push to the priority queue
# to restore sorting by start_time: with beginning chopped, c may actually
# start later than the remaining chapters from the queue.
elif cur_chapter['end_time'] < c['end_time']:
c['start_time'] = cur_chapter['end_time']
c['_was_cut'] = True
heapq.heappush(chapters, (c['start_time'], i, c))
# (sponsor/normal, cut).
elif 'remove' in c:
cur_chapter['_was_cut'] = True
# Chop the end of the current chapter if the cut is not contained within it.
# Chopping the end doesn't break start_time sorting, no PQ push is necessary.
if cur_chapter['end_time'] <= c['end_time']:
cur_chapter['end_time'] = c['start_time']
append_chapter(cur_chapter)
cur_i, cur_chapter = i, c
continue
# Current chapter contains the cut within it. If the current chapter is
# a sponsor chapter, check whether the categories before and after the cut differ.
if '_categories' in cur_chapter:
after_c = dict(cur_chapter, start_time=c['end_time'], _categories=[])
cur_cats = []
for cat_start_end in cur_chapter['_categories']:
if cat_start_end[1] < c['start_time']:
cur_cats.append(cat_start_end)
if cat_start_end[2] > c['end_time']:
after_c['_categories'].append(cat_start_end)
cur_chapter['_categories'] = cur_cats
if cur_chapter['_categories'] != after_c['_categories']:
# Categories before and after the cut differ: push the after part to PQ.
heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
cur_chapter['end_time'] = c['start_time']
append_chapter(cur_chapter)
cur_i, cur_chapter = i, c
continue
# Either sponsor categories before and after the cut are the same or
# we're dealing with a normal chapter. Just register an outstanding cut:
# subsequent append_chapter will reduce the duration.
cur_chapter.setdefault('cut_idx', append_cut(c))
# (sponsor, normal): if a normal chapter is not completely overlapped,
# chop the beginning of it and push it to PQ.
elif '_categories' in cur_chapter and '_categories' not in c:
if cur_chapter['end_time'] < c['end_time']:
c['start_time'] = cur_chapter['end_time']
c['_was_cut'] = True
heapq.heappush(chapters, (c['start_time'], i, c))
# (normal, sponsor) and (sponsor, sponsor)
else:
assert '_categories' in c, 'Normal chapters overlap'
cur_chapter['_was_cut'] = True
c['_was_cut'] = True
# Push the part after the sponsor to PQ.
if cur_chapter['end_time'] > c['end_time']:
# deepcopy to make categories in after_c and cur_chapter/c refer to different lists.
after_c = dict(copy.deepcopy(cur_chapter), start_time=c['end_time'])
heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
# Push the part after the overlap to PQ.
elif c['end_time'] > cur_chapter['end_time']:
after_cur = dict(copy.deepcopy(c), start_time=cur_chapter['end_time'])
heapq.heappush(chapters, (after_cur['start_time'], cur_i, after_cur))
c['end_time'] = cur_chapter['end_time']
# (sponsor, sponsor): merge categories in the overlap.
if '_categories' in cur_chapter:
c['_categories'] = cur_chapter['_categories'] + c['_categories']
# Inherit the cuts that the current chapter has accumulated within it.
if 'cut_idx' in cur_chapter:
c['cut_idx'] = cur_chapter['cut_idx']
cur_chapter['end_time'] = c['start_time']
append_chapter(cur_chapter)
cur_i, cur_chapter = i, c
(append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
return self._remove_tiny_rename_sponsors(new_chapters), cuts
def _remove_tiny_rename_sponsors(self, chapters):
new_chapters = []
for i, c in enumerate(chapters):
# Merge with the previous/next if the chapter is tiny.
# Only tiny chapters resulting from a cut can be skipped.
# Chapters that were already tiny in the original list will be preserved.
if (('_was_cut' in c or '_categories' in c)
and c['end_time'] - c['start_time'] < _TINY_CHAPTER_DURATION):
if not new_chapters:
# Prepend tiny chapter to the next one if possible.
if i < len(chapters) - 1:
chapters[i + 1]['start_time'] = c['start_time']
continue
else:
old_c = new_chapters[-1]
if i < len(chapters) - 1:
next_c = chapters[i + 1]
# Not a typo: key names in old_c and next_c are really different.
prev_is_sponsor = 'categories' in old_c
next_is_sponsor = '_categories' in next_c
# Preferentially prepend tiny normals to normals and sponsors to sponsors.
if (('_categories' not in c and prev_is_sponsor and not next_is_sponsor)
or ('_categories' in c and not prev_is_sponsor and next_is_sponsor)):
next_c['start_time'] = c['start_time']
continue
old_c['end_time'] = c['end_time']
continue
c.pop('_was_cut', None)
cats = c.pop('_categories', None)
if cats:
category, _, _, category_name = min(cats, key=lambda c: c[2] - c[1])
c.update({
'category': category,
'categories': orderedSet(x[0] for x in cats),
'name': category_name,
'category_names': orderedSet(x[3] for x in cats),
})
c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c.copy())
# Merge identically named sponsors.
if (new_chapters and 'categories' in new_chapters[-1]
and new_chapters[-1]['title'] == c['title']):
new_chapters[-1]['end_time'] = c['end_time']
continue
new_chapters.append(c)
return new_chapters
def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False):
in_file = filename
out_file = prepend_extension(in_file, 'temp')
if force_keyframes:
in_file = self.force_keyframes(in_file, (t for c in ranges_to_cut for t in (c['start_time'], c['end_time'])))
self.to_screen(f'Removing chapters from {filename}')
self.concat_files([in_file] * len(concat_opts), out_file, concat_opts)
if in_file != filename:
self._delete_downloaded_files(in_file, msg=None)
return out_file
# override to change the args ordering
def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
self.check_version()
oldest_mtime = min(
os.stat(path).st_mtime for path, _ in input_path_opts if path)
cmd = [self.executable, encodeArgument('-y')]
# avconv does not have repeat option
if self.basename == 'ffmpeg':
cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
def make_args(file, args, name, number):
keys = [f'_{name}{number}', f'_{name}']
if name == 'o':
args += ['-movflags', '+faststart']
if number == 1:
keys.append('')
args = self._configuration_args(self.basename, keys) + args
if name == 'i':
args.append('-i')
return (
[encodeArgument(arg) for arg in args]
+ [self._ffmpeg_filename_argument(file)])
for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
cmd += itertools.chain.from_iterable(
make_args(path, list(opts), arg_type, i + 1)
for i, (path, opts) in enumerate(path_opts) if path)
self.write_debug(f'ffmpeg command line: {shell_quote(cmd)}')
_, stderr, returncode = Popen.run(
cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
if returncode not in variadic(expected_retcodes):
self.write_debug(stderr)
raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1])
for out_path, _ in output_path_opts:
if out_path:
self.try_utime(out_path, oldest_mtime, oldest_mtime)
return stderr
@staticmethod
def _make_concat_opts(chapters_to_remove, duration):
opts = [{}]
for s in chapters_to_remove:
# Do not create 0 duration chunk at the beginning.
if s['start_time'] == 0:
opts[-1]['inpoint'] = f'{s["end_time"]:.6f}'
continue
opts[-1]['outpoint'] = f'{s["start_time"]:.6f}'
# Do not create 0 duration chunk at the end.
if s['end_time'] < duration:
opts.append({'inpoint': f'{s["end_time"]:.6f}'})
return opts
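# Worked example (illustrative, not part of the patch): with cuts at 0-5 and
# 20-30 in a 60 second file, the returned options are
#   [{'inpoint': '5.000000', 'outpoint': '20.000000'}, {'inpoint': '30.000000'}]
# i.e. keep 5-20 and 30-end while skipping the removed ranges.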

View File

@@ -5,83 +5,179 @@ from common.logger import log
from django.conf import settings
class ProgressHookStatus:
progress_hook = {
'status': dict(),
}
postprocessor_hook = {
'status': dict(),
}
class BaseStatus:
status_dict = dict()
valid = set()
@classmethod
def get(cls, key):
return cls.status_dict.get(key, None)
@classmethod
def valid_status(cls, status):
return status in cls.valid
def __init__(self, hook_status_dict=None):
self.media_key = None
self.task_status = '[Started: 0%]'
self.task_verbose_name = None
self._status_dict = hook_status_dict or self.status_dict
self._registered_keys = set()
def register(self, *args):
additions = dict()
for key in args:
if key is not None:
self._registered_keys.add(key)
additions[key] = self
self._status_dict.update(additions)
def cleanup(self):
for key in self._registered_keys:
if key in self._status_dict:
del self._status_dict[key]
def update_task(self):
if self.media_key is None:
return
from .models import Media
from .tasks import get_media_download_task
media = Media.objects.get(key=self.media_key)
task = get_media_download_task(str(media.pk))
if task:
if self.task_verbose_name is None:
# clean up any previously prepended task_status
# this happened because of duplicated tasks on my test system
s = task.verbose_name
cleaned = s[1+s.find(' Downloading '):]
self.task_verbose_name = cleaned
task.verbose_name = f'{self.task_status} {self.task_verbose_name}'
task.save()
class ProgressHookStatus(BaseStatus):
status_dict = progress_hook['status']
valid = frozenset((
'downloading',
'finished',
'error',
))
def __init__(self):
def __init__(self, *args, status=None, info_dict={}, filename=None, **kwargs):
super().__init__(self.status_dict)
self.filename = filename
self.info = info_dict
self.status = status
self.download_progress = 0
class PPHookStatus:
def next_progress(self):
if 0 == self.download_progress:
return 0
return 1 + self.download_progress
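# Note (illustrative): returning download_progress + 1 once progress has been
# recorded means the "status.next_progress() < percent" check in the hook below
# only logs again after the percentage has moved past the last stored value.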
class PPHookStatus(BaseStatus):
status_dict = postprocessor_hook['status']
valid = frozenset((
'started',
'processing',
'finished',
))
def __init__(self, *args, status=None, postprocessor=None, info_dict={}, **kwargs):
def __init__(self, *args, status=None, postprocessor=None, info_dict={}, filename=None, **kwargs):
super().__init__(self.status_dict)
self.filename = filename
self.info = info_dict
self.media_name = None
self.name = postprocessor
self.status = status
def yt_dlp_progress_hook(event):
hook = progress_hook.get('status', None)
filename = os.path.basename(event['filename'])
if hook is None:
log.error('yt_dlp_progress_hook: failed to get hook status object')
if not ProgressHookStatus.valid_status(event['status']):
log.warn(f'[youtube-dl] unknown progress event: {str(event)}')
return None
if event['status'] not in ProgressHookStatus.valid:
log.warn(f'[youtube-dl] unknown event: {str(event)}')
return None
key = None
if 'display_id' in event['info_dict']:
key = event['info_dict']['display_id']
elif 'id' in event['info_dict']:
key = event['info_dict']['id']
if event.get('downloaded_bytes') is None or event.get('total_bytes') is None:
return None
if event['status'] == 'error':
filename = os.path.basename(event.get('filename', '???'))
if 'error' == event['status']:
log.error(f'[youtube-dl] error occurred downloading: {filename}')
elif event['status'] == 'downloading':
downloaded_bytes = event.get('downloaded_bytes', 0)
total_bytes = event.get('total_bytes', 0)
elif 'downloading' == event['status']:
# get or create the status for filename
status = ProgressHookStatus.get(filename)
if status is None:
status = ProgressHookStatus(**event)
status.register(key, filename, status.filename)
downloaded_bytes = event.get('downloaded_bytes', 0) or 0
total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0
total_bytes = event.get('total_bytes', 0) or total_bytes_estimate
fragment_index = event.get('fragment_index', 0) or 0
fragment_count = event.get('fragment_count', 0) or 0
eta = event.get('_eta_str', '?').strip()
percent_done = event.get('_percent_str', '?').strip()
percent_str = event.get('_percent_str', '?').strip()
speed = event.get('_speed_str', '?').strip()
total = event.get('_total_bytes_str', '?').strip()
if downloaded_bytes > 0 and total_bytes > 0:
p = round((event['downloaded_bytes'] / event['total_bytes']) * 100)
if (p % 5 == 0) and p > hook.download_progress:
hook.download_progress = p
log.info(f'[youtube-dl] downloading: {filename} - {percent_done} '
f'of {total} at {speed}, {eta} remaining')
else:
# No progress to monitor, just spam every 10 download messages instead
hook.download_progress += 1
if hook.download_progress % 10 == 0:
log.info(f'[youtube-dl] downloading: {filename} - {percent_done} '
f'of {total} at {speed}, {eta} remaining')
elif event['status'] == 'finished':
percent = None
try:
percent = int(float(percent_str.rstrip('%')))
except:
pass
if fragment_index >= 0 and fragment_count > 0:
percent = round(100 * fragment_index / fragment_count)
percent_str = f'{percent}%'
elif downloaded_bytes >= 0 and total_bytes > 0:
percent = round(100 * downloaded_bytes / total_bytes)
if percent and (status.next_progress() < percent) and (0 == percent % 5):
status.download_progress = percent
if key:
status.media_key = key
status.task_status = f'[downloading: {percent_str}]'
status.update_task()
log.info(f'[youtube-dl] downloading: {filename} - {percent_str} '
f'of {total} at {speed}, {eta} remaining')
elif 'finished' == event['status']:
# update the status for filename to the finished value
status = ProgressHookStatus.get(filename)
if status is None:
status = ProgressHookStatus(**event)
status.register(key, filename, status.filename)
status.download_progress = 100
total_size_str = event.get('_total_bytes_str', '?').strip()
elapsed_str = event.get('_elapsed_str', '?').strip()
log.info(f'[youtube-dl] finished downloading: {filename} - '
f'{total_size_str} in {elapsed_str}')
status.cleanup()
def yt_dlp_postprocessor_hook(event):
if event['status'] not in PPHookStatus.valid:
log.warn(f'[youtube-dl] unknown event: {str(event)}')
if not PPHookStatus.valid_status(event['status']):
log.warn(f'[youtube-dl] unknown postprocessor event: {str(event)}')
return None
postprocessor_hook['status'] = PPHookStatus(*event)
name = key = 'Unknown'
filename = os.path.basename(event.get('filename', '???'))
if 'display_id' in event['info_dict']:
key = event['info_dict']['display_id']
elif 'id' in event['info_dict']:
key = event['info_dict']['id']
status = PPHookStatus(**event)
status.register(key, filename, status.filename)
title = None
if 'fulltitle' in event['info_dict']:
title = event['info_dict']['fulltitle']
@@ -91,6 +187,8 @@ def yt_dlp_postprocessor_hook(event):
if title:
name = f'{key}: {title}'
status.media_name = name
if 'started' == event['status']:
if 'formats' in event['info_dict']:
del event['info_dict']['formats']
@@ -98,16 +196,23 @@ def yt_dlp_postprocessor_hook(event):
del event['info_dict']['automatic_captions']
log.debug(repr(event['info_dict']))
if 'Unknown' != key:
status.media_key = key
status.task_status = f'[{event["postprocessor"]}: {event["status"]}]'
status.update_task()
log.info(f'[{event["postprocessor"]}] {event["status"]} for: {name}')
if 'finished' == event['status']:
status.cleanup()
progress_hook = {
'status': ProgressHookStatus(),
progress_hook.update({
'class': ProgressHookStatus(),
'function': yt_dlp_progress_hook,
}
})
postprocessor_hook = {
'status': PPHookStatus(),
postprocessor_hook.update({
'class': PPHookStatus(),
'function': yt_dlp_postprocessor_hook,
}
})
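# Usage sketch (assumption, not shown in this diff): the 'function' entries are
# what gets handed to yt-dlp elsewhere in the app, roughly like:
#   opts.update({
#       'progress_hooks': [progress_hook['function']],
#       'postprocessor_hooks': [postprocessor_hook['function']],
#   })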

View File

@@ -664,6 +664,11 @@ class Media(models.Model):
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date',
Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date',
},
'timestamp': {
Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp',
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp',
Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp',
},
'title': {
Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title',
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title',
@@ -930,7 +935,7 @@ class Media(models.Model):
def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
# Trigger an update of derived fields from metadata
if self.metadata:
self.title = self.metadata_title
self.title = self.metadata_title[:200]
self.duration = self.metadata_duration
if update_fields is not None and "metadata" in update_fields:
# If only some fields are being updated, make sure we update title and duration if metadata changes
@@ -944,7 +949,7 @@ class Media(models.Model):
def get_metadata_field(self, field):
fields = self.METADATA_FIELDS.get(field, {})
return fields.get(self.source.source_type, '')
return fields.get(self.source.source_type, field)
def iter_formats(self):
for fmt in self.formats:
@@ -1561,6 +1566,8 @@ class Media(models.Model):
if self.downloaded and self.media_file:
old_video_path = Path(self.media_file.path)
new_video_path = Path(get_media_file_path(self, None))
if old_video_path == new_video_path:
return
if old_video_path.exists() and not new_video_path.exists():
old_video_path = old_video_path.resolve(strict=True)

View File

@@ -14,7 +14,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
map_task_to_instance, check_source_directory_exists,
download_media, rescan_media_server, download_source_images,
save_all_media_for_source, rename_all_media_for_source,
get_media_metadata_task)
get_media_metadata_task, get_media_download_task)
from .utils import delete_file, glob_quote
from .filtering import filter_media
@@ -86,7 +86,7 @@ def source_post_save(sender, instance, created, **kwargs):
queue=str(instance.pk),
priority=1,
verbose_name=verbose_name.format(instance.name),
remove_existing_tasks=False
remove_existing_tasks=True
)
verbose_name = _('Checking all media for source "{}"')
save_all_media_for_source(
@@ -156,8 +156,9 @@ def media_post_save(sender, instance, created, **kwargs):
post_save.disconnect(media_post_save, sender=Media)
instance.save()
post_save.connect(media_post_save, sender=Media)
existing_media_metadata_task = get_media_metadata_task(str(instance.pk))
# If the media is missing metadata schedule it to be downloaded
if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
if not (instance.skip or instance.metadata or existing_media_metadata_task):
log.info(f'Scheduling task to download metadata for: {instance.url}')
verbose_name = _('Downloading metadata for "{}"')
download_media_metadata(
@@ -183,13 +184,13 @@
verbose_name=verbose_name.format(instance.name),
remove_existing_tasks=True
)
existing_media_download_task = get_media_download_task(str(instance.pk))
# If the media has not yet been downloaded schedule it to be downloaded
if not instance.media_file_exists:
if not (instance.media_file_exists or existing_media_download_task):
instance.downloaded = False
instance.media_file = None
if (not instance.downloaded and instance.can_download and not instance.skip
and instance.source.download_media):
delete_task_by_media('sync.tasks.download_media', (str(instance.pk),))
if (instance.source.download_media and instance.can_download) and not (
instance.skip or instance.downloaded or existing_media_download_task):
verbose_name = _('Downloading media for "{}"')
download_media(
str(instance.pk),
@@ -225,6 +226,11 @@ def media_post_delete(sender, instance, **kwargs):
other_path = video_path.with_suffix(f'.{suffix}').resolve()
log.info(f'Deleting file for: {instance} path: {other_path!s}')
delete_file(other_path)
# subtitles include language code
subtitle_files = video_path.parent.glob(f'{glob_quote(video_path.with_suffix("").name)}*.vtt')
for file in subtitle_files:
log.info(f'Deleting file for: {instance} path: {file}')
delete_file(file)
# Jellyfin creates .trickplay directories and posters
for suffix in frozenset(('.trickplay', '-poster.jpg', '-poster.webp',)):
# with_suffix insists on suffix beginning with '.' for no good reason

View File

@@ -10,7 +10,7 @@ import math
import uuid
from io import BytesIO
from hashlib import sha1
from datetime import timedelta, datetime
from datetime import datetime, timedelta, timezone as tz
from shutil import copyfile
from PIL import Image
from django.conf import settings
@@ -27,7 +27,6 @@ from common.utils import json_serial
from .models import Source, Media, MediaServer
from .utils import (get_remote_image, resize_image_to_height, delete_file,
write_text_file, filter_response)
from .filtering import filter_media
from .youtube import YouTubeError
@@ -202,6 +201,7 @@ def index_source_task(source_id):
source.last_crawl = timezone.now()
source.save()
log.info(f'Found {len(videos)} media items for source: {source}')
fields = lambda f, m: m.get_metadata_field(f)
for video in videos:
# Create or update each video as a Media object
key = video.get(source.key_field, None)
@@ -213,6 +213,18 @@
except Media.DoesNotExist:
media = Media(key=key)
media.source = source
media.duration = float(video.get(fields('duration', media), 0)) or None
media.title = str(video.get(fields('title', media), ''))[:200]
timestamp = video.get(fields('timestamp', media), None)
if timestamp is not None:
try:
timestamp_float = float(timestamp)
posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
published_dt = posix_epoch + timedelta(seconds=timestamp_float)
except Exception as e:
log.warn(f'Could not set published for: {source} / {media} with "{e}"')
else:
media.published = published_dt
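# Example (illustrative): a flat-extraction entry carrying 'timestamp': 1700000000
# produces published = 2023-11-14 22:13:20 UTC via the epoch + timedelta above.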
try:
media.save()
log.debug(f'Indexed media: {source} / {media}')

View File

@@ -203,22 +203,39 @@ def normalize_codec(codec_str):
return result
def list_of_dictionaries(arg_list, arg_function=lambda x: x):
assert callable(arg_function)
if isinstance(arg_list, list):
def _call_func_with_dict(arg_dict):
if isinstance(arg_dict, dict):
return arg_function(arg_dict)
return arg_dict
return (True, list(map(_call_func_with_dict, arg_list)),)
return (False, arg_list,)
def _url_keys(arg_dict, filter_func):
result = {}
for key in arg_dict.keys():
if 'url' in key:
result.update(
{key: filter_func(key=key, url=arg_dict[key])}
)
if isinstance(arg_dict, dict):
for key, value in arg_dict.items():
if 'url' in key:
result.update(
{key: filter_func(key=key, url=value)}
)
return result
# expects a dictionary where the value at key is a list of dictionaries
def _drop_url_keys(arg_dict, key, filter_func):
def _del_url_keys(_arg_dict):
for url_key, remove in _url_keys(_arg_dict, filter_func).items():
if remove is True:
del _arg_dict[url_key]
assert isinstance(arg_dict, dict)
if key in arg_dict.keys():
for val_dict in arg_dict[key]:
for url_key, remove in _url_keys(val_dict, filter_func).items():
if remove is True:
del val_dict[url_key]
list_of_dictionaries(arg_dict[key], _del_url_keys)
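# Illustrative example (not part of the patch): given
#   {'formats': [{'url': '...', 'manifest_url': '...', 'format_id': '22'}]}
# and a filter_func returning True for every URL key, _drop_url_keys deletes
# 'url' and 'manifest_url' from each format dict and keeps 'format_id'.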
def filter_response(arg_dict, copy_arg=False):
@@ -260,13 +277,15 @@ def filter_response(arg_dict, copy_arg=False):
'__needs_testing',
'__working',
))
for key in frozenset(('formats', 'requested_formats',)):
_drop_url_keys(response_dict, key, drop_format_url)
def del_drop_keys(arg_dict):
for drop_key in drop_keys:
if drop_key in arg_dict.keys():
del arg_dict[drop_key]
for key in ('formats', 'requested_formats',):
if key in response_dict.keys():
for format in response_dict[key]:
for drop_key in drop_keys:
if drop_key in format.keys():
del format[drop_key]
_drop_url_keys(response_dict, key, drop_format_url)
list_of_dictionaries(response_dict[key], del_drop_keys)
# end of formats cleanup }}}
# beginning of subtitles cleanup {{{
@@ -282,12 +301,19 @@
)
)
for key in frozenset(('subtitles', 'automatic_captions',)):
for key in ('subtitles', 'requested_subtitles', 'automatic_captions',):
if key in response_dict.keys():
key_dict = response_dict[key]
for lang_code in key_dict:
_drop_url_keys(key_dict, lang_code, drop_subtitles_url)
lang_codes = response_dict[key]
if isinstance(lang_codes, dict):
for lang_code in lang_codes.keys():
_drop_url_keys(lang_codes, lang_code, drop_subtitles_url)
# end of subtitles cleanup }}}
# beginning of heatmap cleanup {{{
for key in ('heatmap',):
if key in response_dict.keys():
del response_dict[key]
# end of heatmap cleanup }}}
return response_dict

View File

@@ -143,6 +143,7 @@ def get_media_info(url):
'simulate': True,
'logger': log,
'extract_flat': True,
'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
})
response = {}
with yt_dlp.YoutubeDL(opts) as y:
@@ -224,6 +225,10 @@ def download_media(
'sponskrub': False,
})
pp_opts.exec_cmd.update(
opts.get('exec_cmd', default_opts.exec_cmd)
)
if skip_sponsors:
# Let yt_dlp convert from the human-readable form for us.
pp_opts.sponsorblock_mark = yt_dlp.parse_options(
@@ -242,7 +247,7 @@
'writesubtitles': write_subtitles,
'writeautomaticsub': auto_subtitles,
'subtitleslangs': sub_langs.split(','),
'writethumbnail': True,
'writethumbnail': embed_thumbnail,
'check_formats': False,
'overwrites': None,
'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20),
@@ -279,9 +284,11 @@
codec_options = list()
ofn = ytopts['outtmpl']
if 'av1-' in ofn:
codec_options = ['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35']
codec_options.extend(['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35'])
elif 'vp9-' in ofn:
codec_options = ['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31']
codec_options.extend(['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31', '-row-mt', '1', '-tile-columns', '2'])
if '-opus' in ofn:
codec_options.extend(['-c:a', 'libopus'])
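# Example (illustrative): an output template containing both 'av1-' and '-opus'
# ends up with codec_options of
#   ['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35', '-c:a', 'libopus']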
set_ffmpeg_codec = not (
ytopts['postprocessor_args'] and
ytopts['postprocessor_args']['modifychapters+ffmpeg']