Merge branch 'main' into patch-3

tcely 2025-02-11 15:30:14 -05:00 committed by GitHub
commit a7f872e902
8 changed files with 636 additions and 76 deletions

View File

@@ -373,6 +373,10 @@ COPY config/root /
COPY patches/background_task/ \
/usr/local/lib/python3/dist-packages/background_task/
# patch yt_dlp
COPY patches/yt_dlp/ \
/usr/local/lib/python3/dist-packages/yt_dlp/
# Create a healthcheck
HEALTHCHECK --interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck.py", "http://127.0.0.1:8080/healthcheck"]

View File

@@ -0,0 +1,393 @@
import copy
import heapq
import itertools
import os
import subprocess
from .common import PostProcessor
from .ffmpeg import (
FFmpegPostProcessor,
FFmpegPostProcessorError,
FFmpegSubtitlesConvertorPP,
)
from .sponsorblock import SponsorBlockPP
from ..utils import (
Popen,
PostProcessingError,
encodeArgument,
orderedSet,
prepend_extension,
shell_quote,
variadic,
)
_TINY_CHAPTER_DURATION = 1
DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'
class ModifyChaptersPP(FFmpegPostProcessor):
def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, remove_ranges=None,
*, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
FFmpegPostProcessor.__init__(self, downloader)
self._remove_chapters_patterns = set(remove_chapters_patterns or [])
self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())
self._ranges_to_remove = set(remove_ranges or [])
self._sponsorblock_chapter_title = sponsorblock_chapter_title
self._force_keyframes = force_keyframes
@PostProcessor._restrict_to(images=False)
def run(self, info):
self._fixup_chapters(info)
# Chapters must be preserved intact when downloading multiple formats of the same video.
chapters, sponsor_chapters = self._mark_chapters_to_remove(
copy.deepcopy(info.get('chapters')) or [],
copy.deepcopy(info.get('sponsorblock_chapters')) or [])
if not chapters and not sponsor_chapters:
return [], info
real_duration = self._get_real_video_duration(info['filepath'])
if not chapters:
chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}]
info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters)
if not cuts:
return [], info
elif not info['chapters']:
self.report_warning('You have requested to remove the entire video, which is not possible')
return [], info
original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time']
if self._duration_mismatch(real_duration, original_duration, 1):
if not self._duration_mismatch(real_duration, info['duration']):
self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
return [], info
if not info.get('__real_download'):
raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. '
'Different chapters may have already been removed')
else:
self.write_debug('Expected and actual durations mismatch')
concat_opts = self._make_concat_opts(cuts, real_duration)
self.write_debug('Concat spec = {}'.format(', '.join(f'{c.get("inpoint", 0.0)}-{c.get("outpoint", "inf")}' for c in concat_opts)))
def remove_chapters(file, is_sub):
return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub)
in_out_files = [remove_chapters(info['filepath'], False)]
in_out_files.extend(remove_chapters(in_file, True) for in_file in self._get_supported_subs(info))
# Renaming should only happen after all files are processed
files_to_remove = []
for in_file, out_file in in_out_files:
mtime = os.stat(in_file).st_mtime
uncut_file = prepend_extension(in_file, 'uncut')
os.replace(in_file, uncut_file)
os.replace(out_file, in_file)
self.try_utime(in_file, mtime, mtime)
files_to_remove.append(uncut_file)
return files_to_remove, info
def _mark_chapters_to_remove(self, chapters, sponsor_chapters):
if self._remove_chapters_patterns:
warn_no_chapter_to_remove = True
if not chapters:
self.to_screen('Chapter information is unavailable')
warn_no_chapter_to_remove = False
for c in chapters:
if any(regex.search(c['title']) for regex in self._remove_chapters_patterns):
c['remove'] = True
warn_no_chapter_to_remove = False
if warn_no_chapter_to_remove:
self.to_screen('There are no chapters matching the regex')
if self._remove_sponsor_segments:
warn_no_chapter_to_remove = True
if not sponsor_chapters:
self.to_screen('SponsorBlock information is unavailable')
warn_no_chapter_to_remove = False
for c in sponsor_chapters:
if c['category'] in self._remove_sponsor_segments:
c['remove'] = True
warn_no_chapter_to_remove = False
if warn_no_chapter_to_remove:
self.to_screen('There are no matching SponsorBlock chapters')
sponsor_chapters.extend({
'start_time': start,
'end_time': end,
'category': 'manually_removed',
'_categories': [('manually_removed', start, end, 'Manually removed')],
'remove': True,
} for start, end in self._ranges_to_remove)
return chapters, sponsor_chapters
def _get_supported_subs(self, info):
for sub in (info.get('requested_subtitles') or {}).values():
sub_file = sub.get('filepath')
# The file might have been removed by --embed-subs
if not sub_file or not os.path.exists(sub_file):
continue
ext = sub['ext']
if ext not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS:
self.report_warning(f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync')
continue
# TODO: create __real_download for subs?
yield sub_file
def _remove_marked_arrange_sponsors(self, chapters):
# Store cuts separately, since adjacent and overlapping cuts must be merged.
cuts = []
def append_cut(c):
assert 'remove' in c, 'Not a cut is appended to cuts'
last_to_cut = cuts[-1] if cuts else None
if last_to_cut and last_to_cut['end_time'] >= c['start_time']:
last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time'])
else:
cuts.append(c)
return len(cuts) - 1
def excess_duration(c):
# Cuts that are completely within the chapter reduce the chapter's duration.
# Since cuts can overlap, excess duration may be less than the sum of cuts' durations.
# To avoid that, the chapter stores the index of the first cut within the chapter,
# instead of storing excess duration. append_cut ensures that subsequent cuts (if any)
# will be merged with previous ones (if necessary).
cut_idx, excess = c.pop('cut_idx', len(cuts)), 0
while cut_idx < len(cuts):
cut = cuts[cut_idx]
if cut['start_time'] >= c['end_time']:
break
if cut['end_time'] > c['start_time']:
excess += min(cut['end_time'], c['end_time'])
excess -= max(cut['start_time'], c['start_time'])
cut_idx += 1
return excess
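# Illustrative example (not part of the patch): for a chapter spanning
# 10-60 with cuts 15-20 and 55-70 already recorded in `cuts`, the excess
# is (20-15) + (60-55) = 10, since only the portion of each cut that falls
# inside the chapter counts against it.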
new_chapters = []
def append_chapter(c):
assert 'remove' not in c, 'Cut is appended to chapters'
length = c['end_time'] - c['start_time'] - excess_duration(c)
# Chapter is completely covered by cuts or sponsors.
if length <= 0:
return
start = new_chapters[-1]['end_time'] if new_chapters else 0
c.update(start_time=start, end_time=start + length)
new_chapters.append(c)
# Turn into a priority queue, index is a tie breaker.
# Plain stack sorted by start_time is not enough: after splitting the chapter,
# the part returned to the stack is not guaranteed to have start_time
# less than or equal to that of the stack's head.
chapters = [(c['start_time'], i, c) for i, c in enumerate(chapters)]
heapq.heapify(chapters)
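# Illustrative scenario (not part of the patch): if a sponsor chapter covering
# 0-50 is split by a cut at 5-10, the remainder starts at 10, yet a chapter
# starting at 8 may still be waiting in the queue, so the remainder has to be
# re-inserted in start_time order, which a plain pre-sorted stack cannot do.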
_, cur_i, cur_chapter = heapq.heappop(chapters)
while chapters:
_, i, c = heapq.heappop(chapters)
# Non-overlapping chapters or cuts can be appended directly. However,
# adjacent non-overlapping cuts must be merged, which is handled by append_cut.
if cur_chapter['end_time'] <= c['start_time']:
(append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
cur_i, cur_chapter = i, c
continue
# Eight possibilities for overlapping chapters: (cut, cut), (cut, sponsor),
# (cut, normal), (sponsor, cut), (normal, cut), (sponsor, sponsor),
# (sponsor, normal), and (normal, sponsor). There is no (normal, normal):
# normal chapters are assumed not to overlap.
if 'remove' in cur_chapter:
# (cut, cut): adjust end_time.
if 'remove' in c:
cur_chapter['end_time'] = max(cur_chapter['end_time'], c['end_time'])
# (cut, sponsor/normal): chop the beginning of the later chapter
# (if it's not completely hidden by the cut). Push to the priority queue
# to restore sorting by start_time: with beginning chopped, c may actually
# start later than the remaining chapters from the queue.
elif cur_chapter['end_time'] < c['end_time']:
c['start_time'] = cur_chapter['end_time']
c['_was_cut'] = True
heapq.heappush(chapters, (c['start_time'], i, c))
# (sponsor/normal, cut).
elif 'remove' in c:
cur_chapter['_was_cut'] = True
# Chop the end of the current chapter if the cut is not contained within it.
# Chopping the end doesn't break start_time sorting, no PQ push is necessary.
if cur_chapter['end_time'] <= c['end_time']:
cur_chapter['end_time'] = c['start_time']
append_chapter(cur_chapter)
cur_i, cur_chapter = i, c
continue
# Current chapter contains the cut within it. If the current chapter is
# a sponsor chapter, check whether the categories before and after the cut differ.
if '_categories' in cur_chapter:
after_c = dict(cur_chapter, start_time=c['end_time'], _categories=[])
cur_cats = []
for cat_start_end in cur_chapter['_categories']:
if cat_start_end[1] < c['start_time']:
cur_cats.append(cat_start_end)
if cat_start_end[2] > c['end_time']:
after_c['_categories'].append(cat_start_end)
cur_chapter['_categories'] = cur_cats
if cur_chapter['_categories'] != after_c['_categories']:
# Categories before and after the cut differ: push the after part to PQ.
heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
cur_chapter['end_time'] = c['start_time']
append_chapter(cur_chapter)
cur_i, cur_chapter = i, c
continue
# Either sponsor categories before and after the cut are the same or
# we're dealing with a normal chapter. Just register an outstanding cut:
# subsequent append_chapter will reduce the duration.
cur_chapter.setdefault('cut_idx', append_cut(c))
# (sponsor, normal): if a normal chapter is not completely overlapped,
# chop the beginning of it and push it to PQ.
elif '_categories' in cur_chapter and '_categories' not in c:
if cur_chapter['end_time'] < c['end_time']:
c['start_time'] = cur_chapter['end_time']
c['_was_cut'] = True
heapq.heappush(chapters, (c['start_time'], i, c))
# (normal, sponsor) and (sponsor, sponsor)
else:
assert '_categories' in c, 'Normal chapters overlap'
cur_chapter['_was_cut'] = True
c['_was_cut'] = True
# Push the part after the sponsor to PQ.
if cur_chapter['end_time'] > c['end_time']:
# deepcopy to make categories in after_c and cur_chapter/c refer to different lists.
after_c = dict(copy.deepcopy(cur_chapter), start_time=c['end_time'])
heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
# Push the part after the overlap to PQ.
elif c['end_time'] > cur_chapter['end_time']:
after_cur = dict(copy.deepcopy(c), start_time=cur_chapter['end_time'])
heapq.heappush(chapters, (after_cur['start_time'], cur_i, after_cur))
c['end_time'] = cur_chapter['end_time']
# (sponsor, sponsor): merge categories in the overlap.
if '_categories' in cur_chapter:
c['_categories'] = cur_chapter['_categories'] + c['_categories']
# Inherit the cuts that the current chapter has accumulated within it.
if 'cut_idx' in cur_chapter:
c['cut_idx'] = cur_chapter['cut_idx']
cur_chapter['end_time'] = c['start_time']
append_chapter(cur_chapter)
cur_i, cur_chapter = i, c
(append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
return self._remove_tiny_rename_sponsors(new_chapters), cuts
def _remove_tiny_rename_sponsors(self, chapters):
new_chapters = []
for i, c in enumerate(chapters):
# Merge with the previous/next if the chapter is tiny.
# Only tiny chapters resulting from a cut can be skipped.
# Chapters that were already tiny in the original list will be preserved.
if (('_was_cut' in c or '_categories' in c)
and c['end_time'] - c['start_time'] < _TINY_CHAPTER_DURATION):
if not new_chapters:
# Prepend tiny chapter to the next one if possible.
if i < len(chapters) - 1:
chapters[i + 1]['start_time'] = c['start_time']
continue
else:
old_c = new_chapters[-1]
if i < len(chapters) - 1:
next_c = chapters[i + 1]
# Not a typo: key names in old_c and next_c are really different.
prev_is_sponsor = 'categories' in old_c
next_is_sponsor = '_categories' in next_c
# Preferentially prepend tiny normals to normals and sponsors to sponsors.
if (('_categories' not in c and prev_is_sponsor and not next_is_sponsor)
or ('_categories' in c and not prev_is_sponsor and next_is_sponsor)):
next_c['start_time'] = c['start_time']
continue
old_c['end_time'] = c['end_time']
continue
c.pop('_was_cut', None)
cats = c.pop('_categories', None)
if cats:
category, _, _, category_name = min(cats, key=lambda c: c[2] - c[1])
c.update({
'category': category,
'categories': orderedSet(x[0] for x in cats),
'name': category_name,
'category_names': orderedSet(x[3] for x in cats),
})
c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c.copy())
# Merge identically named sponsors.
if (new_chapters and 'categories' in new_chapters[-1]
and new_chapters[-1]['title'] == c['title']):
new_chapters[-1]['end_time'] = c['end_time']
continue
new_chapters.append(c)
return new_chapters
def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False):
in_file = filename
out_file = prepend_extension(in_file, 'temp')
if force_keyframes:
in_file = self.force_keyframes(in_file, (t for c in ranges_to_cut for t in (c['start_time'], c['end_time'])))
self.to_screen(f'Removing chapters from {filename}')
self.concat_files([in_file] * len(concat_opts), out_file, concat_opts)
if in_file != filename:
self._delete_downloaded_files(in_file, msg=None)
return out_file
# override to change the args ordering
def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
self.check_version()
oldest_mtime = min(
os.stat(path).st_mtime for path, _ in input_path_opts if path)
cmd = [self.executable, encodeArgument('-y')]
# avconv does not have repeat option
if self.basename == 'ffmpeg':
cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
def make_args(file, args, name, number):
keys = [f'_{name}{number}', f'_{name}']
if name == 'o':
args += ['-movflags', '+faststart']
if number == 1:
keys.append('')
args = self._configuration_args(self.basename, keys) + args
if name == 'i':
args.append('-i')
return (
[encodeArgument(arg) for arg in args]
+ [self._ffmpeg_filename_argument(file)])
for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
cmd += itertools.chain.from_iterable(
make_args(path, list(opts), arg_type, i + 1)
for i, (path, opts) in enumerate(path_opts) if path)
self.write_debug(f'ffmpeg command line: {shell_quote(cmd)}')
_, stderr, returncode = Popen.run(
cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
if returncode not in variadic(expected_retcodes):
self.write_debug(stderr)
raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1])
for out_path, _ in output_path_opts:
if out_path:
self.try_utime(out_path, oldest_mtime, oldest_mtime)
return stderr
@staticmethod
def _make_concat_opts(chapters_to_remove, duration):
opts = [{}]
for s in chapters_to_remove:
# Do not create 0 duration chunk at the beginning.
if s['start_time'] == 0:
opts[-1]['inpoint'] = f'{s["end_time"]:.6f}'
continue
opts[-1]['outpoint'] = f'{s["start_time"]:.6f}'
# Do not create 0 duration chunk at the end.
if s['end_time'] < duration:
opts.append({'inpoint': f'{s["end_time"]:.6f}'})
return opts
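# Worked example (illustrative, not part of the patch): with cuts at 0-5 and
# 20-30 in a 60 second file, the returned options are
#   [{'inpoint': '5.000000', 'outpoint': '20.000000'}, {'inpoint': '30.000000'}]
# i.e. keep 5-20 and 30-end while skipping the removed ranges.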

View File

@@ -5,83 +5,179 @@ from common.logger import log
from django.conf import settings
class ProgressHookStatus:
progress_hook = {
'status': dict(),
}
postprocessor_hook = {
'status': dict(),
}
class BaseStatus:
status_dict = dict()
valid = set()
@classmethod
def get(cls, key):
return cls.status_dict.get(key, None)
@classmethod
def valid_status(cls, status):
return status in cls.valid
def __init__(self, hook_status_dict=None):
self.media_key = None
self.task_status = '[Started: 0%]'
self.task_verbose_name = None
self._status_dict = hook_status_dict or self.status_dict
self._registered_keys = set()
def register(self, *args):
additions = dict()
for key in args:
if key is not None:
self._registered_keys.add(key)
additions[key] = self
self._status_dict.update(additions)
def cleanup(self):
for key in self._registered_keys:
if key in self._status_dict:
del self._status_dict[key]
def update_task(self):
if self.media_key is None:
return
from .models import Media
from .tasks import get_media_download_task
media = Media.objects.get(key=self.media_key)
task = get_media_download_task(str(media.pk))
if task:
if self.task_verbose_name is None:
# clean up any previously prepended task_status
# this happened because of duplicated tasks on my test system
s = task.verbose_name
cleaned = s[1+s.find(' Downloading '):]
self.task_verbose_name = cleaned
task.verbose_name = f'{self.task_status} {self.task_verbose_name}'
task.save()
class ProgressHookStatus(BaseStatus):
status_dict = progress_hook['status']
valid = frozenset((
'downloading',
'finished',
'error',
))
def __init__(self):
def __init__(self, *args, status=None, info_dict={}, filename=None, **kwargs):
super().__init__(self.status_dict)
self.filename = filename
self.info = info_dict
self.status = status
self.download_progress = 0
class PPHookStatus:
def next_progress(self):
if 0 == self.download_progress:
return 0
return 1 + self.download_progress
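# Note (illustrative): returning download_progress + 1 once progress has been
# recorded means the "status.next_progress() < percent" check in the hook below
# only logs again after the percentage has moved past the last stored value.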
class PPHookStatus(BaseStatus):
status_dict = postprocessor_hook['status']
valid = frozenset((
'started',
'processing',
'finished',
))
def __init__(self, *args, status=None, postprocessor=None, info_dict={}, **kwargs):
def __init__(self, *args, status=None, postprocessor=None, info_dict={}, filename=None, **kwargs):
super().__init__(self.status_dict)
self.filename = filename
self.info = info_dict
self.media_name = None
self.name = postprocessor
self.status = status
def yt_dlp_progress_hook(event):
hook = progress_hook.get('status', None)
filename = os.path.basename(event['filename'])
if hook is None:
log.error('yt_dlp_progress_hook: failed to get hook status object')
if not ProgressHookStatus.valid_status(event['status']):
log.warn(f'[youtube-dl] unknown progress event: {str(event)}')
return None
if event['status'] not in ProgressHookStatus.valid:
log.warn(f'[youtube-dl] unknown event: {str(event)}')
return None
key = None
if 'display_id' in event['info_dict']:
key = event['info_dict']['display_id']
elif 'id' in event['info_dict']:
key = event['info_dict']['id']
if event.get('downloaded_bytes') is None or event.get('total_bytes') is None:
return None
if event['status'] == 'error':
filename = os.path.basename(event.get('filename', '???'))
if 'error' == event['status']:
log.error(f'[youtube-dl] error occurred downloading: {filename}')
elif event['status'] == 'downloading':
downloaded_bytes = event.get('downloaded_bytes', 0)
total_bytes = event.get('total_bytes', 0)
elif 'downloading' == event['status']:
# get or create the status for filename
status = ProgressHookStatus.get(filename)
if status is None:
status = ProgressHookStatus(**event)
status.register(key, filename, status.filename)
downloaded_bytes = event.get('downloaded_bytes', 0) or 0
total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0
total_bytes = event.get('total_bytes', 0) or total_bytes_estimate
fragment_index = event.get('fragment_index', 0) or 0
fragment_count = event.get('fragment_count', 0) or 0
eta = event.get('_eta_str', '?').strip()
percent_done = event.get('_percent_str', '?').strip()
percent_str = event.get('_percent_str', '?').strip()
speed = event.get('_speed_str', '?').strip()
total = event.get('_total_bytes_str', '?').strip()
if downloaded_bytes > 0 and total_bytes > 0:
p = round((event['downloaded_bytes'] / event['total_bytes']) * 100)
if (p % 5 == 0) and p > hook.download_progress:
hook.download_progress = p
log.info(f'[youtube-dl] downloading: {filename} - {percent_done} '
f'of {total} at {speed}, {eta} remaining')
else:
# No progress to monitor, just spam every 10 download messages instead
hook.download_progress += 1
if hook.download_progress % 10 == 0:
log.info(f'[youtube-dl] downloading: {filename} - {percent_done} '
f'of {total} at {speed}, {eta} remaining')
elif event['status'] == 'finished':
percent = None
try:
percent = int(float(percent_str.rstrip('%')))
except:
pass
if fragment_index >= 0 and fragment_count > 0:
percent = round(100 * fragment_index / fragment_count)
percent_str = f'{percent}%'
elif downloaded_bytes >= 0 and total_bytes > 0:
percent = round(100 * downloaded_bytes / total_bytes)
if percent and (status.next_progress() < percent) and (0 == percent % 5):
status.download_progress = percent
if key:
status.media_key = key
status.task_status = f'[downloading: {percent_str}]'
status.update_task()
log.info(f'[youtube-dl] downloading: {filename} - {percent_str} '
f'of {total} at {speed}, {eta} remaining')
elif 'finished' == event['status']:
# update the status for filename to the finished value
status = ProgressHookStatus.get(filename)
if status is None:
status = ProgressHookStatus(**event)
status.register(key, filename, status.filename)
status.download_progress = 100
total_size_str = event.get('_total_bytes_str', '?').strip()
elapsed_str = event.get('_elapsed_str', '?').strip()
log.info(f'[youtube-dl] finished downloading: {filename} - '
f'{total_size_str} in {elapsed_str}')
status.cleanup()
def yt_dlp_postprocessor_hook(event):
if event['status'] not in PPHookStatus.valid:
log.warn(f'[youtube-dl] unknown event: {str(event)}')
if not PPHookStatus.valid_status(event['status']):
log.warn(f'[youtube-dl] unknown postprocessor event: {str(event)}')
return None
postprocessor_hook['status'] = PPHookStatus(*event)
name = key = 'Unknown'
filename = os.path.basename(event.get('filename', '???'))
if 'display_id' in event['info_dict']:
key = event['info_dict']['display_id']
elif 'id' in event['info_dict']:
key = event['info_dict']['id']
status = PPHookStatus(**event)
status.register(key, filename, status.filename)
title = None
if 'fulltitle' in event['info_dict']:
title = event['info_dict']['fulltitle']
@@ -91,6 +187,8 @@ def yt_dlp_postprocessor_hook(event):
if title:
name = f'{key}: {title}'
status.media_name = name
if 'started' == event['status']:
if 'formats' in event['info_dict']:
del event['info_dict']['formats']
@@ -98,16 +196,23 @@ def yt_dlp_postprocessor_hook(event):
del event['info_dict']['automatic_captions']
log.debug(repr(event['info_dict']))
if 'Unknown' != key:
status.media_key = key
status.task_status = f'[{event["postprocessor"]}: {event["status"]}]'
status.update_task()
log.info(f'[{event["postprocessor"]}] {event["status"]} for: {name}')
if 'finished' == event['status']:
status.cleanup()
progress_hook = {
'status': ProgressHookStatus(),
progress_hook.update({
'class': ProgressHookStatus(),
'function': yt_dlp_progress_hook,
}
})
postprocessor_hook = {
'status': PPHookStatus(),
postprocessor_hook.update({
'class': PPHookStatus(),
'function': yt_dlp_postprocessor_hook,
}
})
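# Usage sketch (assumption, not shown in this diff): the 'function' entries are
# what gets handed to yt-dlp elsewhere in the app, roughly like:
#   opts.update({
#       'progress_hooks': [progress_hook['function']],
#       'postprocessor_hooks': [postprocessor_hook['function']],
#   })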

View File

@@ -664,6 +664,11 @@ class Media(models.Model):
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date',
Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date',
},
'timestamp': {
Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp',
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp',
Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp',
},
'title': {
Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title',
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title',
@@ -930,7 +935,7 @@ class Media(models.Model):
def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
# Trigger an update of derived fields from metadata
if self.metadata:
self.title = self.metadata_title
self.title = self.metadata_title[:200]
self.duration = self.metadata_duration
if update_fields is not None and "metadata" in update_fields:
# If only some fields are being updated, make sure we update title and duration if metadata changes
@@ -944,7 +949,7 @@ class Media(models.Model):
def get_metadata_field(self, field):
fields = self.METADATA_FIELDS.get(field, {})
return fields.get(self.source.source_type, '')
return fields.get(self.source.source_type, field)
def iter_formats(self):
for fmt in self.formats:
@@ -1561,6 +1566,8 @@ class Media(models.Model):
if self.downloaded and self.media_file:
old_video_path = Path(self.media_file.path)
new_video_path = Path(get_media_file_path(self, None))
if old_video_path == new_video_path:
return
if old_video_path.exists() and not new_video_path.exists():
old_video_path = old_video_path.resolve(strict=True)

View File

@@ -14,7 +14,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
map_task_to_instance, check_source_directory_exists,
download_media, rescan_media_server, download_source_images,
save_all_media_for_source, rename_all_media_for_source,
get_media_metadata_task)
get_media_metadata_task, get_media_download_task)
from .utils import delete_file, glob_quote
from .filtering import filter_media
@@ -86,7 +86,7 @@ def source_post_save(sender, instance, created, **kwargs):
queue=str(instance.pk),
priority=1,
verbose_name=verbose_name.format(instance.name),
remove_existing_tasks=False
remove_existing_tasks=True
)
verbose_name = _('Checking all media for source "{}"')
save_all_media_for_source(
@@ -156,8 +156,9 @@ def media_post_save(sender, instance, created, **kwargs):
post_save.disconnect(media_post_save, sender=Media)
instance.save()
post_save.connect(media_post_save, sender=Media)
existing_media_metadata_task = get_media_metadata_task(str(instance.pk))
# If the media is missing metadata schedule it to be downloaded
if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
if not (instance.skip or instance.metadata or existing_media_metadata_task):
log.info(f'Scheduling task to download metadata for: {instance.url}')
verbose_name = _('Downloading metadata for "{}"')
download_media_metadata(
@@ -183,13 +184,13 @@
verbose_name=verbose_name.format(instance.name),
remove_existing_tasks=True
)
existing_media_download_task = get_media_download_task(str(instance.pk))
# If the media has not yet been downloaded schedule it to be downloaded
if not instance.media_file_exists:
if not (instance.media_file_exists or existing_media_download_task):
instance.downloaded = False
instance.media_file = None
if (not instance.downloaded and instance.can_download and not instance.skip
and instance.source.download_media):
delete_task_by_media('sync.tasks.download_media', (str(instance.pk),))
if (instance.source.download_media and instance.can_download) and not (
instance.skip or instance.downloaded or existing_media_download_task):
verbose_name = _('Downloading media for "{}"')
download_media(
str(instance.pk),
@@ -225,6 +226,11 @@ def media_post_delete(sender, instance, **kwargs):
other_path = video_path.with_suffix(f'.{suffix}').resolve()
log.info(f'Deleting file for: {instance} path: {other_path!s}')
delete_file(other_path)
# subtitles include language code
subtitle_files = video_path.parent.glob(f'{glob_quote(video_path.with_suffix("").name)}*.vtt')
for file in subtitle_files:
log.info(f'Deleting file for: {instance} path: {file}')
delete_file(file)
# Jellyfin creates .trickplay directories and posters
for suffix in frozenset(('.trickplay', '-poster.jpg', '-poster.webp',)):
# with_suffix insists on suffix beginning with '.' for no good reason

View File

@@ -10,7 +10,7 @@ import math
import uuid
from io import BytesIO
from hashlib import sha1
from datetime import timedelta, datetime
from datetime import datetime, timedelta, timezone as tz
from shutil import copyfile
from PIL import Image
from django.conf import settings
@@ -27,7 +27,6 @@ from common.utils import json_serial
from .models import Source, Media, MediaServer
from .utils import (get_remote_image, resize_image_to_height, delete_file,
write_text_file, filter_response)
from .filtering import filter_media
from .youtube import YouTubeError
@@ -202,6 +201,7 @@ def index_source_task(source_id):
source.last_crawl = timezone.now()
source.save()
log.info(f'Found {len(videos)} media items for source: {source}')
fields = lambda f, m: m.get_metadata_field(f)
for video in videos:
# Create or update each video as a Media object
key = video.get(source.key_field, None)
@@ -213,6 +213,18 @@
except Media.DoesNotExist:
media = Media(key=key)
media.source = source
media.duration = float(video.get(fields('duration', media), 0)) or None
media.title = str(video.get(fields('title', media), ''))[:200]
timestamp = video.get(fields('timestamp', media), None)
if timestamp is not None:
try:
timestamp_float = float(timestamp)
posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
published_dt = posix_epoch + timedelta(seconds=timestamp_float)
except Exception as e:
log.warn(f'Could not set published for: {source} / {media} with "{e}"')
else:
media.published = published_dt
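# Example (illustrative): a flat-extraction entry carrying 'timestamp': 1700000000
# produces published = 2023-11-14 22:13:20 UTC via the epoch + timedelta above.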
try:
media.save()
log.debug(f'Indexed media: {source} / {media}')

View File

@@ -203,22 +203,39 @@ def normalize_codec(codec_str):
return result
def list_of_dictionaries(arg_list, arg_function=lambda x: x):
assert callable(arg_function)
if isinstance(arg_list, list):
def _call_func_with_dict(arg_dict):
if isinstance(arg_dict, dict):
return arg_function(arg_dict)
return arg_dict
return (True, list(map(_call_func_with_dict, arg_list)),)
return (False, arg_list,)
def _url_keys(arg_dict, filter_func):
result = {}
for key in arg_dict.keys():
if 'url' in key:
result.update(
{key: filter_func(key=key, url=arg_dict[key])}
)
if isinstance(arg_dict, dict):
for key, value in arg_dict.items():
if 'url' in key:
result.update(
{key: filter_func(key=key, url=value)}
)
return result
# expects a dictionary where the value at key is a list of dictionaries
def _drop_url_keys(arg_dict, key, filter_func):
def _del_url_keys(_arg_dict):
for url_key, remove in _url_keys(_arg_dict, filter_func).items():
if remove is True:
del _arg_dict[url_key]
assert isinstance(arg_dict, dict)
if key in arg_dict.keys():
for val_dict in arg_dict[key]:
for url_key, remove in _url_keys(val_dict, filter_func).items():
if remove is True:
del val_dict[url_key]
list_of_dictionaries(arg_dict[key], _del_url_keys)
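# Illustrative example (not part of the patch): given
#   {'formats': [{'url': '...', 'manifest_url': '...', 'format_id': '22'}]}
# and a filter_func returning True for every URL key, _drop_url_keys deletes
# 'url' and 'manifest_url' from each format dict and keeps 'format_id'.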
def filter_response(arg_dict, copy_arg=False):
@@ -260,13 +277,15 @@ def filter_response(arg_dict, copy_arg=False):
'__needs_testing',
'__working',
))
for key in frozenset(('formats', 'requested_formats',)):
_drop_url_keys(response_dict, key, drop_format_url)
def del_drop_keys(arg_dict):
for drop_key in drop_keys:
if drop_key in arg_dict.keys():
del arg_dict[drop_key]
for key in ('formats', 'requested_formats',):
if key in response_dict.keys():
for format in response_dict[key]:
for drop_key in drop_keys:
if drop_key in format.keys():
del format[drop_key]
_drop_url_keys(response_dict, key, drop_format_url)
list_of_dictionaries(response_dict[key], del_drop_keys)
# end of formats cleanup }}}
# beginning of subtitles cleanup {{{
@@ -282,12 +301,19 @@
)
)
for key in frozenset(('subtitles', 'automatic_captions',)):
for key in ('subtitles', 'requested_subtitles', 'automatic_captions',):
if key in response_dict.keys():
key_dict = response_dict[key]
for lang_code in key_dict:
_drop_url_keys(key_dict, lang_code, drop_subtitles_url)
lang_codes = response_dict[key]
if isinstance(lang_codes, dict):
for lang_code in lang_codes.keys():
_drop_url_keys(lang_codes, lang_code, drop_subtitles_url)
# end of subtitles cleanup }}}
# beginning of heatmap cleanup {{{
for key in ('heatmap',):
if key in response_dict.keys():
del response_dict[key]
# end of heatmap cleanup }}}
return response_dict

View File

@@ -143,6 +143,7 @@ def get_media_info(url):
'simulate': True,
'logger': log,
'extract_flat': True,
'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
})
response = {}
with yt_dlp.YoutubeDL(opts) as y:
@@ -224,6 +225,10 @@ def download_media(
'sponskrub': False,
})
pp_opts.exec_cmd.update(
opts.get('exec_cmd', default_opts.exec_cmd)
)
if skip_sponsors:
# Let yt_dlp convert from the human-readable form for us.
pp_opts.sponsorblock_mark = yt_dlp.parse_options(
@@ -242,7 +247,7 @@
'writesubtitles': write_subtitles,
'writeautomaticsub': auto_subtitles,
'subtitleslangs': sub_langs.split(','),
'writethumbnail': True,
'writethumbnail': embed_thumbnail,
'check_formats': False,
'overwrites': None,
'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20),
@@ -279,9 +284,11 @@
codec_options = list()
ofn = ytopts['outtmpl']
if 'av1-' in ofn:
codec_options = ['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35']
codec_options.extend(['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35'])
elif 'vp9-' in ofn:
codec_options = ['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31']
codec_options.extend(['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31', '-row-mt', '1', '-tile-columns', '2'])
if '-opus' in ofn:
codec_options.extend(['-c:a', 'libopus'])
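# Example (illustrative): an output template containing both 'av1-' and '-opus'
# ends up with codec_options of
#   ['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35', '-c:a', 'libopus']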
set_ffmpeg_codec = not (
ytopts['postprocessor_args'] and
ytopts['postprocessor_args']['modifychapters+ffmpeg']