From 5a477eb97e954c9dbe7974719f5756c15b981570 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 5 Feb 2025 03:56:48 -0500 Subject: [PATCH 01/59] Stop when the paths match --- tubesync/sync/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 2daeb094..69b602d8 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1561,6 +1561,8 @@ class Media(models.Model): if self.downloaded and self.media_file: old_video_path = Path(self.media_file.path) new_video_path = Path(get_media_file_path(self, None)) + if old_video_path == new_video_path: + return if old_video_path.exists() and not new_video_path.exists(): old_video_path = old_video_path.resolve(strict=True) From dfc63a2b94ff8dafbbecccb9d12d7353d571ac16 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 6 Feb 2025 18:06:15 -0500 Subject: [PATCH 02/59] Reworked download progress hook Be more resilient about missing keys. Log the downloading state the first time through. 
--- tubesync/sync/hooks.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index cd0f208f..f14226cf 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -30,7 +30,6 @@ class PPHookStatus: def yt_dlp_progress_hook(event): hook = progress_hook.get('status', None) - filename = os.path.basename(event['filename']) if hook is None: log.error('yt_dlp_progress_hook: failed to get hook status object') return None @@ -39,31 +38,29 @@ def yt_dlp_progress_hook(event): log.warn(f'[youtube-dl] unknown event: {str(event)}') return None - if event.get('downloaded_bytes') is None or event.get('total_bytes') is None: - return None - - if event['status'] == 'error': + filename = os.path.basename(event.get('filename', '???')) + if 'error' == event['status']: log.error(f'[youtube-dl] error occured downloading: {filename}') - elif event['status'] == 'downloading': - downloaded_bytes = event.get('downloaded_bytes', 0) - total_bytes = event.get('total_bytes', 0) + elif 'downloading' == event['status']: + downloaded_bytes = event.get('downloaded_bytes', 0) or 0 + total_bytes = event.get('total_bytes', 0) or 0 eta = event.get('_eta_str', '?').strip() percent_done = event.get('_percent_str', '?').strip() speed = event.get('_speed_str', '?').strip() total = event.get('_total_bytes_str', '?').strip() if downloaded_bytes > 0 and total_bytes > 0: - p = round((event['downloaded_bytes'] / event['total_bytes']) * 100) - if (p % 5 == 0) and p > hook.download_progress: + p = round(100 * downloaded_bytes / total_bytes) + if (0 == p % 5) and p > hook.download_progress: hook.download_progress = p log.info(f'[youtube-dl] downloading: {filename} - {percent_done} ' f'of {total} at {speed}, {eta} remaining') else: # No progress to monitor, just spam every 10 download messages instead - hook.download_progress += 1 - if hook.download_progress % 10 == 0: + if 0 == hook.download_progress % 10: 
log.info(f'[youtube-dl] downloading: {filename} - {percent_done} ' f'of {total} at {speed}, {eta} remaining') - elif event['status'] == 'finished': + hook.download_progress += 1 + elif 'finished' == event['status']: total_size_str = event.get('_total_bytes_str', '?').strip() elapsed_str = event.get('_elapsed_str', '?').strip() log.info(f'[youtube-dl] finished downloading: {filename} - ' From 6d71c698783283ddc6f6af09bbadb3e7bb1b2c43 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 6 Feb 2025 20:02:14 -0500 Subject: [PATCH 03/59] Reset `hook.download_progress` so that it isn't above 100% --- tubesync/sync/hooks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index f14226cf..3ea32f9b 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -43,7 +43,8 @@ def yt_dlp_progress_hook(event): log.error(f'[youtube-dl] error occured downloading: {filename}') elif 'downloading' == event['status']: downloaded_bytes = event.get('downloaded_bytes', 0) or 0 - total_bytes = event.get('total_bytes', 0) or 0 + total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0 + total_bytes = event.get('total_bytes', 0) or total_bytes_estimate eta = event.get('_eta_str', '?').strip() percent_done = event.get('_percent_str', '?').strip() speed = event.get('_speed_str', '?').strip() @@ -57,6 +58,7 @@ def yt_dlp_progress_hook(event): else: # No progress to monitor, just spam every 10 download messages instead if 0 == hook.download_progress % 10: + hook.download_progress = 0 log.info(f'[youtube-dl] downloading: {filename} - {percent_done} ' f'of {total} at {speed}, {eta} remaining') hook.download_progress += 1 From 36a9902d26fc07eae565a14d59bb0210e8363cab Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 6 Feb 2025 20:31:32 -0500 Subject: [PATCH 04/59] Reset `hook.download_progress` before downloading --- tubesync/sync/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tubesync/sync/youtube.py 
b/tubesync/sync/youtube.py index 3ea3c333..b9763856 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -274,6 +274,9 @@ def download_media( progress_hook_func = progress_hook.get('function', None) if progress_hook_func: + hook = progress_hook.get('status', None) + if hook: + hook.download_progress = 0 ytopts['progress_hooks'].append(progress_hook_func) codec_options = list() From c24178f7d67cb0f7d1c1b6bb92e53c934d5358c8 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 6 Feb 2025 22:24:43 -0500 Subject: [PATCH 05/59] Support multiple independent statuses --- tubesync/sync/hooks.py | 106 ++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 44 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 3ea32f9b..3244a8f5 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -29,58 +29,72 @@ class PPHookStatus: def yt_dlp_progress_hook(event): - hook = progress_hook.get('status', None) - if hook is None: - log.error('yt_dlp_progress_hook: failed to get hook status object') - return None - if event['status'] not in ProgressHookStatus.valid: log.warn(f'[youtube-dl] unknown event: {str(event)}') return None - filename = os.path.basename(event.get('filename', '???')) - if 'error' == event['status']: - log.error(f'[youtube-dl] error occured downloading: {filename}') - elif 'downloading' == event['status']: - downloaded_bytes = event.get('downloaded_bytes', 0) or 0 - total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0 - total_bytes = event.get('total_bytes', 0) or total_bytes_estimate - eta = event.get('_eta_str', '?').strip() - percent_done = event.get('_percent_str', '?').strip() - speed = event.get('_speed_str', '?').strip() - total = event.get('_total_bytes_str', '?').strip() - if downloaded_bytes > 0 and total_bytes > 0: - p = round(100 * downloaded_bytes / total_bytes) - if (0 == p % 5) and p > hook.download_progress: - hook.download_progress = p - log.info(f'[youtube-dl] 
downloading: {filename} - {percent_done} ' - f'of {total} at {speed}, {eta} remaining') - else: - # No progress to monitor, just spam every 10 download messages instead - if 0 == hook.download_progress % 10: - hook.download_progress = 0 - log.info(f'[youtube-dl] downloading: {filename} - {percent_done} ' - f'of {total} at {speed}, {eta} remaining') - hook.download_progress += 1 - elif 'finished' == event['status']: - total_size_str = event.get('_total_bytes_str', '?').strip() - elapsed_str = event.get('_elapsed_str', '?').strip() - log.info(f'[youtube-dl] finished downloading: {filename} - ' - f'{total_size_str} in {elapsed_str}') - -def yt_dlp_postprocessor_hook(event): - if event['status'] not in PPHookStatus.valid: - log.warn(f'[youtube-dl] unknown event: {str(event)}') - return None - - postprocessor_hook['status'] = PPHookStatus(*event) - name = key = 'Unknown' if 'display_id' in event['info_dict']: key = event['info_dict']['display_id'] elif 'id' in event['info_dict']: key = event['info_dict']['id'] + filename = os.path.basename(event.get('filename', '???')) + if 'error' == event['status']: + log.error(f'[youtube-dl] error occured downloading: {filename}') + elif 'downloading' == event['status']: + # get or create the status for key + status = progress_hook['status'].get(key, None) + if status is None: + progress_hook['status'].update({key: ProgressHookStatus()}) + + downloaded_bytes = event.get('downloaded_bytes', 0) or 0 + total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0 + total_bytes = event.get('total_bytes', 0) or total_bytes_estimate + eta = event.get('_eta_str', '?').strip() + percent_str = event.get('_percent_str', '?').strip() + speed = event.get('_speed_str', '?').strip() + total = event.get('_total_bytes_str', '?').strip() + percent = None + try: + percent = int(float(percent_str.rstrip('%'))) + except: + pass + if downloaded_bytes > 0 and total_bytes > 0: + percent = round(100 * downloaded_bytes / total_bytes) + if percent and (0 < 
percent) and (0 == percent % 5): + log.info(f'[youtube-dl] downloading: {filename} - {percent_str} ' + f'of {total} at {speed}, {eta} remaining') + status.download_progress = percent or 0 + elif 'finished' == event['status']: + # update the status for key to the finished value + status = progress_hook['status'].get(key, None) + if status is None: + progress_hook['status'].update({key: ProgressHookStatus()}) + status.download_progress = 100 + + total_size_str = event.get('_total_bytes_str', '?').strip() + elapsed_str = event.get('_elapsed_str', '?').strip() + log.info(f'[youtube-dl] finished downloading: {filename} - ' + f'{total_size_str} in {elapsed_str}') + + # clean up the status for key + if key in progress_hook['status']: + del progress_hook['status'][key] + +def yt_dlp_postprocessor_hook(event): + if event['status'] not in PPHookStatus.valid: + log.warn(f'[youtube-dl] unknown event: {str(event)}') + return None + + name = key = 'Unknown' + if 'display_id' in event['info_dict']: + key = event['info_dict']['display_id'] + elif 'id' in event['info_dict']: + key = event['info_dict']['id'] + + postprocessor_hook['status'].update({key: PPHookStatus(*event)}) + title = None if 'fulltitle' in event['info_dict']: title = event['info_dict']['fulltitle'] @@ -98,15 +112,19 @@ def yt_dlp_postprocessor_hook(event): log.debug(repr(event['info_dict'])) log.info(f'[{event["postprocessor"]}] {event["status"]} for: {name}') + if 'finished' == event['status'] and key in postprocessor_hook['status']: + del postprocessor_hook['status'][key] progress_hook = { - 'status': ProgressHookStatus(), + 'class': ProgressHookStatus(), 'function': yt_dlp_progress_hook, + 'status': dict(), } postprocessor_hook = { - 'status': PPHookStatus(), + 'class': PPHookStatus(), 'function': yt_dlp_postprocessor_hook, + 'status': dict(), } From 3ec1d2dc29dbc901c3c4c398c27ec8240195e4a4 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 6 Feb 2025 22:26:23 -0500 Subject: [PATCH 06/59] Initialization is done by 
the class now --- tubesync/sync/youtube.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index b9763856..3ea3c333 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -274,9 +274,6 @@ def download_media( progress_hook_func = progress_hook.get('function', None) if progress_hook_func: - hook = progress_hook.get('status', None) - if hook: - hook.download_progress = 0 ytopts['progress_hooks'].append(progress_hook_func) codec_options = list() From 194e84b074eef94a1640986f4fcbb47822019da9 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 6 Feb 2025 22:36:56 -0500 Subject: [PATCH 07/59] Differentiate the unknown event logs --- tubesync/sync/hooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 3244a8f5..f39f7e76 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -30,7 +30,7 @@ class PPHookStatus: def yt_dlp_progress_hook(event): if event['status'] not in ProgressHookStatus.valid: - log.warn(f'[youtube-dl] unknown event: {str(event)}') + log.warn(f'[youtube-dl] unknown progress event: {str(event)}') return None name = key = 'Unknown' @@ -84,7 +84,7 @@ def yt_dlp_progress_hook(event): def yt_dlp_postprocessor_hook(event): if event['status'] not in PPHookStatus.valid: - log.warn(f'[youtube-dl] unknown event: {str(event)}') + log.warn(f'[youtube-dl] unknown postprocessor event: {str(event)}') return None name = key = 'Unknown' From 3b14e03842d1ae46b24850472621379fd5aeedc7 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 01:50:23 -0500 Subject: [PATCH 08/59] Add & use functions from status classes --- tubesync/sync/hooks.py | 78 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 15 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index f39f7e76..491d0bbb 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -12,8 +12,31 @@ class 
ProgressHookStatus: 'error', )) - def __init__(self): + def valid_status(status): + return status in self.valid + + def get(key): + return progress_hook['status'].get(key, None) + + def __init__(self, *args, status=None, info_dict={}, filename=None, **kwargs): + self.filename = filename + self.info = info_dict + self.status = status self.download_progress = 0 + self._registered_keys = set() + + def register(self, *args): + additions = dict() + for key in args: + if key is not None: + self._registered_keys.add(key) + additions[key] = self + progress_hook['status'].update(additions) + + def cleanup(self): + for key in self._registered_keys: + if key in progress_hook['status']: + del progress_hook['status'][key] class PPHookStatus: valid = frozenset(( @@ -22,18 +45,39 @@ class PPHookStatus: 'finished', )) - def __init__(self, *args, status=None, postprocessor=None, info_dict={}, **kwargs): + def valid_status(status): + return status in self.valid + + def get(key): + return postprocessor_hook['status'].get(key, None) + + def __init__(self, *args, status=None, postprocessor=None, info_dict={}, filename=None, **kwargs): + self.filename = filename self.info = info_dict + self.media_name = None self.name = postprocessor self.status = status + self._registered_keys = set() + def register(self, *args): + additions = dict() + for key in args: + if key is not None: + self._registered_keys.add(key) + additions[key] = self + postprocessor_hook['status'].update(additions) + + def cleanup(self): + for key in self._registered_keys: + if key in postprocessor_hook['status']: + del postprocessor_hook['status'][key] def yt_dlp_progress_hook(event): - if event['status'] not in ProgressHookStatus.valid: + if not ProgressHookStatus.valid_status(event['status']): log.warn(f'[youtube-dl] unknown progress event: {str(event)}') return None - name = key = 'Unknown' + key = 'Unknown' if 'display_id' in event['info_dict']: key = event['info_dict']['display_id'] elif 'id' in event['info_dict']: 
@@ -44,9 +88,10 @@ def yt_dlp_progress_hook(event): log.error(f'[youtube-dl] error occured downloading: {filename}') elif 'downloading' == event['status']: # get or create the status for key - status = progress_hook['status'].get(key, None) + status = ProgressHookStatus.get(key) if status is None: - progress_hook['status'].update({key: ProgressHookStatus()}) + status = ProgressHookStatus(**event) + status.register(key, filename, status.filename) downloaded_bytes = event.get('downloaded_bytes', 0) or 0 total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0 @@ -68,9 +113,10 @@ def yt_dlp_progress_hook(event): status.download_progress = percent or 0 elif 'finished' == event['status']: # update the status for key to the finished value - status = progress_hook['status'].get(key, None) + status = ProgressHookStatus.get(key) if status is None: - progress_hook['status'].update({key: ProgressHookStatus()}) + status = ProgressHookStatus(**event) + status.register(key, filename, status.filename) status.download_progress = 100 total_size_str = event.get('_total_bytes_str', '?').strip() @@ -78,22 +124,22 @@ def yt_dlp_progress_hook(event): log.info(f'[youtube-dl] finished downloading: {filename} - ' f'{total_size_str} in {elapsed_str}') - # clean up the status for key - if key in progress_hook['status']: - del progress_hook['status'][key] + status.cleanup() def yt_dlp_postprocessor_hook(event): - if event['status'] not in PPHookStatus.valid: + if not PPHookStatus.valid_status(event['status']): log.warn(f'[youtube-dl] unknown postprocessor event: {str(event)}') return None name = key = 'Unknown' + filename = os.path.basename(event.get('filename', '???')) if 'display_id' in event['info_dict']: key = event['info_dict']['display_id'] elif 'id' in event['info_dict']: key = event['info_dict']['id'] - postprocessor_hook['status'].update({key: PPHookStatus(*event)}) + status = PPHookStatus(**event) + status.register(key, filename, status.filename) title = None if 'fulltitle' 
in event['info_dict']: @@ -104,6 +150,8 @@ def yt_dlp_postprocessor_hook(event): if title: name = f'{key}: {title}' + status.media_name = name + if 'started' == event['status']: if 'formats' in event['info_dict']: del event['info_dict']['formats'] @@ -112,8 +160,8 @@ def yt_dlp_postprocessor_hook(event): log.debug(repr(event['info_dict'])) log.info(f'[{event["postprocessor"]}] {event["status"]} for: {name}') - if 'finished' == event['status'] and key in postprocessor_hook['status']: - del postprocessor_hook['status'][key] + if 'finished' == event['status']: + status.cleanup() progress_hook = { From 08842752d487bd3f4cb5af8580d60fb36ad56095 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 03:06:21 -0500 Subject: [PATCH 09/59] We can't use `self` without an instance --- tubesync/sync/hooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 491d0bbb..2f248a48 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -13,7 +13,7 @@ class ProgressHookStatus: )) def valid_status(status): - return status in self.valid + return status in ProgressHookStatus.valid def get(key): return progress_hook['status'].get(key, None) @@ -46,7 +46,7 @@ class PPHookStatus: )) def valid_status(status): - return status in self.valid + return status in PPHookStatus.valid def get(key): return postprocessor_hook['status'].get(key, None) From 75df4a57e38171c77bae878d27456846fae0f3a4 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 03:44:12 -0500 Subject: [PATCH 10/59] Move common methods to a `BaseStatus` class --- tubesync/sync/hooks.py | 66 +++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 2f248a48..c669a3b9 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -5,72 +5,66 @@ from common.logger import log from django.conf import settings -class ProgressHookStatus: 
+class BaseStatus: + valid = set() + + @classmethod + def valid_status(cls, status): + return status in cls.valid + + def __init__(self, hook_status_dict=None): + self._status_dict = hook_status_dict or dict() + self._registered_keys = set() + + def register(self, *args): + additions = dict() + for key in args: + if key is not None: + self._registered_keys.add(key) + additions[key] = self + self._status_dict.update(additions) + + def cleanup(self): + for key in self._registered_keys: + if key in self._status_dict: + del self._status_dict[key] + +class ProgressHookStatus(BaseStatus): valid = frozenset(( 'downloading', 'finished', 'error', )) - def valid_status(status): - return status in ProgressHookStatus.valid - + @staticmethod def get(key): return progress_hook['status'].get(key, None) def __init__(self, *args, status=None, info_dict={}, filename=None, **kwargs): + super().__init__(progress_hook['status']) self.filename = filename self.info = info_dict self.status = status self.download_progress = 0 - self._registered_keys = set() - def register(self, *args): - additions = dict() - for key in args: - if key is not None: - self._registered_keys.add(key) - additions[key] = self - progress_hook['status'].update(additions) - - def cleanup(self): - for key in self._registered_keys: - if key in progress_hook['status']: - del progress_hook['status'][key] - -class PPHookStatus: +class PPHookStatus(BaseStatus): valid = frozenset(( 'started', 'processing', 'finished', )) - def valid_status(status): - return status in PPHookStatus.valid - + @staticmethod def get(key): return postprocessor_hook['status'].get(key, None) def __init__(self, *args, status=None, postprocessor=None, info_dict={}, filename=None, **kwargs): + super().__init__(postprocessor_hook['status']) self.filename = filename self.info = info_dict self.media_name = None self.name = postprocessor self.status = status - self._registered_keys = set() - - def register(self, *args): - additions = dict() - for key in 
args: - if key is not None: - self._registered_keys.add(key) - additions[key] = self - postprocessor_hook['status'].update(additions) - - def cleanup(self): - for key in self._registered_keys: - if key in postprocessor_hook['status']: - del postprocessor_hook['status'][key] def yt_dlp_progress_hook(event): if not ProgressHookStatus.valid_status(event['status']): From 3c4849551bc9e32a9ef502cb3090dab199c1ac15 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 03:50:48 -0500 Subject: [PATCH 11/59] Define then update `*_hook` dictionaries --- tubesync/sync/hooks.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index c669a3b9..27133b49 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -5,6 +5,14 @@ from common.logger import log from django.conf import settings +progress_hook = { + 'status': dict(), +} + +postprocessor_hook = { + 'status': dict(), +} + class BaseStatus: valid = set() @@ -158,15 +166,13 @@ def yt_dlp_postprocessor_hook(event): status.cleanup() -progress_hook = { +progress_hook.update({ 'class': ProgressHookStatus(), 'function': yt_dlp_progress_hook, - 'status': dict(), -} +}) -postprocessor_hook = { +postprocessor_hook.update({ 'class': PPHookStatus(), 'function': yt_dlp_postprocessor_hook, - 'status': dict(), -} +}) From 09812019b561f2fc60504821fb9a7ff7b9dcc2f2 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 04:28:52 -0500 Subject: [PATCH 12/59] Use `next_progress` to avoid logging in spurts --- tubesync/sync/hooks.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 27133b49..e3f55557 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -13,6 +13,7 @@ postprocessor_hook = { 'status': dict(), } + class BaseStatus: valid = set() @@ -55,6 +56,11 @@ class ProgressHookStatus(BaseStatus): self.status = status self.download_progress = 0 + def 
next_progress(self): + if 0 == self.download_progress: + return 0 + return 5 + self.download_progress + class PPHookStatus(BaseStatus): valid = frozenset(( 'started', @@ -109,10 +115,10 @@ def yt_dlp_progress_hook(event): pass if downloaded_bytes > 0 and total_bytes > 0: percent = round(100 * downloaded_bytes / total_bytes) - if percent and (0 < percent) and (0 == percent % 5): + if percent and (self.next_progress() < percent) and (0 == percent % 5): + status.download_progress = percent log.info(f'[youtube-dl] downloading: {filename} - {percent_str} ' f'of {total} at {speed}, {eta} remaining') - status.download_progress = percent or 0 elif 'finished' == event['status']: # update the status for key to the finished value status = ProgressHookStatus.get(key) From afa5ec8684cb3919261faf7262d201a0c4f53cd7 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 04:45:11 -0500 Subject: [PATCH 13/59] fixup: it is `status` not `self` needed here --- tubesync/sync/hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index e3f55557..c6fddb70 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -115,7 +115,7 @@ def yt_dlp_progress_hook(event): pass if downloaded_bytes > 0 and total_bytes > 0: percent = round(100 * downloaded_bytes / total_bytes) - if percent and (self.next_progress() < percent) and (0 == percent % 5): + if percent and (status.next_progress() < percent) and (0 == percent % 5): status.download_progress = percent log.info(f'[youtube-dl] downloading: {filename} - {percent_str} ' f'of {total} at {speed}, {eta} remaining') From 6177ff4c597407b975cbe35bea9cc0d32ac27ec0 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 05:42:34 -0500 Subject: [PATCH 14/59] Move `get` into `BaseStatus` --- tubesync/sync/hooks.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 
c6fddb70..a5c7f9ad 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -15,14 +15,19 @@ postprocessor_hook = { class BaseStatus: + status_dict = dict() valid = set() + @classmethod + def get(cls, key): + return cls.status_dict.get(key, None) + @classmethod def valid_status(cls, status): return status in cls.valid def __init__(self, hook_status_dict=None): - self._status_dict = hook_status_dict or dict() + self._status_dict = hook_status_dict or self.status_dict self._registered_keys = set() def register(self, *args): @@ -39,18 +44,15 @@ class BaseStatus: del self._status_dict[key] class ProgressHookStatus(BaseStatus): + status_dict = progress_hook['status'] valid = frozenset(( 'downloading', 'finished', 'error', )) - @staticmethod - def get(key): - return progress_hook['status'].get(key, None) - def __init__(self, *args, status=None, info_dict={}, filename=None, **kwargs): - super().__init__(progress_hook['status']) + super().__init__(self.status_dict) self.filename = filename self.info = info_dict self.status = status @@ -62,18 +64,15 @@ class ProgressHookStatus(BaseStatus): return 5 + self.download_progress class PPHookStatus(BaseStatus): + status_dict = postprocessor_hook['status'] valid = frozenset(( 'started', 'processing', 'finished', )) - @staticmethod - def get(key): - return postprocessor_hook['status'].get(key, None) - def __init__(self, *args, status=None, postprocessor=None, info_dict={}, filename=None, **kwargs): - super().__init__(postprocessor_hook['status']) + super().__init__(self.status_dict) self.filename = filename self.info = info_dict self.media_name = None @@ -100,6 +99,7 @@ def yt_dlp_progress_hook(event): if status is None: status = ProgressHookStatus(**event) status.register(key, filename, status.filename) + log.info(ProgressHookStatus.status_dict) downloaded_bytes = event.get('downloaded_bytes', 0) or 0 total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0 @@ -125,6 +125,7 @@ def yt_dlp_progress_hook(event): if 
status is None: status = ProgressHookStatus(**event) status.register(key, filename, status.filename) + log.info(ProgressHookStatus.status_dict) status.download_progress = 100 total_size_str = event.get('_total_bytes_str', '?').strip() @@ -133,6 +134,7 @@ def yt_dlp_progress_hook(event): f'{total_size_str} in {elapsed_str}') status.cleanup() + log.info(ProgressHookStatus.status_dict) def yt_dlp_postprocessor_hook(event): if not PPHookStatus.valid_status(event['status']): From 71d84e1d74975ebba43dcc43b739978ffa5f7e9e Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 06:11:09 -0500 Subject: [PATCH 15/59] Better output --- tubesync/sync/hooks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index a5c7f9ad..26ab2c64 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -58,6 +58,9 @@ class ProgressHookStatus(BaseStatus): self.status = status self.download_progress = 0 + def __str__(self): + return f'{self.__name__}: {self.status} ({self.download_progress}) file: {self.filename}' + def next_progress(self): if 0 == self.download_progress: return 0 @@ -134,7 +137,6 @@ def yt_dlp_progress_hook(event): f'{total_size_str} in {elapsed_str}') status.cleanup() - log.info(ProgressHookStatus.status_dict) def yt_dlp_postprocessor_hook(event): if not PPHookStatus.valid_status(event['status']): From d6ed3388e255fe379249c4ea5a56e3781b0c6e42 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 06:19:13 -0500 Subject: [PATCH 16/59] Removing existing (unlocked) tasks is safe to use --- tubesync/sync/signals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 5609b372..4f811add 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -86,7 +86,7 @@ def source_post_save(sender, instance, created, **kwargs): queue=str(instance.pk), priority=1, verbose_name=verbose_name.format(instance.name), - 
remove_existing_tasks=False + remove_existing_tasks=True ) verbose_name = _('Checking all media for source "{}"') save_all_media_for_source( From f5c832e5aba65a27bd41f9944dac7d1dcd9f061e Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 07:35:51 -0500 Subject: [PATCH 17/59] fixup: use repr to identify the class --- tubesync/sync/hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 26ab2c64..3d679460 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -59,7 +59,7 @@ class ProgressHookStatus(BaseStatus): self.download_progress = 0 def __str__(self): - return f'{self.__name__}: {self.status} ({self.download_progress}) file: {self.filename}' + return f'{self!r}: {self.status} ({self.download_progress}) file: {self.filename}' def next_progress(self): if 0 == self.download_progress: From 034110e6fea83a4a8c09247056d1bcace1aa2ac7 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 11:42:19 -0500 Subject: [PATCH 18/59] Log str output --- tubesync/sync/hooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 3d679460..a64688f9 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -102,7 +102,7 @@ def yt_dlp_progress_hook(event): if status is None: status = ProgressHookStatus(**event) status.register(key, filename, status.filename) - log.info(ProgressHookStatus.status_dict) + log.info(str(ProgressHookStatus.status_dict)) downloaded_bytes = event.get('downloaded_bytes', 0) or 0 total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0 @@ -128,7 +128,7 @@ def yt_dlp_progress_hook(event): if status is None: status = ProgressHookStatus(**event) status.register(key, filename, status.filename) - log.info(ProgressHookStatus.status_dict) + log.info(str(ProgressHookStatus.status_dict)) status.download_progress = 100 total_size_str = event.get('_total_bytes_str', '?').strip() From 
074230b737cf123a156e1061105b8ca653984b9e Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 13:04:23 -0500 Subject: [PATCH 19/59] Switch from key to filename When downloading subtitles, key isn't available. --- tubesync/sync/hooks.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index a64688f9..24568a82 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -58,9 +58,6 @@ class ProgressHookStatus(BaseStatus): self.status = status self.download_progress = 0 - def __str__(self): - return f'{self!r}: {self.status} ({self.download_progress}) file: {self.filename}' - def next_progress(self): if 0 == self.download_progress: return 0 @@ -87,7 +84,7 @@ def yt_dlp_progress_hook(event): log.warn(f'[youtube-dl] unknown progress event: {str(event)}') return None - key = 'Unknown' + key = None if 'display_id' in event['info_dict']: key = event['info_dict']['display_id'] elif 'id' in event['info_dict']: @@ -97,12 +94,11 @@ def yt_dlp_progress_hook(event): if 'error' == event['status']: log.error(f'[youtube-dl] error occured downloading: {filename}') elif 'downloading' == event['status']: - # get or create the status for key - status = ProgressHookStatus.get(key) + # get or create the status for filename + status = ProgressHookStatus.get(filename) if status is None: status = ProgressHookStatus(**event) status.register(key, filename, status.filename) - log.info(str(ProgressHookStatus.status_dict)) downloaded_bytes = event.get('downloaded_bytes', 0) or 0 total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0 @@ -123,12 +119,11 @@ def yt_dlp_progress_hook(event): log.info(f'[youtube-dl] downloading: {filename} - {percent_str} ' f'of {total} at {speed}, {eta} remaining') elif 'finished' == event['status']: - # update the status for key to the finished value - status = ProgressHookStatus.get(key) + # update the status for filename to the finished value + status = 
ProgressHookStatus.get(filename) if status is None: status = ProgressHookStatus(**event) status.register(key, filename, status.filename) - log.info(str(ProgressHookStatus.status_dict)) status.download_progress = 100 total_size_str = event.get('_total_bytes_str', '?').strip() From ee9849a1aac45217cae8ebbabbaf3cf085fb69e6 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 13:58:22 -0500 Subject: [PATCH 20/59] Calculate percent from number of fragments downloaded --- tubesync/sync/hooks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 24568a82..1d6eb330 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -103,6 +103,8 @@ def yt_dlp_progress_hook(event): downloaded_bytes = event.get('downloaded_bytes', 0) or 0 total_bytes_estimate = event.get('total_bytes_estimate', 0) or 0 total_bytes = event.get('total_bytes', 0) or total_bytes_estimate + fragment_index = event.get('fragment_index', 0) or 0 + fragment_count = event.get('fragment_count', 0) or 0 eta = event.get('_eta_str', '?').strip() percent_str = event.get('_percent_str', '?').strip() speed = event.get('_speed_str', '?').strip() @@ -114,6 +116,8 @@ def yt_dlp_progress_hook(event): pass if downloaded_bytes > 0 and total_bytes > 0: percent = round(100 * downloaded_bytes / total_bytes) + if fragment_index > 0 and fragment_count > 0: + percent = round(100 * fragment_index / fragment_count) if percent and (status.next_progress() < percent) and (0 == percent % 5): status.download_progress = percent log.info(f'[youtube-dl] downloading: {filename} - {percent_str} ' From 7f642263698a923d3e48b4f933bda743ab80c9ce Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 14:58:43 -0500 Subject: [PATCH 21/59] Add more keys to filter - `heatmap`: is a huge list of tiny "chapters" used to display popularity of sections of the video - `requested_subtitles`: also has a URL that expires --- tubesync/sync/utils.py | 8 +++++++- 1 file changed, 7 
insertions(+), 1 deletion(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 2f9e6593..791bfe6d 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -282,12 +282,18 @@ def filter_response(arg_dict, copy_arg=False): ) ) - for key in frozenset(('subtitles', 'automatic_captions',)): + for key in frozenset(('subtitles', 'requested_subtitles', 'automatic_captions',)): if key in response_dict.keys(): key_dict = response_dict[key] for lang_code in key_dict: _drop_url_keys(key_dict, lang_code, drop_subtitles_url) # end of subtitles cleanup }}} + + # beginning of heatmap cleanup {{{ + for key in frozenset(('heatmap',)): + if key in response_dict.keys(): + del response_dict[key] + # end of heatmap cleanup }}} return response_dict From 90b892fd85ceb155717e7d2ce491a91fb1cf5815 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 15:21:23 -0500 Subject: [PATCH 22/59] Prefer fragment count calculation When the first fragment is downloaded, the percent is calculated as `100`, because the bytes and total are from the fragment, which prevents additional logging. 
--- tubesync/sync/hooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 1d6eb330..b03f7128 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -114,10 +114,10 @@ def yt_dlp_progress_hook(event): percent = int(float(percent_str.rstrip('%'))) except: pass - if downloaded_bytes > 0 and total_bytes > 0: - percent = round(100 * downloaded_bytes / total_bytes) if fragment_index > 0 and fragment_count > 0: percent = round(100 * fragment_index / fragment_count) + elif downloaded_bytes > 0 and total_bytes > 0: + percent = round(100 * downloaded_bytes / total_bytes) if percent and (status.next_progress() < percent) and (0 == percent % 5): status.download_progress = percent log.info(f'[youtube-dl] downloading: {filename} - {percent_str} ' From 23abc545d28e5d7dfd1e1cdc33bcdfa22984d16f Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 15:34:24 -0500 Subject: [PATCH 23/59] Replace '100%' with the calculated percent --- tubesync/sync/hooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index b03f7128..3c506eb7 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -116,6 +116,7 @@ def yt_dlp_progress_hook(event): pass if fragment_index > 0 and fragment_count > 0: percent = round(100 * fragment_index / fragment_count) + percent_str = f'{percent}%' elif downloaded_bytes > 0 and total_bytes > 0: percent = round(100 * downloaded_bytes / total_bytes) if percent and (status.next_progress() < percent) and (0 == percent % 5): From d7d1f52ff3077555d6269f3010cd1aead3bbec82 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 15:59:44 -0500 Subject: [PATCH 24/59] Completed fragments or bytes can be zero --- tubesync/sync/hooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 3c506eb7..faeeb81e 100644 --- a/tubesync/sync/hooks.py +++ 
b/tubesync/sync/hooks.py @@ -114,10 +114,10 @@ def yt_dlp_progress_hook(event): percent = int(float(percent_str.rstrip('%'))) except: pass - if fragment_index > 0 and fragment_count > 0: + if fragment_index >= 0 and fragment_count > 0: percent = round(100 * fragment_index / fragment_count) percent_str = f'{percent}%' - elif downloaded_bytes > 0 and total_bytes > 0: + elif downloaded_bytes >= 0 and total_bytes > 0: percent = round(100 * downloaded_bytes / total_bytes) if percent and (status.next_progress() < percent) and (0 == percent % 5): status.download_progress = percent From 64302e8191663d716deca1a8a6de6fc478026657 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 17:15:45 -0500 Subject: [PATCH 25/59] Change the minimum next step to 1 --- tubesync/sync/hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index faeeb81e..3e8722a9 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -61,7 +61,7 @@ class ProgressHookStatus(BaseStatus): def next_progress(self): if 0 == self.download_progress: return 0 - return 5 + self.download_progress + return 1 + self.download_progress class PPHookStatus(BaseStatus): status_dict = postprocessor_hook['status'] From d1eecf19c2573d25b56bca3a81b7759ea0999faa Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 18:56:09 -0500 Subject: [PATCH 26/59] Use `libopus` and speed up VP9 encoding --- tubesync/sync/youtube.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 3ea3c333..86c3d254 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -279,9 +279,11 @@ def download_media( codec_options = list() ofn = ytopts['outtmpl'] if 'av1-' in ofn: - codec_options = ['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35'] + codec_options.extend(['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35']) elif 'vp9-' in ofn: - codec_options = ['-c:v', 
'libvpx-vp9', '-b:v', '0', '-crf', '31'] + codec_options.extend(['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31', '-row-mt', '1']) + if '-opus' in ofn: + codec_options.extend(['-c:a', 'libopus']) set_ffmpeg_codec = not ( ytopts['postprocessor_args'] and ytopts['postprocessor_args']['modifychapters+ffmpeg'] From 3039e3105cb02ed5bc8c37e41bc7c331549da8d3 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 19:12:17 -0500 Subject: [PATCH 27/59] More VP9 tweaks --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 86c3d254..fd3795a1 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -281,7 +281,7 @@ def download_media( if 'av1-' in ofn: codec_options.extend(['-c:v', 'libsvtav1', '-preset', '8', '-crf', '35']) elif 'vp9-' in ofn: - codec_options.extend(['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31', '-row-mt', '1']) + codec_options.extend(['-c:v', 'libvpx-vp9', '-b:v', '0', '-crf', '31', '-row-mt', '1', '-tile-columns', '2']) if '-opus' in ofn: codec_options.extend(['-c:a', 'libopus']) set_ffmpeg_codec = not ( From 23e38312a74d7f6fc381499eb22fcd17d8cf580c Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 21:10:48 -0500 Subject: [PATCH 28/59] Create modify_chapters.py --- patches/yt_dlp/post/modify_chapters.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 patches/yt_dlp/post/modify_chapters.py diff --git a/patches/yt_dlp/post/modify_chapters.py b/patches/yt_dlp/post/modify_chapters.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/patches/yt_dlp/post/modify_chapters.py @@ -0,0 +1 @@ + From 88a200a338e1a31f32b87a2bdedf53c8708df212 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 21:13:22 -0500 Subject: [PATCH 29/59] Update and rename modify_chapters.py to modify_chapters.py --- patches/yt_dlp/{post => postprocessor}/modify_chapters.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 
patches/yt_dlp/{post => postprocessor}/modify_chapters.py (100%) diff --git a/patches/yt_dlp/post/modify_chapters.py b/patches/yt_dlp/postprocessor/modify_chapters.py similarity index 100% rename from patches/yt_dlp/post/modify_chapters.py rename to patches/yt_dlp/postprocessor/modify_chapters.py From f78f33f8fad2a0f4e3b20613912c4af5b365e6bd Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 21:15:13 -0500 Subject: [PATCH 30/59] Add modify_chapters.py From: https://github.com/tcely/yt-dlp/raw/refs/heads/modify-chapters-codecs/yt_dlp/postprocessor/modify_chapters.py --- .../yt_dlp/postprocessor/modify_chapters.py | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) diff --git a/patches/yt_dlp/postprocessor/modify_chapters.py b/patches/yt_dlp/postprocessor/modify_chapters.py index 8b137891..c41c9a48 100644 --- a/patches/yt_dlp/postprocessor/modify_chapters.py +++ b/patches/yt_dlp/postprocessor/modify_chapters.py @@ -1 +1,379 @@ +import copy +import heapq +import os +from .common import PostProcessor +from .ffmpeg import FFmpegPostProcessor, FFmpegSubtitlesConvertorPP +from .sponsorblock import SponsorBlockPP +from ..utils import PostProcessingError, orderedSet, prepend_extension + +_TINY_CHAPTER_DURATION = 1 +DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' + + +class ModifyChaptersPP(FFmpegPostProcessor): + def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, remove_ranges=None, + *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False): + FFmpegPostProcessor.__init__(self, downloader) + self._remove_chapters_patterns = set(remove_chapters_patterns or []) + self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys()) + self._ranges_to_remove = set(remove_ranges or []) + self._sponsorblock_chapter_title = sponsorblock_chapter_title + self._force_keyframes = force_keyframes + + 
@PostProcessor._restrict_to(images=False) + def run(self, info): + self._fixup_chapters(info) + # Chapters must be preserved intact when downloading multiple formats of the same video. + chapters, sponsor_chapters = self._mark_chapters_to_remove( + copy.deepcopy(info.get('chapters')) or [], + copy.deepcopy(info.get('sponsorblock_chapters')) or []) + if not chapters and not sponsor_chapters: + return [], info + + real_duration = self._get_real_video_duration(info['filepath']) + if not chapters: + chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}] + + info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters) + if not cuts: + return [], info + elif not info['chapters']: + self.report_warning('You have requested to remove the entire video, which is not possible') + return [], info + + original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time'] + if self._duration_mismatch(real_duration, original_duration, 1): + if not self._duration_mismatch(real_duration, info['duration']): + self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut') + return [], info + if not info.get('__real_download'): + raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. 
' + 'Different chapters may have already been removed') + else: + self.write_debug('Expected and actual durations mismatch') + + concat_opts = self._make_concat_opts(cuts, real_duration) + self.write_debug('Concat spec = {}'.format(', '.join(f'{c.get("inpoint", 0.0)}-{c.get("outpoint", "inf")}' for c in concat_opts))) + + def remove_chapters(file, is_sub): + return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub) + + in_out_files = [remove_chapters(info['filepath'], False)] + in_out_files.extend(remove_chapters(in_file, True) for in_file in self._get_supported_subs(info)) + + # Renaming should only happen after all files are processed + files_to_remove = [] + for in_file, out_file in in_out_files: + mtime = os.stat(in_file).st_mtime + uncut_file = prepend_extension(in_file, 'uncut') + os.replace(in_file, uncut_file) + os.replace(out_file, in_file) + self.try_utime(in_file, mtime, mtime) + files_to_remove.append(uncut_file) + + return files_to_remove, info + + def _mark_chapters_to_remove(self, chapters, sponsor_chapters): + if self._remove_chapters_patterns: + warn_no_chapter_to_remove = True + if not chapters: + self.to_screen('Chapter information is unavailable') + warn_no_chapter_to_remove = False + for c in chapters: + if any(regex.search(c['title']) for regex in self._remove_chapters_patterns): + c['remove'] = True + warn_no_chapter_to_remove = False + if warn_no_chapter_to_remove: + self.to_screen('There are no chapters matching the regex') + + if self._remove_sponsor_segments: + warn_no_chapter_to_remove = True + if not sponsor_chapters: + self.to_screen('SponsorBlock information is unavailable') + warn_no_chapter_to_remove = False + for c in sponsor_chapters: + if c['category'] in self._remove_sponsor_segments: + c['remove'] = True + warn_no_chapter_to_remove = False + if warn_no_chapter_to_remove: + self.to_screen('There are no matching SponsorBlock chapters') + + sponsor_chapters.extend({ + 'start_time': start, + 
'end_time': end, + 'category': 'manually_removed', + '_categories': [('manually_removed', start, end, 'Manually removed')], + 'remove': True, + } for start, end in self._ranges_to_remove) + + return chapters, sponsor_chapters + + def _get_supported_subs(self, info): + for sub in (info.get('requested_subtitles') or {}).values(): + sub_file = sub.get('filepath') + # The file might have been removed by --embed-subs + if not sub_file or not os.path.exists(sub_file): + continue + ext = sub['ext'] + if ext not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS: + self.report_warning(f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync') + continue + # TODO: create __real_download for subs? + yield sub_file + + def _remove_marked_arrange_sponsors(self, chapters): + # Store cuts separately, since adjacent and overlapping cuts must be merged. + cuts = [] + + def append_cut(c): + assert 'remove' in c, 'Not a cut is appended to cuts' + last_to_cut = cuts[-1] if cuts else None + if last_to_cut and last_to_cut['end_time'] >= c['start_time']: + last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time']) + else: + cuts.append(c) + return len(cuts) - 1 + + def excess_duration(c): + # Cuts that are completely within the chapter reduce chapters' duration. + # Since cuts can overlap, excess duration may be less than the sum of cuts' durations. + # To avoid that, chapter stores the index to the first cut within the chapter, + # instead of storing excess duration. append_cut ensures that subsequent cuts (if any) + # will be merged with previous ones (if necessary).
+ cut_idx, excess = c.pop('cut_idx', len(cuts)), 0 + while cut_idx < len(cuts): + cut = cuts[cut_idx] + if cut['start_time'] >= c['end_time']: + break + if cut['end_time'] > c['start_time']: + excess += min(cut['end_time'], c['end_time']) + excess -= max(cut['start_time'], c['start_time']) + cut_idx += 1 + return excess + + new_chapters = [] + + def append_chapter(c): + assert 'remove' not in c, 'Cut is appended to chapters' + length = c['end_time'] - c['start_time'] - excess_duration(c) + # Chapter is completely covered by cuts or sponsors. + if length <= 0: + return + start = new_chapters[-1]['end_time'] if new_chapters else 0 + c.update(start_time=start, end_time=start + length) + new_chapters.append(c) + + # Turn into a priority queue, index is a tie breaker. + # Plain stack sorted by start_time is not enough: after splitting the chapter, + # the part returned to the stack is not guaranteed to have start_time + # less than or equal to that of the stack's head. + chapters = [(c['start_time'], i, c) for i, c in enumerate(chapters)] + heapq.heapify(chapters) + + _, cur_i, cur_chapter = heapq.heappop(chapters) + while chapters: + _, i, c = heapq.heappop(chapters) + # Non-overlapping chapters or cuts can be appended directly. However, + # adjacent non-overlapping cuts must be merged, which is handled by append_cut. + if cur_chapter['end_time'] <= c['start_time']: + (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter) + cur_i, cur_chapter = i, c + continue + + # Eight possibilities for overlapping chapters: (cut, cut), (cut, sponsor), + # (cut, normal), (sponsor, cut), (normal, cut), (sponsor, sponsor), + # (sponsor, normal), and (normal, sponsor). There is no (normal, normal): + # normal chapters are assumed not to overlap. + if 'remove' in cur_chapter: + # (cut, cut): adjust end_time.
+ if 'remove' in c: + cur_chapter['end_time'] = max(cur_chapter['end_time'], c['end_time']) + # (cut, sponsor/normal): chop the beginning of the later chapter + # (if it's not completely hidden by the cut). Push to the priority queue + # to restore sorting by start_time: with beginning chopped, c may actually + # start later than the remaining chapters from the queue. + elif cur_chapter['end_time'] < c['end_time']: + c['start_time'] = cur_chapter['end_time'] + c['_was_cut'] = True + heapq.heappush(chapters, (c['start_time'], i, c)) + # (sponsor/normal, cut). + elif 'remove' in c: + cur_chapter['_was_cut'] = True + # Chop the end of the current chapter if the cut is not contained within it. + # Chopping the end doesn't break start_time sorting, no PQ push is necessary. + if cur_chapter['end_time'] <= c['end_time']: + cur_chapter['end_time'] = c['start_time'] + append_chapter(cur_chapter) + cur_i, cur_chapter = i, c + continue + # Current chapter contains the cut within it. If the current chapter is + # a sponsor chapter, check whether the categories before and after the cut differ. + if '_categories' in cur_chapter: + after_c = dict(cur_chapter, start_time=c['end_time'], _categories=[]) + cur_cats = [] + for cat_start_end in cur_chapter['_categories']: + if cat_start_end[1] < c['start_time']: + cur_cats.append(cat_start_end) + if cat_start_end[2] > c['end_time']: + after_c['_categories'].append(cat_start_end) + cur_chapter['_categories'] = cur_cats + if cur_chapter['_categories'] != after_c['_categories']: + # Categories before and after the cut differ: push the after part to PQ. + heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c)) + cur_chapter['end_time'] = c['start_time'] + append_chapter(cur_chapter) + cur_i, cur_chapter = i, c + continue + # Either sponsor categories before and after the cut are the same or + # we're dealing with a normal chapter. Just register an outstanding cut: + # subsequent append_chapter will reduce the duration. 
+ cur_chapter.setdefault('cut_idx', append_cut(c)) + # (sponsor, normal): if a normal chapter is not completely overlapped, + # chop the beginning of it and push it to PQ. + elif '_categories' in cur_chapter and '_categories' not in c: + if cur_chapter['end_time'] < c['end_time']: + c['start_time'] = cur_chapter['end_time'] + c['_was_cut'] = True + heapq.heappush(chapters, (c['start_time'], i, c)) + # (normal, sponsor) and (sponsor, sponsor) + else: + assert '_categories' in c, 'Normal chapters overlap' + cur_chapter['_was_cut'] = True + c['_was_cut'] = True + # Push the part after the sponsor to PQ. + if cur_chapter['end_time'] > c['end_time']: + # deepcopy to make categories in after_c and cur_chapter/c refer to different lists. + after_c = dict(copy.deepcopy(cur_chapter), start_time=c['end_time']) + heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c)) + # Push the part after the overlap to PQ. + elif c['end_time'] > cur_chapter['end_time']: + after_cur = dict(copy.deepcopy(c), start_time=cur_chapter['end_time']) + heapq.heappush(chapters, (after_cur['start_time'], cur_i, after_cur)) + c['end_time'] = cur_chapter['end_time'] + # (sponsor, sponsor): merge categories in the overlap. + if '_categories' in cur_chapter: + c['_categories'] = cur_chapter['_categories'] + c['_categories'] + # Inherit the cuts that the current chapter has accumulated within it. + if 'cut_idx' in cur_chapter: + c['cut_idx'] = cur_chapter['cut_idx'] + cur_chapter['end_time'] = c['start_time'] + append_chapter(cur_chapter) + cur_i, cur_chapter = i, c + (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter) + return self._remove_tiny_rename_sponsors(new_chapters), cuts + + def _remove_tiny_rename_sponsors(self, chapters): + new_chapters = [] + for i, c in enumerate(chapters): + # Merge with the previous/next if the chapter is tiny. + # Only tiny chapters resulting from a cut can be skipped. 
+ # Chapters that were already tiny in the original list will be preserved. + if (('_was_cut' in c or '_categories' in c) + and c['end_time'] - c['start_time'] < _TINY_CHAPTER_DURATION): + if not new_chapters: + # Prepend tiny chapter to the next one if possible. + if i < len(chapters) - 1: + chapters[i + 1]['start_time'] = c['start_time'] + continue + else: + old_c = new_chapters[-1] + if i < len(chapters) - 1: + next_c = chapters[i + 1] + # Not a typo: key names in old_c and next_c are really different. + prev_is_sponsor = 'categories' in old_c + next_is_sponsor = '_categories' in next_c + # Preferentially prepend tiny normals to normals and sponsors to sponsors. + if (('_categories' not in c and prev_is_sponsor and not next_is_sponsor) + or ('_categories' in c and not prev_is_sponsor and next_is_sponsor)): + next_c['start_time'] = c['start_time'] + continue + old_c['end_time'] = c['end_time'] + continue + + c.pop('_was_cut', None) + cats = c.pop('_categories', None) + if cats: + category, _, _, category_name = min(cats, key=lambda c: c[2] - c[1]) + c.update({ + 'category': category, + 'categories': orderedSet(x[0] for x in cats), + 'name': category_name, + 'category_names': orderedSet(x[3] for x in cats), + }) + c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c.copy()) + # Merge identically named sponsors. 
+ if (new_chapters and 'categories' in new_chapters[-1] + and new_chapters[-1]['title'] == c['title']): + new_chapters[-1]['end_time'] = c['end_time'] + continue + new_chapters.append(c) + return new_chapters + + def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False): + in_file = filename + out_file = prepend_extension(in_file, 'temp') + if force_keyframes: + in_file = self.force_keyframes(in_file, (t for c in ranges_to_cut for t in (c['start_time'], c['end_time']))) + self.to_screen(f'Removing chapters from {filename}') + self.concat_files([in_file] * len(concat_opts), out_file, concat_opts) + if in_file != filename: + self._delete_downloaded_files(in_file, msg=None) + return out_file + + + # override to change the args ordering + def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)): + self.check_version() + + oldest_mtime = min( + os.stat(path).st_mtime for path, _ in input_path_opts if path) + + cmd = [self.executable, encodeArgument('-y')] + # avconv does not have repeat option + if self.basename == 'ffmpeg': + cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] + + def make_args(file, args, name, number): + keys = [f'_{name}{number}', f'_{name}'] + if name == 'o': + args += ['-movflags', '+faststart'] + if number == 1: + keys.append('') + args = self._configuration_args(self.basename, keys) + args + if name == 'i': + args.append('-i') + return ( + [encodeArgument(arg) for arg in args] + + [self._ffmpeg_filename_argument(file)]) + + for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)): + cmd += itertools.chain.from_iterable( + make_args(path, list(opts), arg_type, i + 1) + for i, (path, opts) in enumerate(path_opts) if path) + + self.write_debug(f'ffmpeg command line: {shell_quote(cmd)}') + _, stderr, returncode = Popen.run( + cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + if returncode not in 
variadic(expected_retcodes): + self.write_debug(stderr) + raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1]) + for out_path, _ in output_path_opts: + if out_path: + self.try_utime(out_path, oldest_mtime, oldest_mtime) + return stderr + + + @staticmethod + def _make_concat_opts(chapters_to_remove, duration): + opts = [{}] + for s in chapters_to_remove: + # Do not create 0 duration chunk at the beginning. + if s['start_time'] == 0: + opts[-1]['inpoint'] = f'{s["end_time"]:.6f}' + continue + opts[-1]['outpoint'] = f'{s["start_time"]:.6f}' + # Do not create 0 duration chunk at the end. + if s['end_time'] < duration: + opts.append({'inpoint': f'{s["end_time"]:.6f}'}) + return opts From e889c78a48bc5e2c4e2c86ba2aa942281f6af64a Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 21:20:31 -0500 Subject: [PATCH 31/59] Patch yt_dlp postprocessor modify_chapters.py --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 285b7056..d70fd81d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -367,6 +367,9 @@ RUN set -x && \ # Copy root COPY config/root / +# patch yt_dlp +COPY patches/yt_dlp/ /usr/local/lib/python3.*/dist-packages/yt_dlp/ + # Create a healthcheck HEALTHCHECK --interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck.py", "http://127.0.0.1:8080/healthcheck"] From bf7b57e666108e87f12d2faa43abeb0f7d898a69 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Feb 2025 22:27:26 -0500 Subject: [PATCH 32/59] Update modify_chapters.py --- patches/yt_dlp/postprocessor/modify_chapters.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/patches/yt_dlp/postprocessor/modify_chapters.py b/patches/yt_dlp/postprocessor/modify_chapters.py index c41c9a48..2beaca92 100644 --- a/patches/yt_dlp/postprocessor/modify_chapters.py +++ b/patches/yt_dlp/postprocessor/modify_chapters.py @@ -5,7 +5,12 @@ import os from .common import PostProcessor from .ffmpeg import FFmpegPostProcessor, 
FFmpegSubtitlesConvertorPP from .sponsorblock import SponsorBlockPP -from ..utils import PostProcessingError, orderedSet, prepend_extension +from ..utils import ( + PostProcessingError, + orderedSet, + prepend_extension, + encodeArgument, +) _TINY_CHAPTER_DURATION = 1 DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' From a9078c3521bef9bfc34abb209db78fdbd11b1f74 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Feb 2025 00:14:48 -0500 Subject: [PATCH 33/59] Update modify_chapters.py --- patches/yt_dlp/postprocessor/modify_chapters.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/patches/yt_dlp/postprocessor/modify_chapters.py b/patches/yt_dlp/postprocessor/modify_chapters.py index 2beaca92..2ce0607c 100644 --- a/patches/yt_dlp/postprocessor/modify_chapters.py +++ b/patches/yt_dlp/postprocessor/modify_chapters.py @@ -1,15 +1,24 @@ import copy import heapq +import itertools import os +import subprocess from .common import PostProcessor -from .ffmpeg import FFmpegPostProcessor, FFmpegSubtitlesConvertorPP +from .ffmpeg import ( + FFmpegPostProcessor, + FFmpegPostProcessorError, + FFmpegSubtitlesConvertorPP, +) from .sponsorblock import SponsorBlockPP from ..utils import ( + Popen, PostProcessingError, + encodeArgument, orderedSet, prepend_extension, - encodeArgument, + shell_quote, + variadic, ) _TINY_CHAPTER_DURATION = 1 From e2680c4b24c35c22e03d757a29ad3ff28756d2be Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Feb 2025 13:45:21 -0500 Subject: [PATCH 34/59] Split the `COPY` line --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d70fd81d..2c90a869 100644 --- a/Dockerfile +++ b/Dockerfile @@ -368,7 +368,8 @@ RUN set -x && \ COPY config/root / # patch yt_dlp -COPY patches/yt_dlp/ /usr/local/lib/python3.*/dist-packages/yt_dlp/ +COPY patches/yt_dlp/ \ + /usr/local/lib/python3.*/dist-packages/yt_dlp/ # Create a healthcheck HEALTHCHECK --interval=1m 
--timeout=10s --start-period=3m CMD ["/app/healthcheck.py", "http://127.0.0.1:8080/healthcheck"] From cfec87679600283e7626ee5c3292d2dede952a40 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Feb 2025 14:14:56 -0500 Subject: [PATCH 35/59] Adjust for `requested_subtitles` layout --- tubesync/sync/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 791bfe6d..e536f57d 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -282,11 +282,15 @@ def filter_response(arg_dict, copy_arg=False): ) ) - for key in frozenset(('subtitles', 'requested_subtitles', 'automatic_captions',)): + for key in frozenset(('subtitles', 'automatic_captions',)): if key in response_dict.keys(): key_dict = response_dict[key] for lang_code in key_dict: _drop_url_keys(key_dict, lang_code, drop_subtitles_url) + + for key in frozenset(('requested_subtitles',)): + if key in response_dict.keys(): + _drop_url_keys(response_dict, key, drop_subtitles_url) # end of subtitles cleanup }}} # beginning of heatmap cleanup {{{ From 8bff3b6c24fa10fe860e4979ab5aa013dde5ddf7 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Feb 2025 16:02:38 -0500 Subject: [PATCH 36/59] More stringent checking of types --- tubesync/sync/utils.py | 55 ++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index e536f57d..6a745c45 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -205,20 +205,26 @@ def normalize_codec(codec_str): def _url_keys(arg_dict, filter_func): result = {} - for key in arg_dict.keys(): - if 'url' in key: - result.update( - {key: filter_func(key=key, url=arg_dict[key])} - ) + if isinstance(arg_dict, dict): + for key, value in arg_dict.items(): + if 'url' in key: + result.update( + {key: filter_func(key=key, url=value)} + ) return result def _drop_url_keys(arg_dict, key, filter_func): + assert 
isinstance(arg_dict, dict) if key in arg_dict.keys(): - for val_dict in arg_dict[key]: - for url_key, remove in _url_keys(val_dict, filter_func).items(): - if remove is True: - del val_dict[url_key] + key_list = arg_dict[key] + assert isinstance(key_list, list) + if isinstance(key_list, list): + for val_dict in key_list: + assert isinstance(val_dict, dict) + for url_key, remove in _url_keys(val_dict, filter_func).items(): + if remove is True: + del val_dict[url_key] def filter_response(arg_dict, copy_arg=False): @@ -260,13 +266,16 @@ def filter_response(arg_dict, copy_arg=False): '__needs_testing', '__working', )) - for key in frozenset(('formats', 'requested_formats',)): - _drop_url_keys(response_dict, key, drop_format_url) + for key in ('formats', 'requested_formats',): if key in response_dict.keys(): - for format in response_dict[key]: - for drop_key in drop_keys: - if drop_key in format.keys(): - del format[drop_key] + _drop_url_keys(response_dict, key, drop_format_url) + formats = response_dict[key] + assert isinstance(formats, list) + if isinstance(formats, list): + for format in formats: + for drop_key in drop_keys: + if drop_key in format.keys(): + del format[drop_key] # end of formats cleanup }}} # beginning of subtitles cleanup {{{ @@ -282,19 +291,17 @@ def filter_response(arg_dict, copy_arg=False): ) ) - for key in frozenset(('subtitles', 'automatic_captions',)): + for key in ('subtitles', 'requested_subtitles', 'automatic_captions',): if key in response_dict.keys(): - key_dict = response_dict[key] - for lang_code in key_dict: - _drop_url_keys(key_dict, lang_code, drop_subtitles_url) - - for key in frozenset(('requested_subtitles',)): - if key in response_dict.keys(): - _drop_url_keys(response_dict, key, drop_subtitles_url) + lang_codes = response_dict[key] + assert isinstance(lang_codes, dict) + if isinstance(lang_codes, dict): + for lang_code in lang_codes.keys(): + _drop_url_keys(lang_codes, lang_code, drop_subtitles_url) # end of subtitles cleanup 
}}} # beginning of heatmap cleanup {{{ - for key in frozenset(('heatmap',)): + for key in ('heatmap',): if key in response_dict.keys(): del response_dict[key] # end of heatmap cleanup }}} From 2205f08124344d64f0702d7f5efc310aae54a9be Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Feb 2025 16:11:45 -0500 Subject: [PATCH 37/59] Remove the assert for `lang_codes` The test data doesn't conform to this expectation. --- tubesync/sync/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 6a745c45..87d95285 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -294,7 +294,6 @@ def filter_response(arg_dict, copy_arg=False): for key in ('subtitles', 'requested_subtitles', 'automatic_captions',): if key in response_dict.keys(): lang_codes = response_dict[key] - assert isinstance(lang_codes, dict) if isinstance(lang_codes, dict): for lang_code in lang_codes.keys(): _drop_url_keys(lang_codes, lang_code, drop_subtitles_url) From 1d51c6182be100dd7dee6377f8566fb829662bcb Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Feb 2025 17:04:33 -0500 Subject: [PATCH 38/59] `COPY` doesn't do any globbing Instead, set up and use a 'python3' symbolic link. 
--- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 2c90a869..2226c411 100644 --- a/Dockerfile +++ b/Dockerfile @@ -359,6 +359,8 @@ RUN set -x && \ mkdir -v -p /config/cache/pycache && \ mkdir -v -p /downloads/audio && \ mkdir -v -p /downloads/video && \ + # Link to the current python3 version + ln -v -s -f -T "$(find /usr/local/lib -name 'python3.*' -type d -printf '%P\n' | sort -g -r | head -n 1)" /usr/local/lib/python3 && \ # Append software versions ffmpeg_version=$(/usr/local/bin/ffmpeg -version | awk -v 'ev=31' '1 == NR && "ffmpeg" == $1 { print $3; ev=0; } END { exit ev; }') && \ test -n "${ffmpeg_version}" && \ @@ -369,7 +371,7 @@ COPY config/root / # patch yt_dlp COPY patches/yt_dlp/ \ - /usr/local/lib/python3.*/dist-packages/yt_dlp/ + /usr/local/lib/python3/dist-packages/yt_dlp/ # Create a healthcheck HEALTHCHECK --interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck.py", "http://127.0.0.1:8080/healthcheck"] From 2fd5e70486bf21f7d51bf254fe06222de2a6d2e1 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Feb 2025 17:22:54 -0500 Subject: [PATCH 39/59] Use version sorting --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 2226c411..9c0add9d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -360,7 +360,7 @@ RUN set -x && \ mkdir -v -p /downloads/audio && \ mkdir -v -p /downloads/video && \ # Link to the current python3 version - ln -v -s -f -T "$(find /usr/local/lib -name 'python3.*' -type d -printf '%P\n' | sort -g -r | head -n 1)" /usr/local/lib/python3 && \ + ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ # Append software versions ffmpeg_version=$(/usr/local/bin/ffmpeg -version | awk -v 'ev=31' '1 == NR && "ffmpeg" == $1 { print $3; ev=0; } END { exit ev; }') && \ test -n "${ffmpeg_version}" && \ From 
208a540bc95d0afd49b8e334fe0bac800917c082 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Feb 2025 19:59:00 -0500 Subject: [PATCH 40/59] Add and use `list_of_dictionaries` function --- tubesync/sync/utils.py | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 87d95285..7afb8067 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -203,6 +203,17 @@ def normalize_codec(codec_str): return result +def list_of_dictionaries(arg_list, arg_function=lambda x: x): + assert callable(arg_function) + if isinstance(arg_list, list): + def _assert_and_call(arg_dict): + assert isinstance(arg_dict, dict) + if isinstance(arg_dict, dict): + return arg_function(arg_dict) + return (True, list(map(_assert_and_call, arg_list)),) + return (False, arg_list,) + + def _url_keys(arg_dict, filter_func): result = {} if isinstance(arg_dict, dict): @@ -214,17 +225,17 @@ def _url_keys(arg_dict, filter_func): return result +# expects a dictionary where the value at key is a: +# list of dictionaries def _drop_url_keys(arg_dict, key, filter_func): + def _del_url_keys(_arg_dict): + for url_key, remove in _url_keys(_arg_dict, filter_func).items(): + if remove is True: + del _arg_dict[url_key] + assert isinstance(arg_dict, dict) if key in arg_dict.keys(): - key_list = arg_dict[key] - assert isinstance(key_list, list) - if isinstance(key_list, list): - for val_dict in key_list: - assert isinstance(val_dict, dict) - for url_key, remove in _url_keys(val_dict, filter_func).items(): - if remove is True: - del val_dict[url_key] + list_of_dictionaries(arg_dict[key], _del_url_keys) def filter_response(arg_dict, copy_arg=False): @@ -266,16 +277,15 @@ def filter_response(arg_dict, copy_arg=False): '__needs_testing', '__working', )) + def del_drop_keys(arg_dict): + for drop_key in drop_keys: + if drop_key in arg_dict.keys(): + del arg_dict[drop_key] + for key in ('formats', 
'requested_formats',): if key in response_dict.keys(): _drop_url_keys(response_dict, key, drop_format_url) - formats = response_dict[key] - assert isinstance(formats, list) - if isinstance(formats, list): - for format in formats: - for drop_key in drop_keys: - if drop_key in format.keys(): - del format[drop_key] + list_of_dictionaries(response_dict[key], del_drop_keys) # end of formats cleanup }}} # beginning of subtitles cleanup {{{ From e58fae1f9693eba4d43062fdd85088c6ad2e0f17 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Feb 2025 20:17:53 -0500 Subject: [PATCH 41/59] Skip over items that are not dictionaries --- tubesync/sync/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 7afb8067..fa904c5f 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -207,9 +207,9 @@ def list_of_dictionaries(arg_list, arg_function=lambda x: x): assert callable(arg_function) if isinstance(arg_list, list): def _assert_and_call(arg_dict): - assert isinstance(arg_dict, dict) if isinstance(arg_dict, dict): return arg_function(arg_dict) + return arg_dict return (True, list(map(_assert_and_call, arg_list)),) return (False, arg_list,) From 4dbb0c5196dfa650e4ff63d965abc68910f2701c Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Feb 2025 02:21:16 -0500 Subject: [PATCH 42/59] Remove subtitles also Fixes #416 --- tubesync/sync/signals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 4f811add..79cc556b 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -221,7 +221,7 @@ def media_post_delete(sender, instance, **kwargs): video_path = Path(str(instance.media_file.path)).resolve() instance.media_file.delete(save=False) # the other files we created have these known suffixes - for suffix in frozenset(('nfo', 'jpg', 'webp', 'info.json',)): + for suffix in frozenset(('nfo', 'jpg', 'webp', 'vtt', 
'info.json',)): other_path = video_path.with_suffix(f'.{suffix}').resolve() log.info(f'Deleting file for: {instance} path: {other_path!s}') delete_file(other_path) From 6247a8b062e7a447ca3fef0cabb7e8f0628565cf Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Feb 2025 02:33:32 -0500 Subject: [PATCH 43/59] Account for language code in subtitle files --- tubesync/sync/signals.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 79cc556b..bb7bba5c 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -221,10 +221,15 @@ def media_post_delete(sender, instance, **kwargs): video_path = Path(str(instance.media_file.path)).resolve() instance.media_file.delete(save=False) # the other files we created have these known suffixes - for suffix in frozenset(('nfo', 'jpg', 'webp', 'vtt', 'info.json',)): + for suffix in frozenset(('nfo', 'jpg', 'webp', 'info.json',)): other_path = video_path.with_suffix(f'.{suffix}').resolve() log.info(f'Deleting file for: {instance} path: {other_path!s}') delete_file(other_path) + # subtitles include language code + subtitle_files = video_path.parent.glob(f'{glob_quote(video_path.with_suffix("").name)}*.vtt') + for file in subtitle_files: + log.info(f'Deleting file for: {instance} path: {file}') + delete_file(file) # Jellyfin creates .trickplay directories and posters for suffix in frozenset(('.trickplay', '-poster.jpg', '-poster.webp',)): # with_suffix insists on suffix beginning with '.' for no good reason From cae285cd9f51a3732072f639d5167517d0364fd8 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Feb 2025 04:51:17 -0500 Subject: [PATCH 44/59] Allow `exec_cmd` dictionary to be configured in settings This is a dictionary with keys of `WHEN` and values are a `list` of commands for the `ExecPP` to run. 
--- tubesync/sync/youtube.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index fd3795a1..a832b8ce 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -224,6 +224,10 @@ def download_media( 'sponskrub': False, }) + pp_opts.exec_cmd.update( + opts.get('exec_cmd', default_opts.exec_cmd) + ) + if skip_sponsors: # Let yt_dlp convert from human for us. pp_opts.sponsorblock_mark = yt_dlp.parse_options( From 913514326a07e13e148b9239b47cef507b2c0e59 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Feb 2025 05:57:29 -0500 Subject: [PATCH 45/59] Remove unused `filter_media` --- tubesync/sync/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index a5e3f135..918610de 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -27,7 +27,6 @@ from common.utils import json_serial from .models import Source, Media, MediaServer from .utils import (get_remote_image, resize_image_to_height, delete_file, write_text_file, filter_response) -from .filtering import filter_media from .youtube import YouTubeError From 6390f5a1c39a5353710d773bcebb4b10c17648ae Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Feb 2025 16:15:48 -0500 Subject: [PATCH 46/59] Try to include `timestamp` --- tubesync/sync/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index fd3795a1..d33de76c 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -143,6 +143,7 @@ def get_media_info(url): 'simulate': True, 'logger': log, 'extract_flat': True, + 'extractor_args': {'youtubetab': {'approximate_date': ['true']}}, }) response = {} with yt_dlp.YoutubeDL(opts) as y: From 83dc375810855d5c3855fa92e41cbc45a7935bd9 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Feb 2025 19:32:13 -0500 Subject: [PATCH 47/59] Save `duration`, `timestamp`, and `title` to Media instances --- tubesync/sync/tasks.py | 15 
++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index a5e3f135..96ecec75 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -10,7 +10,7 @@ import math import uuid from io import BytesIO from hashlib import sha1 -from datetime import timedelta, datetime +from datetime import datetime, timedelta, timezone as tz from shutil import copyfile from PIL import Image from django.conf import settings @@ -202,6 +202,7 @@ def index_source_task(source_id): source.last_crawl = timezone.now() source.save() log.info(f'Found {len(videos)} media items for source: {source}') + fields = lambda x, t=source.source_type: Media.METADATA_FIELDS.get(x, dict()).get(t, x) for video in videos: # Create or update each video as a Media object key = video.get(source.key_field, None) @@ -213,6 +214,18 @@ def index_source_task(source_id): except Media.DoesNotExist: media = Media(key=key) media.source = source + media.duration = float(video.get(fields('duration'), 0)) or None + media.title = str(video.get(fields('title'), '')) + timestamp = video.get(fields('timestamp'), None) + if timestamp is not None: + try: + timestamp_float = float(timestamp) + posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) + published_dt = posix_epoch + timedelta(seconds=timestamp_float) + except Exception as e: + log.warn(f'Could not set published for: {source} / {media} with "{e}"') + else: + media.published = published_dt try: media.save() log.debug(f'Indexed media: {source} / {media}') From 55637c2cafa3c11901bccf0b50c93d4f50693745 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 04:34:58 -0500 Subject: [PATCH 48/59] Add status text to the download task --- tubesync/sync/hooks.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 3e8722a9..1200bb22 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -4,6 +4,9 @@ import 
yt_dlp from common.logger import log from django.conf import settings +from .models import Media +from .tasks import get_media_download_task + progress_hook = { 'status': dict(), @@ -27,6 +30,9 @@ class BaseStatus: return status in cls.valid def __init__(self, hook_status_dict=None): + self.media_key = None + self.task_status = '[Started: 0%]' + self.task_verbose_name = None self._status_dict = hook_status_dict or self.status_dict self._registered_keys = set() @@ -43,6 +49,17 @@ class BaseStatus: if key in self._status_dict: del self._status_dict[key] + def update_task(self): + if self.media_key is None: + return + media = Media.objects.get(key=self.media_key) + task = get_media_download_task(str(media.pk)) + if task: + if self.task_verbose_name is None: + self.task_verbose_name = task.verbose_name + task.verbose_name = f'{self.task_status} ' + self.task_verbose_name + task.save() + class ProgressHookStatus(BaseStatus): status_dict = progress_hook['status'] valid = frozenset(( @@ -121,6 +138,10 @@ def yt_dlp_progress_hook(event): percent = round(100 * downloaded_bytes / total_bytes) if percent and (status.next_progress() < percent) and (0 == percent % 5): status.download_progress = percent + if key: + status.media_key = key + status.task_status = f'[downloading: {percent_str}]' + status.update_task() log.info(f'[youtube-dl] downloading: {filename} - {percent_str} ' f'of {total} at {speed}, {eta} remaining') elif 'finished' == event['status']: @@ -171,6 +192,11 @@ def yt_dlp_postprocessor_hook(event): del event['info_dict']['automatic_captions'] log.debug(repr(event['info_dict'])) + if 'Unknown' != key: + status.media_key = key + status.task_status = f'[{event["postprocessor"]}: {event["status"]}]' + status.update_task() + log.info(f'[{event["postprocessor"]}] {event["status"]} for: {name}') if 'finished' == event['status']: status.cleanup() From 332f34c78e8776f36f1d2a333ea7c4bb2fe4955e Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 04:46:35 -0500 
Subject: [PATCH 49/59] Avoid `ImportError` --- tubesync/sync/hooks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 1200bb22..6060a3a6 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -4,9 +4,6 @@ import yt_dlp from common.logger import log from django.conf import settings -from .models import Media -from .tasks import get_media_download_task - progress_hook = { 'status': dict(), @@ -18,6 +15,9 @@ postprocessor_hook = { class BaseStatus: + from .models import Media + from .tasks import get_media_download_task + status_dict = dict() valid = set() From b16fb062aaf2af42a16dd3abb1c7a1f652992544 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 04:50:08 -0500 Subject: [PATCH 50/59] Avoid `ImportError` --- tubesync/sync/hooks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 6060a3a6..7dcfb25a 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -15,9 +15,6 @@ postprocessor_hook = { class BaseStatus: - from .models import Media - from .tasks import get_media_download_task - status_dict = dict() valid = set() @@ -52,6 +49,9 @@ class BaseStatus: def update_task(self): if self.media_key is None: return + from .models import Media + from .tasks import get_media_download_task + media = Media.objects.get(key=self.media_key) task = get_media_download_task(str(media.pk)) if task: From 33ad4179b86e714b928015db01d7ae5824e243b5 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 05:27:45 -0500 Subject: [PATCH 51/59] Clean up previously prepended task_status --- tubesync/sync/hooks.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index 7dcfb25a..96c9f98d 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -56,8 +56,12 @@ class BaseStatus: task = get_media_download_task(str(media.pk)) 
if task: if self.task_verbose_name is None: - self.task_verbose_name = task.verbose_name - task.verbose_name = f'{self.task_status} ' + self.task_verbose_name + # clean up any previously prepended task_status + # this happened because of duplicated tasks on my test system + s = task.verbose_name + cleaned = s[1+s.find(' Downloading '):] + self.task_verbose_name = cleaned + task.verbose_name = f'{self.task_status} {self.task_verbose_name}' task.save() class ProgressHookStatus(BaseStatus): From 92130f3604f0716e229e338dbfbd2eddfc0ec342 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 06:19:25 -0500 Subject: [PATCH 52/59] Do not schedule additional `download_media` tasks --- tubesync/sync/signals.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 4f811add..6f244fd5 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -14,7 +14,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta map_task_to_instance, check_source_directory_exists, download_media, rescan_media_server, download_source_images, save_all_media_for_source, rename_all_media_for_source, - get_media_metadata_task) + get_media_metadata_task, get_media_download_task) from .utils import delete_file, glob_quote from .filtering import filter_media @@ -183,13 +183,13 @@ def media_post_save(sender, instance, created, **kwargs): verbose_name=verbose_name.format(instance.name), remove_existing_tasks=True ) + existing_media_download_task = get_media_download_task(str(instance.pk)) # If the media has not yet been downloaded schedule it to be downloaded - if not instance.media_file_exists: + if not (instance.media_file_exists or existing_media_download_task): instance.downloaded = False instance.media_file = None - if (not instance.downloaded and instance.can_download and not instance.skip - and instance.source.download_media): - 
delete_task_by_media('sync.tasks.download_media', (str(instance.pk),)) + if (instance.source.download_media and instance.can_download) and not ( + instance.skip or instance.downloaded or existing_media_download_task): verbose_name = _('Downloading media for "{}"') download_media( str(instance.pk), From d7222e8fbb167b46c4b36fcaa32b0b08f025d271 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 07:23:50 -0500 Subject: [PATCH 53/59] Be consistent with `get_media_metadata_task` --- tubesync/sync/signals.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 6f244fd5..7625cec5 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -156,8 +156,9 @@ def media_post_save(sender, instance, created, **kwargs): post_save.disconnect(media_post_save, sender=Media) instance.save() post_save.connect(media_post_save, sender=Media) + existing_media_metadata_task = get_media_metadata_task(str(instance.pk)) # If the media is missing metadata schedule it to be downloaded - if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk): + if not (instance.skip or instance.metadata or existing_media_metadata_task): log.info(f'Scheduling task to download metadata for: {instance.url}') verbose_name = _('Downloading metadata for "{}"') download_media_metadata( From b6334ce41cdb99dde71d638a7c2e02c49fb40109 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 08:18:22 -0500 Subject: [PATCH 54/59] Add `timestamp` to mappings Return `field` instead of '' so that a missing mapping returns itself. 
--- tubesync/sync/models.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 2daeb094..f406e5e0 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -664,6 +664,11 @@ class Media(models.Model): Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date', Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date', }, + 'timestamp': { + Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp', + Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp', + Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp', + }, 'title': { Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title', Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title', @@ -944,7 +949,7 @@ class Media(models.Model): def get_metadata_field(self, field): fields = self.METADATA_FIELDS.get(field, {}) - return fields.get(self.source.source_type, '') + return fields.get(self.source.source_type, field) def iter_formats(self): for fmt in self.formats: From b8f8d9d7fab01d0b6ba720d54f023228d9f7ad80 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 08:27:10 -0500 Subject: [PATCH 55/59] Use `Media.get_metadata_field` --- tubesync/sync/tasks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 96ecec75..6f6bd800 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -202,7 +202,7 @@ def index_source_task(source_id): source.last_crawl = timezone.now() source.save() log.info(f'Found {len(videos)} media items for source: {source}') - fields = lambda x, t=source.source_type: Media.METADATA_FIELDS.get(x, dict()).get(t, x) + fields = lambda f, m: m.get_metadata_field(f) for video in videos: # Create or update each video as a Media object key = video.get(source.key_field, None) @@ -214,9 +214,9 @@ def index_source_task(source_id): except Media.DoesNotExist: media = Media(key=key) media.source = source - media.duration = float(video.get(fields('duration'), 0)) or None - media.title = 
str(video.get(fields('title'), '')) - timestamp = video.get(fields('timestamp'), None) + media.duration = float(video.get(fields('duration', media), 0)) or None + media.title = str(video.get(fields('title', media), '')) + timestamp = video.get(fields('timestamp', media), None) if timestamp is not None: try: timestamp_float = float(timestamp) From 488294475a3cea94e87916407115dc38557bddd7 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 08:48:16 -0500 Subject: [PATCH 56/59] Limit the `metadata_title` string to fit in the `title` column --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index f406e5e0..a61b1379 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -935,7 +935,7 @@ class Media(models.Model): def save(self, force_insert=False, force_update=False, using=None, update_fields=None): # Trigger an update of derived fields from metadata if self.metadata: - self.title = self.metadata_title + self.title = self.metadata_title[:200] self.duration = self.metadata_duration if update_fields is not None and "metadata" in update_fields: # If only some fields are being updated, make sure we update title and duration if metadata changes From f963c556106c7efa4538bcf902ec198d763ee51b Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 08:52:09 -0500 Subject: [PATCH 57/59] Limit the `title` string to fit in the database column --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 6f6bd800..d44eee0e 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -215,7 +215,7 @@ def index_source_task(source_id): media = Media(key=key) media.source = source media.duration = float(video.get(fields('duration', media), 0)) or None - media.title = str(video.get(fields('title', media), '')) + media.title = str(video.get(fields('title', media), ''))[:200] 
timestamp = video.get(fields('timestamp', media), None) if timestamp is not None: try: From 08504708674b918cf2e77c2feeb4496dcaa553d5 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 22:57:09 -0500 Subject: [PATCH 58/59] Rename the internal function After removing the assert, the old name was a bit confusing. --- tubesync/sync/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index fa904c5f..9f599672 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -206,11 +206,11 @@ def normalize_codec(codec_str): def list_of_dictionaries(arg_list, arg_function=lambda x: x): assert callable(arg_function) if isinstance(arg_list, list): - def _assert_and_call(arg_dict): + def _call_func_with_dict(arg_dict): if isinstance(arg_dict, dict): return arg_function(arg_dict) return arg_dict - return (True, list(map(_assert_and_call, arg_list)),) + return (True, list(map(_call_func_with_dict, arg_list)),) return (False, arg_list,) From 749fb1308e02656bbbf146275e6b47e5127fb4c2 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Feb 2025 04:29:53 -0500 Subject: [PATCH 59/59] Control `writethumbnail` with `embed_thumbnail` --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index fd3795a1..2a3d33b1 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -242,7 +242,7 @@ def download_media( 'writesubtitles': write_subtitles, 'writeautomaticsub': auto_subtitles, 'subtitleslangs': sub_langs.split(','), - 'writethumbnail': True, + 'writethumbnail': embed_thumbnail, 'check_formats': False, 'overwrites': None, 'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20),