From 8c22b6c99efb464dfb450707440c6162f04b7b46 Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 00:05:57 -0500
Subject: [PATCH 01/34] Add response filtering

These functions aren't being used yet; they will be tested against my
database before that happens.

---
 tubesync/sync/utils.py | 62 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index 3e29fe3f..e44cef1f 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -170,6 +170,68 @@ def normalize_codec(codec_str):
     return result
 
 
+def _url_keys(arg_dict, filter_func):
+    result = {}
+    for key in arg_dict.keys():
+        if 'url' in key:
+            result.update(
+                {key: (key, filter_func(key=key, url=arg_dict[key]),)}
+            )
+    return result
+
+
+def _drop_url_keys(arg_dict, key, filter_func):
+    if key in arg_dict.keys():
+        for val_dict in arg_dict[key]:
+            for url_key in _url_keys(val_dict, filter_func):
+                if url_key[1] is True:
+                    del val_dict[url_key[0]]
+
+
+def filter_response(response_dict):
+    '''
+        Clean up the response so as to not store useless metadata in the database.
+    '''
+    # raise an exception for an unexpected argument type
+    if not isinstance(filedata, dict):
+        raise TypeError(f'filedata must be a dict, got "{type(filedata)}"')
+    # optimize the empty case
+    if not response_dict:
+        return response_dict
+
+    # beginning of formats cleanup {{{
+    # drop urls that expire, or restrict IPs
+    def drop_format_url(**kwargs):
+        url = kwargs['url']
+        return (
+            url
+            and '://' in url
+            and (
+                '/ip/' in url
+                or '/expire/' in url
+            )
+        )
+
+    _drop_url_keys(response_dict, 'formats', drop_format_url)
+    _drop_url_keys(response_dict, 'requested_formats', drop_format_url)
+    # end of formats cleanup }}}
+
+    # beginning of automatic_captions cleanup {{{
+    # drop urls that expire, or restrict IPs
+    def drop_auto_caption_url(**kwargs):
+        url = kwargs['url']
+        return (
+            url
+            and '://' in url
+            and '&expire=' in url
+        )
+
+    _drop_url_keys(response_dict, 'automatic_captions', drop_auto_caption_url)
+    # end of automatic_captions cleanup }}}
+
+    return response_dict
+
+
 def parse_media_format(format_dict):
     '''
         This parser primarily adapts the format dict returned by youtube-dl into a

From 63fa97cc5842af7805c3efb1c8d58971b096893d Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 00:43:59 -0500
Subject: [PATCH 02/34] More compact JSON

The software doesn't need an extra space per key.
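For reference, this is standard library behaviour rather than anything
TubeSync-specific; the default separators are (', ', ': '), so the
compact form saves one character per key and one per item:

    >>> import json
    >>> json.dumps({'a': 1, 'b': 2})
    '{"a": 1, "b": 2}'
    >>> json.dumps({'a': 1, 'b': 2}, separators=(',', ':'))
    '{"a":1,"b":2}'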
---
 tubesync/sync/tasks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 3df651ba..080dff6d 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -304,7 +304,7 @@ def download_media_metadata(media_id):
         return
     source = media.source
     metadata = media.index_metadata()
-    media.metadata = json.dumps(metadata, default=json_serial)
+    media.metadata = json.dumps(metadata, separators=(',', ':'), default=json_serial)
     upload_date = media.upload_date
     # Media must have a valid upload date
     if upload_date:

From 8c31720bf707b0b12713af0e8a5a356f3bc6255d Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 01:33:06 -0500
Subject: [PATCH 03/34] Log the reduction of metadata length

---
 tubesync/sync/models.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 2037492d..7ae68729 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -19,7 +19,7 @@ from common.utils import clean_filename, clean_emoji
 from .youtube import (get_media_info as get_youtube_media_info,
                       download_media as download_youtube_media,
                       get_channel_image_info as get_youtube_channel_image_info)
-from .utils import seconds_to_timestr, parse_media_format
+from .utils import seconds_to_timestr, parse_media_format, filter_response
 from .matching import (get_best_combined_format, get_best_audio_format,
                        get_best_video_format)
 from .mediaservers import PlexMediaServer
@@ -1143,12 +1143,27 @@ class Media(models.Model):
     def has_metadata(self):
         return self.metadata is not None
 
+
+    def reduce_data(self, data):
+        from common.logger import log
+        from common.utils import json_serial
+        # log the results of filtering / compacting on metadata size
+        filtered_data = filter_response(data)
+        compact_metadata = json.dumps(filtered_data, separators=(',', ':'), default=json_serial)
+        old_mdl = len(self.metadata)
+        new_mdl = len(compact_metadata)
+        if old_mdl > new_mdl:
+            delta = old_mdl - new_mdl
+            log.info(f'{self.key}: metadata reduced by {delta,} characters ({old_mdl,} -> {new_mdl,})')
+
+
     @property
     def loaded_metadata(self):
         try:
             data = json.loads(self.metadata)
             if not isinstance(data, dict):
                 return {}
+            self.reduce_data(data)
             return data
         except Exception as e:
             return {}

From 25d2ff680270aa9e4188233cba3770cd9dc5275e Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 02:12:22 -0500
Subject: [PATCH 04/34] Don't reduce the actual data yet

---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 7ae68729..44f24dfb 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1163,7 +1163,7 @@ class Media(models.Model):
             data = json.loads(self.metadata)
             if not isinstance(data, dict):
                 return {}
-            self.reduce_data(data)
+            self.reduce_data(json.loads(self.metadata))
             return data
         except Exception as e:
             return {}

From 2f34fff7133754c05d348d50e43442a481c8adfc Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 02:55:05 -0500
Subject: [PATCH 05/34] Fixes from testing

The `automatic_captions` value has a layer of language codes that I
didn't account for.

The type checking was copied, and I didn't adjust it for the arguments
of this function.
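Roughly the shape involved, with illustrative values only (not copied
from a real response):

    response_dict['formats'] = [
        {'format_id': '251', 'url': 'https://host/expire/.../video'},
    ]
    response_dict['automatic_captions'] = {
        'en': [
            {'ext': 'vtt', 'url': 'https://host/caption?&expire=...'},
        ],
    }

so the captions filter has to run once per language code instead of
being applied to the top-level key directly.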
---
 tubesync/sync/utils.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index 162146eb..b85abaab 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -194,8 +194,8 @@ def filter_response(response_dict):
         Clean up the response so as to not store useless metadata in the database.
     '''
     # raise an exception for an unexpected argument type
-    if not isinstance(filedata, dict):
-        raise TypeError(f'filedata must be a dict, got "{type(filedata)}"')
+    if not isinstance(response_dict, dict):
+        raise TypeError(f'response_dict must be a dict, got "{type(response_dict)}"')
     # optimize the empty case
     if not response_dict:
         return response_dict
@@ -227,7 +227,11 @@ def filter_response(response_dict):
             and '&expire=' in url
         )
 
-    _drop_url_keys(response_dict, 'automatic_captions', drop_auto_caption_url)
+    ac_key = 'automatic_captions'
+    if ac_key in response_dict.keys():
+        ac_dict = response_dict[ac_key]
+        for lang_code in ac_dict:
+            _drop_url_keys(ac_dict, lang_code, drop_auto_caption_url)
     # end of automatic_captions cleanup }}}
 
     return response_dict

From 9a4101a0a147f3fe0ee91c13197a077f1f27cd3e Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 03:18:39 -0500
Subject: [PATCH 06/34] Fix formatting

---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 44f24dfb..077a8283 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1154,7 +1154,7 @@ class Media(models.Model):
         new_mdl = len(compact_metadata)
         if old_mdl > new_mdl:
             delta = old_mdl - new_mdl
-            log.info(f'{self.key}: metadata reduced by {delta,} characters ({old_mdl,} -> {new_mdl,})')
+            log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
 
 
     @property

From db25fa80294e035b1742fac2e044d2ff7de27464 Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 03:35:58 -0500
Subject: [PATCH 07/34] Adjusted comment

---
 tubesync/sync/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index b85abaab..108cd757 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -218,7 +218,7 @@ def filter_response(response_dict):
     # end of formats cleanup }}}
 
     # beginning of automatic_captions cleanup {{{
-    # drop urls that expire, or restrict IPs
+    # drop urls that expire
     def drop_auto_caption_url(**kwargs):
         url = kwargs['url']
         return (

From 431de2e0dfa606d5a725a475159afe5fe370a251 Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 04:11:14 -0500
Subject: [PATCH 08/34] Loop over a set of keys for each URL type

---
 tubesync/sync/utils.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index 108cd757..f66348b4 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -213,13 +213,13 @@ def filter_response(response_dict):
             )
         )
 
-    _drop_url_keys(response_dict, 'formats', drop_format_url)
-    _drop_url_keys(response_dict, 'requested_formats', drop_format_url)
+    for key in frozenset(('formats', 'requested_formats',)):
+        _drop_url_keys(response_dict, key, drop_format_url)
     # end of formats cleanup }}}
 
-    # beginning of automatic_captions cleanup {{{
+    # beginning of subtitles cleanup {{{
     # drop urls that expire
-    def drop_auto_caption_url(**kwargs):
+    def drop_subtitles_url(**kwargs):
         url = kwargs['url']
         return (
             url
@@ -227,12 +227,13 @@ def filter_response(response_dict):
             and '&expire=' in url
         )
 
-    ac_key = 'automatic_captions'
-    if ac_key in response_dict.keys():
-        ac_dict = response_dict[ac_key]
-        for lang_code in ac_dict:
-            _drop_url_keys(ac_dict, lang_code, drop_auto_caption_url)
-    # end of automatic_captions cleanup }}}
+    for key in frozenset(('subtitles', 'automatic_captions',)):
+        if key in response_dict.keys():
+            key_dict = response_dict[key]
+            for lang_code in key_dict:
+                _drop_url_keys(key_dict, lang_code, drop_subtitles_url)
+    # end of subtitles cleanup }}}
+
 
     return response_dict

From 7b8d11791d9725191146304f612ae7e2f7d3d0ec Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 05:39:50 -0500
Subject: [PATCH 09/34] Drop keys from formats that cannot be useful

---
 tubesync/sync/utils.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index f66348b4..8e98857e 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -213,8 +213,20 @@ def filter_response(response_dict):
             )
         )
 
+    # these format keys are not useful to us
+    drop_keys = frozenset((
+        'downloader_options',
+        'fragments',
+        'http_headers',
+        '__needs_testing',
+        '__working',
+    ))
     for key in frozenset(('formats', 'requested_formats',)):
         _drop_url_keys(response_dict, key, drop_format_url)
+        if key in response_dict.keys():
+            for format in response_dict[key]:
+                for drop_key in drop_keys:
+                    del format[drop_key]
     # end of formats cleanup }}}
 
     # beginning of subtitles cleanup {{{

From c7457e94ac1f27c04f912a086b9cc766f4ab5882 Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 05:58:50 -0500
Subject: [PATCH 10/34] Check that the drop_key exists

---
 tubesync/sync/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index 8e98857e..f73e243b 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -226,7 +226,8 @@ def filter_response(response_dict):
         if key in response_dict.keys():
             for format in response_dict[key]:
                 for drop_key in drop_keys:
-                    del format[drop_key]
+                    if drop_key in format.keys():
+                        del format[drop_key]
     # end of formats cleanup }}}
 
     # beginning of subtitles cleanup {{{

From 2d85bcbe14c0701782d5c76b0cb36116be193d08 Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 06:20:01 -0500
Subject: [PATCH 11/34] Use a distinct try to log errors

---
 tubesync/sync/models.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 077a8283..6bcac984 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1159,11 +1159,17 @@ class Media(models.Model):
 
     @property
     def loaded_metadata(self):
+        from common.logger import log
+        try:
+            self.reduce_data(json.loads(self.metadata))
+        except Exception as e:
+            log.error(f'reduce_data: {e.msg}')
+            pass
+
         try:
             data = json.loads(self.metadata)
             if not isinstance(data, dict):
                 return {}
-            self.reduce_data(data)
             return data
         except Exception as e:
             return {}

From 8ac5b36eee9a504d0f0b5a9092c5120fa7f8ecbf Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 06:38:56 -0500
Subject: [PATCH 12/34] Use the exception function for traceback

---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 6bcac984..54fcdaa6 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1163,7 +1163,7 @@ class Media(models.Model):
         try:
             self.reduce_data(json.loads(self.metadata))
         except Exception as e:
-            log.error(f'reduce_data: {e.msg}')
+            log.exception('reduce_data: %s', e)
             pass
 
         try:

From 779370122847bb24484181834a299f7e3f41ed1f Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 13:01:06 -0500
Subject: [PATCH 13/34] Simplify results from _url_keys

Also, name the tuple values when using the results.

---
 tubesync/sync/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index f73e243b..170b2a51 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -176,7 +176,7 @@ def _url_keys(arg_dict, filter_func):
     for key in arg_dict.keys():
         if 'url' in key:
             result.update(
-                {key: (key, filter_func(key=key, url=arg_dict[key]),)}
+                {key: (filter_func(key=key, url=arg_dict[key]),)}
             )
     return result
 
@@ -184,9 +184,9 @@ def _url_keys(arg_dict, filter_func):
 def _drop_url_keys(arg_dict, key, filter_func):
     if key in arg_dict.keys():
         for val_dict in arg_dict[key]:
-            for url_key in _url_keys(val_dict, filter_func):
-                if url_key[1] is True:
-                    del val_dict[url_key[0]]
+            for url_key, remove in _url_keys(val_dict, filter_func).items():
+                if remove is True:
+                    del val_dict[url_key]

From 1c432ccce127439bc722e4d0727d545794d51e4e Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 7 Jan 2025 13:49:58 -0500
Subject: [PATCH 14/34] Some formats are using a different URL

---
 tubesync/sync/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index 170b2a51..14e7505f 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -176,7 +176,7 @@ def _url_keys(arg_dict, filter_func):
     for key in arg_dict.keys():
         if 'url' in key:
             result.update(
-                {key: (filter_func(key=key, url=arg_dict[key]),)}
+                {key: filter_func(key=key, url=arg_dict[key])}
             )
     return result
 
@@ -209,7 +209,9 @@ def filter_response(response_dict):
             and '://' in url
             and (
                 '/ip/' in url
+                or 'ip=' in url
                 or '/expire/' in url
+                or 'expire=' in url
             )
         )

From d35f52f8acb07c30f81c855a855b63d284dbaedf Mon Sep 17 00:00:00 2001
From: tcely
Date: Wed, 8 Jan 2025 11:31:23 -0500
Subject: [PATCH 15/34] Drop /expire/ URLs from automatic_captions too

---
 tubesync/sync/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index 14e7505f..b424528b 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -239,7 +239,10 @@ def filter_response(response_dict):
         return (
             url
             and '://' in url
-            and '&expire=' in url
+            and (
+                '/expire/' in url
+                or '&expire=' in url
+            )
         )
 
     for key in frozenset(('subtitles', 'automatic_captions',)):

From ad10bcfa61af480fd9be9b3f7a97baeba18e033d Mon Sep 17 00:00:00 2001
From: tcely
Date: Wed, 8 Jan 2025 22:48:23 -0500
Subject: [PATCH 16/34] Log both compacted and reduced sizes

---
 tubesync/sync/models.py | 43 +++++++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 54fcdaa6..76dea0b1 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1144,28 +1144,35 @@ class Media(models.Model):
         return self.metadata is not None
 
 
-    def reduce_data(self, data):
-        from common.logger import log
-        from common.utils import json_serial
-        # log the results of filtering / compacting on metadata size
-        filtered_data = filter_response(data)
-        compact_metadata = json.dumps(filtered_data, separators=(',', ':'), default=json_serial)
-        old_mdl = len(self.metadata)
-        new_mdl = len(compact_metadata)
-        if old_mdl > new_mdl:
-            delta = old_mdl - new_mdl
-            log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
+    @property
+    def reduce_data(self):
+        try:
+            from common.logger import log
+            from common.utils import json_serial
+
+            old_mdl = len(self.metadata or "")
+            data = json.loads(self.metadata or "")
+            compact_data = json.dumps(data, separators=(',', ':'), default=json_serial)
+
+            filtered_data = filter_response(data)
+            filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial)
+        except Exception as e:
+            log.exception('reduce_data: %s', e)
+        else:
+            # log the results of filtering / compacting on metadata size
+            new_mdl = len(compact_data)
+            if old_mdl > new_mdl:
+                delta = old_mdl - new_mdl
+                log.info(f'{self.key}: metadata compacted by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
+            new_mdl = len(filtered_json)
+            if old_mdl > new_mdl:
+                delta = old_mdl - new_mdl
+                log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
 
 
     @property
     def loaded_metadata(self):
-        from common.logger import log
-        try:
-            self.reduce_data(json.loads(self.metadata))
-        except Exception as e:
-            log.exception('reduce_data: %s', e)
-            pass
-
+        self.reduce_data
         try:
             data = json.loads(self.metadata)
             if not isinstance(data, dict):

From 100382f66fea8b8dd27532932f23f4160d354401 Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 9 Jan 2025 09:28:58 -0500
Subject: [PATCH 17/34] Rename compact_data to compact_json

This was misleading because the data dict becomes a JSON string.

---
 tubesync/sync/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 76dea0b1..67453f03 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1152,7 +1152,7 @@ class Media(models.Model):
 
             old_mdl = len(self.metadata or "")
             data = json.loads(self.metadata or "")
-            compact_data = json.dumps(data, separators=(',', ':'), default=json_serial)
+            compact_json = json.dumps(data, separators=(',', ':'), default=json_serial)
 
             filtered_data = filter_response(data)
             filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial)
@@ -1160,7 +1160,7 @@ class Media(models.Model):
             log.exception('reduce_data: %s', e)
         else:
             # log the results of filtering / compacting on metadata size
-            new_mdl = len(compact_data)
+            new_mdl = len(compact_json)
             if old_mdl > new_mdl:
                 delta = old_mdl - new_mdl
                 log.info(f'{self.key}: metadata compacted by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')

From 682a53da34d18d777e58e6080df4390f44519686 Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 9 Jan 2025 10:17:37 -0500
Subject: [PATCH 18/34] Add a filter_response test

First, only check that changes did happen.
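The test reads captured metadata from an `all_test_metadata` mapping
keyed by date, defined elsewhere in tests.py. As a rough sketch only
(the fixture layout here is assumed, not taken from the repository),
such a mapping could be built like:

    # hypothetical loader; the real tests.py may differ
    from pathlib import Path

    all_test_metadata = {
        p.stem: p.read_text()
        for p in Path(__file__).parent.glob('*.json')
    }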
---
 tubesync/sync/tests.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py
index 8f0de6ef..935ad569 100644
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -18,6 +18,7 @@ from background_task.models import Task
 from .models import Source, Media
 from .tasks import cleanup_old_media
 from .filtering import filter_media
+from .utils import filter_response
 
 
 class FrontEndTestCase(TestCase):
@@ -1709,6 +1710,43 @@ class FormatMatchingTestCase(TestCase):
                          f'expected {expected_match_result}')
 
 
+class ResponseFilteringTestCase(TestCase):
+
+    def setUp(self):
+        # Disable general logging for test case
+        logging.disable(logging.CRITICAL)
+        # Add a test source
+        self.source = Source.objects.create(
+            source_type=Source.SOURCE_TYPE_YOUTUBE_CHANNEL,
+            key='testkey',
+            name='testname',
+            directory='testdirectory',
+            index_schedule=3600,
+            delete_old_media=False,
+            days_to_keep=14,
+            source_resolution=Source.SOURCE_RESOLUTION_1080P,
+            source_vcodec=Source.SOURCE_VCODEC_VP9,
+            source_acodec=Source.SOURCE_ACODEC_OPUS,
+            prefer_60fps=False,
+            prefer_hdr=False,
+            fallback=Source.FALLBACK_FAIL
+        )
+        # Add some media
+        self.media = Media.objects.create(
+            key='mediakey',
+            source=self.source,
+            metadata='{}'
+        )
+
+    def test_metadata_20230629(self):
+        self.media.metadata = all_test_metadata['20230629']
+        self.media.save()
+
+        unfiltered = self.media.loaded_metadata
+        filtered = filter_response(self.media.loaded_metadata)
+        self.assertNotEqual(len(str(unfiltered)), len(str(filtered)))
+
+
 class TasksTestCase(TestCase):
 
     def setUp(self):

From 4c9fa40bb0e47871caffaf9a3212932727ffc1cb Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 9 Jan 2025 11:47:10 -0500
Subject: [PATCH 19/34] More filter_response asserts

---
 tubesync/sync/tests.py | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py
index 935ad569..bc199282 100644
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -1744,7 +1744,41 @@ class ResponseFilteringTestCase(TestCase):
 
         unfiltered = self.media.loaded_metadata
         filtered = filter_response(self.media.loaded_metadata)
-        self.assertNotEqual(len(str(unfiltered)), len(str(filtered)))
+        self.assertIn('formats', unfiltered.keys())
+        self.assertIn('formats', filtered.keys())
+        # filtered 'http_headers'
+        self.assertIn('http_headers', unfiltered['formats'][0].keys())
+        self.assertNotIn('http_headers', filtered['formats'][0].keys())
+        # did not lose any formats
+        self.assertEqual(48, len(unfiltered['formats']))
+        self.assertEqual(48, len(filtered['formats']))
+        self.assertEqual(len(unfiltered['formats']), len(filtered['formats']))
+        # did reduce the size of the metadata
+        self.assertTrue(len(str(filtered)) < len(str(unfiltered)))
+
+        url_keys = []
+        for format in unfiltered['formats']:
+            for key in format.keys():
+                if 'url' in key:
+                    url_keys.append((format['format_id'], key, format[key],))
+        unfiltered_url_keys = url_keys
+        self.assertEqual(63, len(unfiltered_url_keys), msg=str(unfiltered_url_keys))
+
+        url_keys = []
+        for format in filtered['formats']:
+            for key in format.keys():
+                if 'url' in key:
+                    url_keys.append((format['format_id'], key, format[key],))
+        filtered_url_keys = url_keys
+        self.assertEqual(3, len(filtered_url_keys), msg=str(filtered_url_keys))
+
+        url_keys = []
+        for lang_code, captions in filtered['automatic_captions'].items():
+            for caption in captions:
+                for key in caption.keys():
key: + url_keys.append((lang_code, caption['ext'], caption[key],)) + self.assertEqual(0, len(url_keys), msg=str(url_keys)) class TasksTestCase(TestCase): From 3e3f80d287c637c34f5c5094aa313531dfbe7b77 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 9 Jan 2025 12:04:01 -0500 Subject: [PATCH 20/34] More filter_response asserts --- tubesync/sync/tests.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index bc199282..2704058f 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -1746,6 +1746,9 @@ class ResponseFilteringTestCase(TestCase): filtered = filter_response(self.media.loaded_metadata) self.assertIn('formats', unfiltered.keys()) self.assertIn('formats', filtered.keys()) + # filtered 'downloader_options' + self.assertIn('downloader_options', unfiltered['formats'][10].keys()) + self.assertNotIn('downloader_options', filtered['formats'][10].keys()) # filtered 'http_headers' self.assertIn('http_headers', unfiltered['formats'][0].keys()) self.assertNotIn('http_headers', filtered['formats'][0].keys()) @@ -1753,6 +1756,10 @@ class ResponseFilteringTestCase(TestCase): self.assertEqual(48, len(unfiltered['formats'])) self.assertEqual(48, len(filtered['formats'])) self.assertEqual(len(unfiltered['formats']), len(filtered['formats'])) + # did not remove everything with url + self.assertIn('original_url', unfiltered.keys()) + self.assertIn('original_url', filtered.keys()) + self.assertEqual(unfiltered['original_url'], filtered['original_url']) # did reduce the size of the metadata self.assertTrue(len(str(filtered)) < len(str(unfiltered))) From 29c39aab1f7096a7267c351cc3ebf0d786c98723 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 9 Jan 2025 13:20:22 -0500 Subject: [PATCH 21/34] Add SHRINK_NEW_MEDIA_METADATA setting --- tubesync/sync/tasks.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 30f8c827..644918b7 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -8,6 +8,7 @@ import os import json import math import uuid +from copy import deepcopy from io import BytesIO from hashlib import sha1 from datetime import timedelta, datetime @@ -26,7 +27,7 @@ from common.errors import NoMediaException, DownloadFailedException from common.utils import json_serial from .models import Source, Media, MediaServer from .utils import (get_remote_image, resize_image_to_height, delete_file, - write_text_file) + write_text_file, filter_response) from .filtering import filter_media @@ -304,7 +305,11 @@ def download_media_metadata(media_id): return source = media.source metadata = media.index_metadata() - media.metadata = json.dumps(metadata, separators=(',', ':'), default=json_serial) + if getattr(settings, 'SHRINK_NEW_MEDIA_METADATA', False): + response = filter_response(deepcopy(metadata)) + else: + response = metadata + media.metadata = json.dumps(response, separators=(',', ':'), default=json_serial) upload_date = media.upload_date # Media must have a valid upload date if upload_date: From 0f986949e5ad18195de2265eae83f5360f6c5277 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 9 Jan 2025 13:36:43 -0500 Subject: [PATCH 22/34] Have filter_response return a copy, if requested --- tubesync/sync/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index b424528b..1d67af38 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -1,6 +1,7 @@ import os import re import 
 import math
+from copy import deepcopy
 from operator import itemgetter
 from pathlib import Path
 from tempfile import NamedTemporaryFile
@@ -189,13 +190,18 @@ def _drop_url_keys(arg_dict, key, filter_func):
                 del val_dict[url_key]
 
 
-def filter_response(response_dict):
+def filter_response(arg_dict, copy_arg=False):
     '''
         Clean up the response so as to not store useless metadata in the database.
     '''
+    response_dict = arg_dict
     # raise an exception for an unexpected argument type
     if not isinstance(response_dict, dict):
         raise TypeError(f'response_dict must be a dict, got "{type(response_dict)}"')
+
+    if copy_arg:
+        response_dict = deepcopy(arg_dict)
+
     # optimize the empty case
     if not response_dict:
         return response_dict

From 274f19fa15547c1a9d76c967e4134ffafa822aa1 Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 9 Jan 2025 13:41:23 -0500
Subject: [PATCH 23/34] Use the new copy argument to filter_response

---
 tubesync/sync/tasks.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 644918b7..ab92e2c8 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -8,7 +8,6 @@ import os
 import json
 import math
 import uuid
-from copy import deepcopy
 from io import BytesIO
 from hashlib import sha1
 from datetime import timedelta, datetime
@@ -305,10 +304,9 @@ def download_media_metadata(media_id):
         return
     source = media.source
     metadata = media.index_metadata()
+    response = metadata
     if getattr(settings, 'SHRINK_NEW_MEDIA_METADATA', False):
-        response = filter_response(deepcopy(metadata))
-    else:
-        response = metadata
+        response = filter_response(metadata, True)
     media.metadata = json.dumps(response, separators=(',', ':'), default=json_serial)
     upload_date = media.upload_date
     # Media must have a valid upload date

From 1ff8dfda9897dd8c409feba2649b5ce15f5f7e32 Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 9 Jan 2025 13:53:12 -0500
Subject: [PATCH 24/34] Use the new copy argument to filter_response

---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 67453f03..10fbbdbd 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1154,7 +1154,7 @@ class Media(models.Model):
             data = json.loads(self.metadata or "")
             compact_json = json.dumps(data, separators=(',', ':'), default=json_serial)
 
-            filtered_data = filter_response(data)
+            filtered_data = filter_response(data, True)
             filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial)

From 6292a9a59dc5d05db79241b9bd2d58f51be3cc6a Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 9 Jan 2025 14:22:37 -0500
Subject: [PATCH 25/34] Add SHRINK_OLD_MEDIA_METADATA setting

---
 tubesync/sync/models.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 10fbbdbd..bb850af3 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1168,6 +1168,8 @@ class Media(models.Model):
             if old_mdl > new_mdl:
                 delta = old_mdl - new_mdl
                 log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
+            if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False):
+                self.metadata = filtered_json

From 81edd08c7d8ce8d0844b82751b730d2dc91ff4ac Mon Sep 17 00:00:00 2001
From: Makhuta
Date: Sat, 11 Jan 2025 14:38:31 +0100
Subject: [PATCH 26/34] Update - added video order to Media Format

---
 tubesync/sync/models.py                            | 9 +++++++--
 tubesync/sync/templates/sync/_mediaformatvars.html | 5 +++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 2037492d..8e37bdbe 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -589,6 +589,7 @@ class Source(models.Model):
             'key': 'SoMeUnIqUiD',
             'format': '-'.join(fmt),
             'playlist_title': 'Some Playlist Title',
+            'video_order': '1',
             'ext': self.extension,
             'resolution': self.source_resolution if self.source_resolution else '',
             'height': '720' if self.source_resolution else '',
@@ -1128,6 +1129,7 @@ class Media(models.Model):
             'key': self.key,
             'format': '-'.join(display_format['format']),
             'playlist_title': self.playlist_title,
+            'video_order': self.get_episode_str(),
             'ext': self.source.extension,
             'resolution': display_format['resolution'],
             'height': display_format['height'],
@@ -1373,8 +1375,7 @@ class Media(models.Model):
         nfo.append(season)
         # episode = number of video in the year
         episode = nfo.makeelement('episode', {})
-        episode_number = self.calculate_episode_number()
-        episode.text = str(episode_number) if episode_number else ''
+        episode.text = self.get_episode_str()
         episode.tail = '\n  '
         nfo.append(episode)
         # ratings = media metadata youtube rating
@@ -1524,6 +1525,10 @@ class Media(models.Model):
                 return position_counter
             position_counter += 1
 
+    def get_episode_str(self):
+        episode_number = self.calculate_episode_number()
+        return f'{episode_number:02}' if episode_number else ''
+
 
 class MediaServer(models.Model):
     '''

diff --git a/tubesync/sync/templates/sync/_mediaformatvars.html b/tubesync/sync/templates/sync/_mediaformatvars.html
index 438b200a..06068f90 100644
--- a/tubesync/sync/templates/sync/_mediaformatvars.html
+++ b/tubesync/sync/templates/sync/_mediaformatvars.html
@@ -73,6 +73,11 @@
         <td>Playlist title of media, if it's in a playlist</td>
         <td>Some Playlist</td>
       </tr>
+      <tr>
+        <td>{video_order}</td>
+        <td>Episode order in playlist, if in playlist (can cause issues if playlist is changed after adding)</td>
+        <td>01</td>
+      </tr>
       <tr>
         <td>{ext}</td>
         <td>File extension</td>

From 8dda325dbd841535708ca8f8d58602d26080b019 Mon Sep 17 00:00:00 2001
From: Makhuta
Date: Sat, 11 Jan 2025 15:53:36 +0100
Subject: [PATCH 27/34] Update models.py

---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 8e37bdbe..a5b7adbd 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -589,7 +589,7 @@ class Source(models.Model):
             'key': 'SoMeUnIqUiD',
             'format': '-'.join(fmt),
             'playlist_title': 'Some Playlist Title',
-            'video_order': '1',
+            'video_order': '01',
             'ext': self.extension,
             'resolution': self.source_resolution if self.source_resolution else '',
             'height': '720' if self.source_resolution else '',

From 4364ebbff3cd2f8147206ce05c63745cda88406c Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 11 Jan 2025 11:53:10 -0500
Subject: [PATCH 28/34] Multi-stage docker build for ffmpeg & s6-overlay

* Create a s6-overlay-extracted stage to copy from

  This was largely inspired by: @socheatsok78
  Our downloaded files are checksum-verified, whereas that version
  doesn't do any verification of the downloads.

* Update ffmpeg to the first build with checksums.sha256

* Create a ffmpeg-extracted stage to copy from

* Don't preserve ownership from the builder

I was sick of the extra work with ffmpeg builds, so I managed to get
sums generated for those builds, and now we don't need to manually fill
out SHA256 hashes anymore. Now to bump ffmpeg, we can just change the
date.
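The verification idea itself is independent of Docker; a minimal Python
sketch of checking a download against a published SHA-256 (names here
are illustrative, not part of the build):

    import hashlib

    def verify_sha256(path, expected_hex):
        digest = hashlib.sha256()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                digest.update(chunk)
        return digest.hexdigest() == expected_hex

The Dockerfile below does the same thing with aria2c checksum options
and the sha256sum tool instead.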
---
 Dockerfile | 286 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 202 insertions(+), 84 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 880dd677..a69609c5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,17 +1,202 @@
-FROM debian:bookworm-slim
-
-ARG TARGETARCH
-ARG TARGETPLATFORM
+ARG FFMPEG_DATE="2025-01-10-19-43"
+ARG FFMPEG_VERSION="N-118280-g5cd49e1bfd"
 
 ARG S6_VERSION="3.2.0.2"
+
 ARG SHA256_S6_AMD64="59289456ab1761e277bd456a95e737c06b03ede99158beb24f12b165a904f478"
 ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f66f785"
 ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae"
 
-ARG FFMPEG_DATE="autobuild-2024-12-24-14-15"
-ARG FFMPEG_VERSION="N-118163-g954d55c2a4"
-ARG SHA256_FFMPEG_AMD64="798a7e5a0724139e6bb70df8921522b23be27028f9f551dfa83c305ec4ffaf3a"
-ARG SHA256_FFMPEG_ARM64="c3e6cc0fec42cc7e3804014fbb02c1384a1a31ef13f6f9a36121f2e1216240c0"
+ARG ALPINE_VERSION="latest"
+ARG FFMPEG_PREFIX_FILE="ffmpeg-${FFMPEG_VERSION%%-*}"
+ARG FFMPEG_SUFFIX_FILE=".tar.xz"
+
+FROM alpine:${ALPINE_VERSION} AS ffmpeg-download
+ARG FFMPEG_DATE
+ARG FFMPEG_VERSION
+ARG FFMPEG_PREFIX_FILE
+ARG FFMPEG_SUFFIX_FILE
+ARG SHA256_FFMPEG_AMD64
+ARG SHA256_FFMPEG_ARM64
+ARG CHECKSUM_ALGORITHM="sha256"
+ARG FFMPEG_CHECKSUM_AMD64="${SHA256_FFMPEG_AMD64}"
+ARG FFMPEG_CHECKSUM_ARM64="${SHA256_FFMPEG_ARM64}"
+
+ARG FFMPEG_FILE_SUMS="checksums.${CHECKSUM_ALGORITHM}"
+ARG FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/autobuild-${FFMPEG_DATE}"
+
+ARG DESTDIR="/downloaded"
+ARG TARGETARCH
+ADD "${FFMPEG_URL}/${FFMPEG_FILE_SUMS}" "${DESTDIR}/"
+RUN set -eu ; \
+    apk --no-cache --no-progress add cmd:aria2c cmd:awk ; \
+\
+    aria2c_options() { \
+        algorithm="${CHECKSUM_ALGORITHM%[0-9]??}" ; \
+        bytes="${CHECKSUM_ALGORITHM#${algorithm}}" ; \
+        hash="$( awk -v fn="${1##*/}" '$0 ~ fn"$" { print $1; exit; }' "${DESTDIR}/${FFMPEG_FILE_SUMS}" )" ; \
+\
+        printf -- '\t%s\n' \
+            'allow-overwrite=true' \
+            'always-resume=false' \
+            'check-integrity=true' \
+            "checksum=${algorithm}-${bytes}=${hash}" \
+            'max-connection-per-server=2' \
+        ; \
+        printf -- '\n' ; \
+    } ; \
+\
+    decide_arch() { \
+        case "${TARGETARCH}" in \
+            (amd64) printf -- 'linux64' ;; \
+            (arm64) printf -- 'linuxarm64' ;; \
+        esac ; \
+    } ; \
+\
+    FFMPEG_ARCH="$(decide_arch)" ; \
+    for url in $(awk ' \
+        $2 ~ /^[*]?'"${FFMPEG_PREFIX_FILE}"'/ && /-'"${FFMPEG_ARCH}"'-/ { $1=""; print; } \
+        ' "${DESTDIR}/${FFMPEG_FILE_SUMS}") ; \
+    do \
+        url="${FFMPEG_URL}/${url# }" ; \
+        printf -- '%s\n' "${url}" ; \
+        aria2c_options "${url}" ; \
+        printf -- '\n' ; \
+    done > /tmp/downloads ; \
+    unset -v url ; \
+\
+    aria2c --no-conf=true \
+        --dir /downloaded \
+        --lowest-speed-limit='16K' \
+        --show-console-readout=false \
+        --summary-interval=0 \
+        --input-file /tmp/downloads ; \
+\
+    apk --no-cache --no-progress add cmd:awk "cmd:${CHECKSUM_ALGORITHM}sum" ; \
+\
+    decide_expected() { \
+        case "${TARGETARCH}" in \
+            (amd64) printf -- '%s' "${FFMPEG_CHECKSUM_AMD64}" ;; \
+            (arm64) printf -- '%s' "${FFMPEG_CHECKSUM_ARM64}" ;; \
+        esac ; \
+    } ; \
+\
+    FFMPEG_HASH="$(decide_expected)" ; \
+\
+    cd "${DESTDIR}" ; \
+    if [ -n "${FFMPEG_HASH}" ] ; \
+    then \
+        printf -- '%s *%s\n' "${FFMPEG_HASH}" "${FFMPEG_PREFIX_FILE}"*-"${FFMPEG_ARCH}"-*"${FFMPEG_SUFFIX_FILE}" >> /tmp/SUMS ; \
+        "${CHECKSUM_ALGORITHM}sum" --check --strict /tmp/SUMS || exit ; \
+    fi ; \
+    "${CHECKSUM_ALGORITHM}sum" --check --strict --ignore-missing "${DESTDIR}/${FFMPEG_FILE_SUMS}" ; \
+\
"/verified/${TARGETARCH}" ; \ + ln -v "${FFMPEG_PREFIX_FILE}"*-"${FFMPEG_ARCH}"-*"${FFMPEG_SUFFIX_FILE}" "/verified/${TARGETARCH}/" ; \ + rm -rf "${DESTDIR}" ; + +FROM alpine:${ALPINE_VERSION} AS ffmpeg-extracted +COPY --link --from=ffmpeg-download /verified /verified + +ARG FFMPEG_PREFIX_FILE +ARG FFMPEG_SUFFIX_FILE +ARG TARGETARCH +RUN set -eu ; \ + apk --no-cache --no-progress add cmd:tar cmd:xz ; \ +\ + mkdir -v /extracted ; \ + cd /extracted ; \ + set -x ; \ + tar -xp \ + --strip-components=2 \ + --no-anchored \ + --no-same-owner \ + -f "/verified/${TARGETARCH}"/"${FFMPEG_PREFIX_FILE}"*"${FFMPEG_SUFFIX_FILE}" \ + 'ffmpeg' 'ffprobe' ; \ +\ + ls -AlR /extracted ; + +FROM scratch AS s6-overlay-download +ARG S6_VERSION +ARG SHA256_S6_AMD64 +ARG SHA256_S6_ARM64 +ARG SHA256_S6_NOARCH + +ARG DESTDIR="/downloaded" +ARG CHECKSUM_ALGORITHM="sha256" + +ARG S6_CHECKSUM_AMD64="${CHECKSUM_ALGORITHM}:${SHA256_S6_AMD64}" +ARG S6_CHECKSUM_ARM64="${CHECKSUM_ALGORITHM}:${SHA256_S6_ARM64}" +ARG S6_CHECKSUM_NOARCH="${CHECKSUM_ALGORITHM}:${SHA256_S6_NOARCH}" + +ARG S6_OVERLAY_URL="https://github.com/just-containers/s6-overlay/releases/download/v${S6_VERSION}" +ARG S6_PREFIX_FILE="s6-overlay-" +ARG S6_SUFFIX_FILE=".tar.xz" + +ARG S6_FILE_AMD64="${S6_PREFIX_FILE}x86_64${S6_SUFFIX_FILE}" +ARG S6_FILE_ARM64="${S6_PREFIX_FILE}aarch64${S6_SUFFIX_FILE}" +ARG S6_FILE_NOARCH="${S6_PREFIX_FILE}noarch${S6_SUFFIX_FILE}" + +ADD "${S6_OVERLAY_URL}/${S6_FILE_AMD64}.${CHECKSUM_ALGORITHM}" "${DESTDIR}/" +ADD "${S6_OVERLAY_URL}/${S6_FILE_ARM64}.${CHECKSUM_ALGORITHM}" "${DESTDIR}/" +ADD "${S6_OVERLAY_URL}/${S6_FILE_NOARCH}.${CHECKSUM_ALGORITHM}" "${DESTDIR}/" + +ADD --checksum="${S6_CHECKSUM_AMD64}" "${S6_OVERLAY_URL}/${S6_FILE_AMD64}" "${DESTDIR}/" +ADD --checksum="${S6_CHECKSUM_ARM64}" "${S6_OVERLAY_URL}/${S6_FILE_ARM64}" "${DESTDIR}/" +ADD --checksum="${S6_CHECKSUM_NOARCH}" "${S6_OVERLAY_URL}/${S6_FILE_NOARCH}" "${DESTDIR}/" + +FROM alpine:${ALPINE_VERSION} AS s6-overlay-extracted +COPY --link --from=s6-overlay-download /downloaded /downloaded + +ARG TARGETARCH + +RUN set -eu ; \ +\ + decide_arch() { \ + local arg1 ; \ + arg1="${1:-$(uname -m)}" ; \ +\ + case "${arg1}" in \ + (amd64) printf -- 'x86_64' ;; \ + (arm64) printf -- 'aarch64' ;; \ + (armv7l) printf -- 'arm' ;; \ + (*) printf -- '%s' "${arg1}" ;; \ + esac ; \ + unset -v arg1 ; \ + } ; \ +\ + mkdir -v /verified ; \ + cd /downloaded ; \ + for f in *.sha256 ; \ + do \ + sha256sum -c < "${f}" || exit ; \ + ln -v "${f%.sha256}" /verified/ || exit ; \ + done ; \ + unset -v f ; \ +\ + S6_ARCH="$(decide_arch "${TARGETARCH}")" ; \ + set -x ; \ + mkdir -v /s6-overlay-rootfs ; \ + cd /s6-overlay-rootfs ; \ + for f in /verified/*.tar* ; \ + do \ + case "${f}" in \ + (*-noarch.tar*|*-"${S6_ARCH}".tar*) \ + tar -xpf "${f}" || exit ;; \ + esac ; \ + done ; \ + set +x ; \ + unset -v f ; + +FROM debian:bookworm-slim AS tubesync + +ARG TARGETARCH +ARG TARGETPLATFORM + +ARG S6_VERSION + +ARG FFMPEG_DATE +ARG FFMPEG_VERSION ENV S6_VERSION="${S6_VERSION}" \ FFMPEG_DATE="${FFMPEG_DATE}" \ @@ -26,89 +211,20 @@ ENV DEBIAN_FRONTEND="noninteractive" \ S6_CMD_WAIT_FOR_SERVICES_MAXTIME="0" # Install third party software +COPY --link --from=s6-overlay-extracted /s6-overlay-rootfs / +COPY --link --from=ffmpeg-extracted /extracted /usr/local/bin/ + # Reminder: the SHELL handles all variables -RUN decide_arch() { \ - case "${TARGETARCH:=amd64}" in \ - (arm64) printf -- 'aarch64' ;; \ - (*) printf -- '%s' "${TARGETARCH}" ;; \ - esac ; \ - } && \ - decide_expected() { \ - case "${1}" in \ - 
(ffmpeg) case "${2}" in \ - (amd64) printf -- '%s' "${SHA256_FFMPEG_AMD64}" ;; \ - (arm64) printf -- '%s' "${SHA256_FFMPEG_ARM64}" ;; \ - esac ;; \ - (s6) case "${2}" in \ - (amd64) printf -- '%s' "${SHA256_S6_AMD64}" ;; \ - (arm64) printf -- '%s' "${SHA256_S6_ARM64}" ;; \ - (noarch) printf -- '%s' "${SHA256_S6_NOARCH}" ;; \ - esac ;; \ - esac ; \ - } && \ - decide_url() { \ - case "${1}" in \ - (ffmpeg) printf -- \ - 'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-%s-linux%s-gpl%s.tar.xz' \ - "${FFMPEG_DATE}" \ - "${FFMPEG_VERSION}" \ - "$(case "${2}" in \ - (amd64) printf -- '64' ;; \ - (*) printf -- '%s' "${2}" ;; \ - esac)" \ - "$(case "${FFMPEG_VERSION%%-*}" in \ - (n*) printf -- '-%s\n' "${FFMPEG_VERSION#n}" | cut -d '-' -f 1,2 ;; \ - (*) printf -- '' ;; \ - esac)" ;; \ - (s6) printf -- \ - 'https://github.com/just-containers/s6-overlay/releases/download/v%s/s6-overlay-%s.tar.xz' \ - "${S6_VERSION}" \ - "$(case "${2}" in \ - (amd64) printf -- 'x86_64' ;; \ - (arm64) printf -- 'aarch64' ;; \ - (*) printf -- '%s' "${2}" ;; \ - esac)" ;; \ - esac ; \ - } && \ - verify_download() { \ - while [ $# -ge 2 ] ; do \ - sha256sum "${2}" ; \ - printf -- '%s %s\n' "${1}" "${2}" | sha256sum -c || return ; \ - shift ; shift ; \ - done ; \ - } && \ - download_expected_file() { \ - local arg1 expected file url ; \ - arg1="$(printf -- '%s\n' "${1}" | awk '{print toupper($0);}')" ; \ - expected="$(decide_expected "${1}" "${2}")" ; \ - file="${3}" ; \ - url="$(decide_url "${1}" "${2}")" ; \ - printf -- '%s\n' \ - "Building for arch: ${2}|${ARCH}, downloading ${arg1} from: ${url}, expecting ${arg1} SHA256: ${expected}" && \ - rm -rf "${file}" && \ - curl --disable --output "${file}" --clobber --location --no-progress-meter --url "${url}" && \ - verify_download "${expected}" "${file}" ; \ - } && \ - export ARCH="$(decide_arch)" && \ - set -x && \ +RUN set -x && \ apt-get update && \ apt-get -y --no-install-recommends install locales && \ printf -- "en_US.UTF-8 UTF-8\n" > /etc/locale.gen && \ locale-gen en_US.UTF-8 && \ # Install required distro packages apt-get -y --no-install-recommends install curl ca-certificates file binutils xz-utils && \ - # Install s6 - _file="/tmp/s6-overlay-noarch.tar.xz" && \ - download_expected_file s6 noarch "${_file}" && \ - tar -C / -xpf "${_file}" && rm -f "${_file}" && \ - _file="/tmp/s6-overlay-${ARCH}.tar.xz" && \ - download_expected_file s6 "${TARGETARCH}" "${_file}" && \ - tar -C / -xpf "${_file}" && rm -f "${_file}" && \ + # Installed s6 (using COPY earlier) file -L /command/s6-overlay-suexec && \ - # Install ffmpeg - _file="/tmp/ffmpeg-${ARCH}.tar.xz" && \ - download_expected_file ffmpeg "${TARGETARCH}" "${_file}" && \ - tar -xvvpf "${_file}" --strip-components=2 --no-anchored -C /usr/local/bin/ "ffmpeg" "ffprobe" && rm -f "${_file}" && \ + # Installed ffmpeg (using COPY earlier) /usr/local/bin/ffmpeg -version && \ file /usr/local/bin/ff* && \ # Clean up @@ -154,7 +270,9 @@ ENV PIP_NO_COMPILE=1 \ WORKDIR /app # Set up the app -RUN set -x && \ +#BuildKit#RUN --mount=type=bind,source=Pipfile,target=/app/Pipfile \ +RUN \ + set -x && \ apt-get update && \ # Install required build packages apt-get -y --no-install-recommends install \ From f464acaa6331913abd5bb341344568f6d9eb73fc Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 11 Jan 2025 15:38:45 -0500 Subject: [PATCH 29/34] Simplify directory_path for Media --- tubesync/sync/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py 
index 2037492d..ad17258c 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1263,8 +1263,7 @@ class Media(models.Model):
 
     @property
     def directory_path(self):
-        dirname = self.source.directory_path / self.filename
-        return dirname.parent
+        return self.filepath.parent
 
     @property
     def filepath(self):

From 3ea7e6c8ee0ab7631507938734c255ec9116c2bb Mon Sep 17 00:00:00 2001
From: Makhuta
Date: Sat, 11 Jan 2025 22:07:36 +0100
Subject: [PATCH 30/34] Change - changed the episode_str to be togglable and
 use the old format by default

---
 tubesync/sync/models.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index a5b7adbd..d22cdb57 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1129,7 +1129,7 @@ class Media(models.Model):
             'key': self.key,
             'format': '-'.join(display_format['format']),
             'playlist_title': self.playlist_title,
-            'video_order': self.get_episode_str(),
+            'video_order': self.get_episode_str(True),
             'ext': self.source.extension,
             'resolution': display_format['resolution'],
             'height': display_format['height'],
@@ -1525,9 +1525,12 @@ class Media(models.Model):
                 return position_counter
             position_counter += 1
 
-    def get_episode_str(self):
+    def get_episode_str(self, use_padding=False):
         episode_number = self.calculate_episode_number()
-        return f'{episode_number:02}' if episode_number else ''
+        if use_padding:
+            return f'{episode_number:02}' if episode_number else ''
+
+        return str(episode_number) if episode_number else ''

From df4b824672bcc00442064ced9de6b86e05a505ea Mon Sep 17 00:00:00 2001
From: Makhuta
Date: Sat, 11 Jan 2025 22:17:10 +0100
Subject: [PATCH 31/34] Change - simplified the returns

---
 tubesync/sync/models.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index d22cdb57..66bb0481 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1527,10 +1527,13 @@ class Media(models.Model):
 
     def get_episode_str(self, use_padding=False):
         episode_number = self.calculate_episode_number()
+        if not episode_number:
+            return ''
+
         if use_padding:
-            return f'{episode_number:02}' if episode_number else ''
+            return f'{episode_number:02}'
 
-        return str(episode_number) if episode_number else ''
+        return str(episode_number)

From 5e5d011b640be82d7c5d7d749f1801be787c46bf Mon Sep 17 00:00:00 2001
From: tcely
Date: Sun, 12 Jan 2025 00:49:19 -0500
Subject: [PATCH 32/34] Add parser directives

This hopefully helps anyone building on an older docker, such as the
Debian / Ubuntu packaged versions.

---
 Dockerfile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index a69609c5..f7a26bb3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,3 +1,6 @@
+# syntax=docker/dockerfile:1
+# check=error=true
+
 ARG FFMPEG_DATE="2025-01-10-19-43"
 ARG FFMPEG_VERSION="N-118280-g5cd49e1bfd"
 

From 2860147212fb21087d699761d959727aae1c707a Mon Sep 17 00:00:00 2001
From: tcely
Date: Sun, 12 Jan 2025 04:46:32 -0500
Subject: [PATCH 33/34] Build on older docker also

* Do without --link for COPY or ADD
* Do without --checksum for ADD
* Trim the FFMPEG_VERSION variable with cut instead

I've built successfully on old Debian systems using these changes.
Everything else I use has a newer docker on it.
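For clarity, `cut -d '-' -f 1,2` keeps only the first two
dash-separated fields, which matches what the old `${FFMPEG_VERSION%%-*}`
expansion produced for the prefix; the same transformation expressed in
Python (illustrative only):

    prefix = 'ffmpeg-N-118280-g5cd49e1bfd'
    trimmed = '-'.join(prefix.split('-')[:2])  # 'ffmpeg-N'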
---
 Dockerfile | 74 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 50 insertions(+), 24 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index f7a26bb3..d0107385 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,9 +11,12 @@ ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f6
 ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae"
 
 ARG ALPINE_VERSION="latest"
-ARG FFMPEG_PREFIX_FILE="ffmpeg-${FFMPEG_VERSION%%-*}"
+ARG FFMPEG_PREFIX_FILE="ffmpeg-${FFMPEG_VERSION}"
 ARG FFMPEG_SUFFIX_FILE=".tar.xz"
 
+ARG FFMPEG_CHECKSUM_ALGORITHM="sha256"
+ARG S6_CHECKSUM_ALGORITHM="sha256"
+
 FROM alpine:${ALPINE_VERSION} AS ffmpeg-download
 ARG FFMPEG_DATE
 ARG FFMPEG_VERSION
@@ -21,7 +24,8 @@ ARG FFMPEG_PREFIX_FILE
 ARG FFMPEG_SUFFIX_FILE
 ARG SHA256_FFMPEG_AMD64
 ARG SHA256_FFMPEG_ARM64
-ARG CHECKSUM_ALGORITHM="sha256"
+ARG FFMPEG_CHECKSUM_ALGORITHM
+ARG CHECKSUM_ALGORITHM="${FFMPEG_CHECKSUM_ALGORITHM}"
 ARG FFMPEG_CHECKSUM_AMD64="${SHA256_FFMPEG_AMD64}"
 ARG FFMPEG_CHECKSUM_ARM64="${SHA256_FFMPEG_ARM64}"
 
@@ -57,6 +61,7 @@ RUN set -eu ; \
     } ; \
 \
     FFMPEG_ARCH="$(decide_arch)" ; \
+    FFMPEG_PREFIX_FILE="$( printf -- '%s' "${FFMPEG_PREFIX_FILE}" | cut -d '-' -f 1,2 )" ; \
     for url in $(awk ' \
         $2 ~ /^[*]?'"${FFMPEG_PREFIX_FILE}"'/ && /-'"${FFMPEG_ARCH}"'-/ { $1=""; print; } \
         ' "${DESTDIR}/${FFMPEG_FILE_SUMS}") ; \
@@ -75,7 +80,7 @@ RUN set -eu ; \
         --summary-interval=0 \
         --input-file /tmp/downloads ; \
 \
-    apk --no-cache --no-progress add cmd:awk "cmd:${CHECKSUM_ALGORITHM}sum" ; \
+    apk --no-cache --no-progress add "cmd:${CHECKSUM_ALGORITHM}sum" ; \
 \
     decide_expected() { \
         case "${TARGETARCH}" in \
@@ -90,43 +95,44 @@ RUN set -eu ; \
     if [ -n "${FFMPEG_HASH}" ] ; \
     then \
         printf -- '%s *%s\n' "${FFMPEG_HASH}" "${FFMPEG_PREFIX_FILE}"*-"${FFMPEG_ARCH}"-*"${FFMPEG_SUFFIX_FILE}" >> /tmp/SUMS ; \
-        "${CHECKSUM_ALGORITHM}sum" --check --strict /tmp/SUMS || exit ; \
+        "${CHECKSUM_ALGORITHM}sum" --check --warn --strict /tmp/SUMS || exit ; \
     fi ; \
-    "${CHECKSUM_ALGORITHM}sum" --check --strict --ignore-missing "${DESTDIR}/${FFMPEG_FILE_SUMS}" ; \
+    "${CHECKSUM_ALGORITHM}sum" --check --warn --strict --ignore-missing "${DESTDIR}/${FFMPEG_FILE_SUMS}" ; \
 \
     mkdir -v -p "/verified/${TARGETARCH}" ; \
     ln -v "${FFMPEG_PREFIX_FILE}"*-"${FFMPEG_ARCH}"-*"${FFMPEG_SUFFIX_FILE}" "/verified/${TARGETARCH}/" ; \
     rm -rf "${DESTDIR}" ;
 
 FROM alpine:${ALPINE_VERSION} AS ffmpeg-extracted
-COPY --link --from=ffmpeg-download /verified /verified
+COPY --from=ffmpeg-download /verified /verified
 
 ARG FFMPEG_PREFIX_FILE
 ARG FFMPEG_SUFFIX_FILE
 ARG TARGETARCH
-RUN set -eu ; \
-    apk --no-cache --no-progress add cmd:tar cmd:xz ; \
-\
+RUN set -eux ; \
     mkdir -v /extracted ; \
     cd /extracted ; \
-    set -x ; \
-    tar -xp \
+    ln -s "/verified/${TARGETARCH}"/"${FFMPEG_PREFIX_FILE}"*"${FFMPEG_SUFFIX_FILE}" "/tmp/ffmpeg${FFMPEG_SUFFIX_FILE}" ; \
+    tar -tf "/tmp/ffmpeg${FFMPEG_SUFFIX_FILE}" | grep '/bin/\(ffmpeg\|ffprobe\)' > /tmp/files ; \
+    tar -xop \
         --strip-components=2 \
-        --no-anchored \
-        --no-same-owner \
-        -f "/verified/${TARGETARCH}"/"${FFMPEG_PREFIX_FILE}"*"${FFMPEG_SUFFIX_FILE}" \
-        'ffmpeg' 'ffprobe' ; \
+        -f "/tmp/ffmpeg${FFMPEG_SUFFIX_FILE}" \
+        -T /tmp/files ; \
 \
     ls -AlR /extracted ;
 
+FROM scratch AS ffmpeg
+COPY --from=ffmpeg-extracted /extracted /usr/local/bin/
+
-FROM scratch AS s6-overlay-download
+FROM alpine:${ALPINE_VERSION} AS s6-overlay-download
 ARG S6_VERSION
 ARG SHA256_S6_AMD64
 ARG SHA256_S6_ARM64
 ARG SHA256_S6_NOARCH
 
 ARG DESTDIR="/downloaded"
CHECKSUM_ALGORITHM="sha256" +ARG S6_CHECKSUM_ALGORITHM +ARG CHECKSUM_ALGORITHM="${S6_CHECKSUM_ALGORITHM}" ARG S6_CHECKSUM_AMD64="${CHECKSUM_ALGORITHM}:${SHA256_S6_AMD64}" ARG S6_CHECKSUM_ARM64="${CHECKSUM_ALGORITHM}:${SHA256_S6_ARM64}" @@ -144,12 +150,28 @@ ADD "${S6_OVERLAY_URL}/${S6_FILE_AMD64}.${CHECKSUM_ALGORITHM}" "${DESTDIR}/" ADD "${S6_OVERLAY_URL}/${S6_FILE_ARM64}.${CHECKSUM_ALGORITHM}" "${DESTDIR}/" ADD "${S6_OVERLAY_URL}/${S6_FILE_NOARCH}.${CHECKSUM_ALGORITHM}" "${DESTDIR}/" -ADD --checksum="${S6_CHECKSUM_AMD64}" "${S6_OVERLAY_URL}/${S6_FILE_AMD64}" "${DESTDIR}/" -ADD --checksum="${S6_CHECKSUM_ARM64}" "${S6_OVERLAY_URL}/${S6_FILE_ARM64}" "${DESTDIR}/" -ADD --checksum="${S6_CHECKSUM_NOARCH}" "${S6_OVERLAY_URL}/${S6_FILE_NOARCH}" "${DESTDIR}/" +##ADD --checksum="${S6_CHECKSUM_AMD64}" "${S6_OVERLAY_URL}/${S6_FILE_AMD64}" "${DESTDIR}/" +##ADD --checksum="${S6_CHECKSUM_ARM64}" "${S6_OVERLAY_URL}/${S6_FILE_ARM64}" "${DESTDIR}/" +##ADD --checksum="${S6_CHECKSUM_NOARCH}" "${S6_OVERLAY_URL}/${S6_FILE_NOARCH}" "${DESTDIR}/" + +# --checksum wasn't recognized, so use busybox to check the sums instead +ADD "${S6_OVERLAY_URL}/${S6_FILE_AMD64}" "${DESTDIR}/" +RUN set -eu ; checksum="${S6_CHECKSUM_AMD64}" ; file="${S6_FILE_AMD64}" ; cd "${DESTDIR}/" && \ + printf -- '%s *%s\n' "$(printf -- '%s' "${checksum}" | cut -d : -f 2-)" "${file}" | "${CHECKSUM_ALGORITHM}sum" -cw + +ADD "${S6_OVERLAY_URL}/${S6_FILE_ARM64}" "${DESTDIR}/" +RUN set -eu ; checksum="${S6_CHECKSUM_ARM64}" ; file="${S6_FILE_ARM64}" ; cd "${DESTDIR}/" && \ + printf -- '%s *%s\n' "$(printf -- '%s' "${checksum}" | cut -d : -f 2-)" "${file}" | "${CHECKSUM_ALGORITHM}sum" -cw + +ADD "${S6_OVERLAY_URL}/${S6_FILE_NOARCH}" "${DESTDIR}/" +RUN set -eu ; checksum="${S6_CHECKSUM_NOARCH}" ; file="${S6_FILE_NOARCH}" ; cd "${DESTDIR}/" && \ + printf -- '%s *%s\n' "$(printf -- '%s' "${checksum}" | cut -d : -f 2-)" "${file}" | "${CHECKSUM_ALGORITHM}sum" -cw FROM alpine:${ALPINE_VERSION} AS s6-overlay-extracted -COPY --link --from=s6-overlay-download /downloaded /downloaded +COPY --from=s6-overlay-download /downloaded /downloaded + +ARG S6_CHECKSUM_ALGORITHM +ARG CHECKSUM_ALGORITHM="${S6_CHECKSUM_ALGORITHM}" ARG TARGETARCH @@ -168,11 +190,12 @@ RUN set -eu ; \ unset -v arg1 ; \ } ; \ \ + apk --no-cache --no-progress add "cmd:${CHECKSUM_ALGORITHM}sum" ; \ mkdir -v /verified ; \ cd /downloaded ; \ for f in *.sha256 ; \ do \ - sha256sum -c < "${f}" || exit ; \ + "${CHECKSUM_ALGORITHM}sum" --check --warn --strict "${f}" || exit ; \ ln -v "${f%.sha256}" /verified/ || exit ; \ done ; \ unset -v f ; \ @@ -191,6 +214,9 @@ RUN set -eu ; \ set +x ; \ unset -v f ; +FROM scratch AS s6-overlay +COPY --from=s6-overlay-extracted /s6-overlay-rootfs / + FROM debian:bookworm-slim AS tubesync ARG TARGETARCH @@ -214,8 +240,8 @@ ENV DEBIAN_FRONTEND="noninteractive" \ S6_CMD_WAIT_FOR_SERVICES_MAXTIME="0" # Install third party software -COPY --link --from=s6-overlay-extracted /s6-overlay-rootfs / -COPY --link --from=ffmpeg-extracted /extracted /usr/local/bin/ +COPY --from=s6-overlay / / +COPY --from=ffmpeg /usr/local/bin/ /usr/local/bin/ # Reminder: the SHELL handles all variables RUN set -x && \ From 45d7039188c746e9726562808caa7ed8bbc5f6ee Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 14 Jan 2025 05:34:59 -0500 Subject: [PATCH 34/34] Only log the extra messages with the new setting --- tubesync/sync/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index bb850af3..a65abdf8 100644 --- 
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1174,7 +1174,8 @@ class Media(models.Model):
 
     @property
     def loaded_metadata(self):
-        self.reduce_data
+        if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False):
+            self.reduce_data
         try:
             data = json.loads(self.metadata)
             if not isinstance(data, dict):