From a11e3487d65e4cff14cb0d462e6015f08c6fe322 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 11 Dec 2024 12:33:14 -0500 Subject: [PATCH 001/417] Add database options where newer Django expects them The `3.2.x` versions don't have a lot of the code that `5.1.x` uses for this. --- tubesync/tubesync/local_settings.py.container | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index a0426a4c..c52bf588 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -46,6 +46,13 @@ else: 'default': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': CONFIG_BASE_DIR / 'db.sqlite3', + "OPTIONS": { + "transaction_mode": "IMMEDIATE", + "init_command": """ + PRAGMA auto_vacuum = INCREMENTAL; + PRAGMA incremental_vacuum(100); + """, + }, } } DATABASE_CONNECTION_STR = f'sqlite at "{DATABASES["default"]["NAME"]}"' From 658f690b5b4051530b73f17434b5e6076e14e92c Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 11 Dec 2024 13:33:50 -0500 Subject: [PATCH 002/417] Add tubesync.sqlite3 --- tubesync/tubesync/sqlite3/base.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 tubesync/tubesync/sqlite3/base.py diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py new file mode 100644 index 00000000..1ecb4419 --- /dev/null +++ b/tubesync/tubesync/sqlite3/base.py @@ -0,0 +1,28 @@ +from django.db.backends.sqlite3 import base + + +class DatabaseWrapper(base.DatabaseWrapper): + + def _start_transaction_under_autocommit(self): + conn_params = self.get_connection_params() + if "transaction_mode" not in conn_params: + self.cursor().execute("BEGIN TRANSACTION") + else: + tm = str(conn_params["transaction_mode"]).upper().strip() + transaction_modes = frozenset(["DEFERRED", "EXCLUSIVE", "IMMEDIATE"]) + if tm in transaction_modes: + self.cursor().execute(f"BEGIN {tm} TRANSACTION") + else: + self.cursor().execute("BEGIN TRANSACTION") + + + def init_connection_state(self): + conn_params = self.get_connection_params() + if "init_command" in conn_params: + ic = str(conn_params["init_command"]) + cmds = ic.split(';') + with self.cursor() as cursor: + for init_cmd in cmds: + cursor.execute(init_cmd.strip()) + + From 7e872bf8b515297ebd6981556ff768250cf51f17 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 11 Dec 2024 13:35:12 -0500 Subject: [PATCH 003/417] Use tubesync.sqlite3 --- tubesync/tubesync/local_settings.py.container | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index c52bf588..77b531c1 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -44,7 +44,7 @@ if database_dict: else: DATABASES = { 'default': { - 'ENGINE': 'django.db.backends.sqlite3', + 'ENGINE': 'tubesync.sqlite3', 'NAME': CONFIG_BASE_DIR / 'db.sqlite3', "OPTIONS": { "transaction_mode": "IMMEDIATE", From 0ab0605d2297adc2108cd3c00bbcc580a32ef66d Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 11 Dec 2024 13:42:05 -0500 Subject: [PATCH 004/417] Keep legacy_alter_table off Newer Django will set this for us, but it won't matter if it is disabled twice. 
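The pragmas in `init_command` are split on ';' and executed one at a
time while the connection is initialised, so the effect is roughly this
sketch using the stdlib sqlite3 module directly (the database path is
illustrative):

    import sqlite3

    conn = sqlite3.connect('/config/db.sqlite3')  # illustrative path
    for statement in (
        'PRAGMA legacy_alter_table = OFF',
        'PRAGMA auto_vacuum = INCREMENTAL',
        'PRAGMA incremental_vacuum(100)',
    ):
        conn.execute(statement)

`PRAGMA incremental_vacuum(N)` frees at most N pages from the freelist
per call, so running it at connection setup keeps the cost bounded.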
--- tubesync/tubesync/local_settings.py.container | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index 77b531c1..3659b289 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -49,6 +49,7 @@ else: "OPTIONS": { "transaction_mode": "IMMEDIATE", "init_command": """ + PRAGMA legacy_alter_table = OFF; PRAGMA auto_vacuum = INCREMENTAL; PRAGMA incremental_vacuum(100); """, From 467ec947ff023d31d8a78db88c905bf71fc1f3e2 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 11 Dec 2024 15:48:11 -0500 Subject: [PATCH 005/417] Configure isolation_level from transaction_mode TypeError: 'transaction_mode' is an invalid keyword argument for Connection() --- tubesync/tubesync/sqlite3/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index 1ecb4419..46db387a 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -25,4 +25,9 @@ class DatabaseWrapper(base.DatabaseWrapper): for init_cmd in cmds: cursor.execute(init_cmd.strip()) + + def get_new_connection(self, conn_params): + conn_params["isolation_level"] = conn_params.pop("transaction_mode", "DEFERRED") + super().get_new_connection(conn_params) + From ebf1ed3ef44855a0b4782b58497b9c7f8df034fd Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 11 Dec 2024 16:04:09 -0500 Subject: [PATCH 006/417] Filter out the init_command key TypeError: 'init_command' is an invalid keyword argument for Connection() --- tubesync/tubesync/sqlite3/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index 46db387a..05389730 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -27,7 +27,9 @@ class DatabaseWrapper(base.DatabaseWrapper): def get_new_connection(self, conn_params): - conn_params["isolation_level"] = conn_params.pop("transaction_mode", "DEFERRED") - super().get_new_connection(conn_params) + filtered_params = conn_params.copy() + filtered_params["isolation_level"] = filtered_params.pop("transaction_mode", "DEFERRED") + _ = filtered_params.pop("init_command", None) + super().get_new_connection(filtered_params) From de52e55e34c9fee8da83584e63365011aa79d955 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 11 Dec 2024 16:30:48 -0500 Subject: [PATCH 007/417] Return get_new_connection Oops. It helps to have the resulting connection. 
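For anyone skimming the series: this is the classic wrapper-method
mistake, where the override calls the parent but discards its result,
so every caller receives None instead of a connection. A minimal
before/after sketch:

    # broken: the method implicitly returns None
    def get_new_connection(self, conn_params):
        super().get_new_connection(conn_params)

    # fixed: propagate the parent's return value
    def get_new_connection(self, conn_params):
        return super().get_new_connection(conn_params)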
--- tubesync/tubesync/sqlite3/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index 05389730..d09d68cd 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -30,6 +30,6 @@ class DatabaseWrapper(base.DatabaseWrapper): filtered_params = conn_params.copy() filtered_params["isolation_level"] = filtered_params.pop("transaction_mode", "DEFERRED") _ = filtered_params.pop("init_command", None) - super().get_new_connection(filtered_params) + return super().get_new_connection(filtered_params) From 21e9cbef4b71fd463486096b38b0dc216ce218af Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 12 Dec 2024 09:03:08 -0500 Subject: [PATCH 008/417] Cleanup for _start_transaction_under_autocommit --- tubesync/tubesync/sqlite3/base.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index d09d68cd..28d0ebc7 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -5,15 +5,14 @@ class DatabaseWrapper(base.DatabaseWrapper): def _start_transaction_under_autocommit(self): conn_params = self.get_connection_params() - if "transaction_mode" not in conn_params: - self.cursor().execute("BEGIN TRANSACTION") - else: + transaction_modes = frozenset(["DEFERRED", "EXCLUSIVE", "IMMEDIATE"]) + + sql_statement = "BEGIN TRANSACTION" + if "transaction_mode" in conn_params: tm = str(conn_params["transaction_mode"]).upper().strip() - transaction_modes = frozenset(["DEFERRED", "EXCLUSIVE", "IMMEDIATE"]) if tm in transaction_modes: - self.cursor().execute(f"BEGIN {tm} TRANSACTION") - else: - self.cursor().execute("BEGIN TRANSACTION") + sql_statement = f"BEGIN {tm} TRANSACTION" + self.cursor().execute(sql_statement) def init_connection_state(self): From 7b033d6e620c5320c35ee6cff14c851922fc769e Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 12 Dec 2024 09:52:51 -0500 Subject: [PATCH 009/417] Use a filter map to make maintenance easier A possible future improvement would be to define a map of which keys Connection accepts. Right now, this is removing keys after Connection fails because of an unknown key. This could be automated by using try and removing a key each time the exception is caught. 
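A sketch of that automated variant, under the assumption of a helper
that parses the offending key out of the TypeError message (later
commits implement it as `_remove_invalid_keyword_argument`):

    params = dict(filtered_params)
    connection = None
    while connection is None and params:
        try:
            connection = super().get_new_connection(params)
        except TypeError as e:
            key = invalid_keyword_from(e)  # hypothetical parser
            if key is None:
                raise
            del params[key]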
--- tubesync/tubesync/sqlite3/base.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index 28d0ebc7..b2d10a20 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -26,9 +26,12 @@ class DatabaseWrapper(base.DatabaseWrapper): def get_new_connection(self, conn_params): - filtered_params = conn_params.copy() - filtered_params["isolation_level"] = filtered_params.pop("transaction_mode", "DEFERRED") - _ = filtered_params.pop("init_command", None) + filter_map = { + "init_command": None, + "transaction_mode": ("isolation_level", "DEFERRED"), + } + filtered_params = {k: v for (k,v) in conn_params.items() if k not in filter_map} + filtered_params.update({v[0]: conn_params.get(k, v[1]) for (k,v) in filter_map.items() if v is not None}) return super().get_new_connection(filtered_params) From 32dead212689f2127e33b7932f01b21c50bb4d7e Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 12 Dec 2024 10:35:46 -0500 Subject: [PATCH 010/417] Proof of concept for automated param removal --- tubesync/tubesync/sqlite3/base.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index b2d10a20..7e9e5f57 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -27,11 +27,24 @@ class DatabaseWrapper(base.DatabaseWrapper): def get_new_connection(self, conn_params): filter_map = { - "init_command": None, "transaction_mode": ("isolation_level", "DEFERRED"), } filtered_params = {k: v for (k,v) in conn_params.items() if k not in filter_map} filtered_params.update({v[0]: conn_params.get(k, v[1]) for (k,v) in filter_map.items() if v is not None}) - return super().get_new_connection(filtered_params) + + attempt = 0 + connection = None + tries = len(filtered_params) + while connection is None and attempt < tries: + try: + attempt += 1 + connection = super().get_new_connection(filtered_params) + except TypeError as e: + # remove unaccepted param + print(e, flush=True) + print('Exception args:', flush=True) + print(e.args, flush=True) + del filtered_params["init_command"] + return connection From 41215b9148186882af3e7955983b225ade0c01e1 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 12 Dec 2024 11:14:17 -0500 Subject: [PATCH 011/417] Automated invalid keyword argument removal Less manual maintenance as versions change is a win! 
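The extraction leans on the exact wording CPython uses when sqlite3's
Connection() rejects a keyword argument, so the pattern is pinned to
the observed message text. A quick self-contained check (note the
named group `key`):

    import re

    prog = re.compile(
        r"^'(?P<key>[^']+)' is an invalid keyword argument"
        r" for Connection[()]{2}$"
    )
    msg = "'init_command' is an invalid keyword argument for Connection()"
    match = prog.match(msg)
    assert match is not None and match.group('key') == 'init_command'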
--- tubesync/tubesync/sqlite3/base.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index 7e9e5f57..f59065f4 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -1,3 +1,4 @@ +import re from django.db.backends.sqlite3 import base @@ -30,7 +31,7 @@ class DatabaseWrapper(base.DatabaseWrapper): "transaction_mode": ("isolation_level", "DEFERRED"), } filtered_params = {k: v for (k,v) in conn_params.items() if k not in filter_map} - filtered_params.update({v[0]: conn_params.get(k, v[1]) for (k,v) in filter_map.items() if v is not None}) + filtered_params.update({v[0]: conn_params.get(k, v[1]) for (k,v) in filter_map.items()}) attempt = 0 connection = None @@ -40,11 +41,12 @@ class DatabaseWrapper(base.DatabaseWrapper): attempt += 1 connection = super().get_new_connection(filtered_params) except TypeError as e: - # remove unaccepted param - print(e, flush=True) - print('Exception args:', flush=True) - print(e.args, flush=True) - del filtered_params["init_command"] + prog = re.compile("^'(?P[^']+)' is an invalid keyword argument for Connection[()]{2}$") + match = prog.match(e.args[0]) + key = match.group('key') if match else None + if key is None: + raise e + del filtered_params[key] return connection From 3fe7203ed50b2c68d571663a1ea3662e60117266 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 12 Dec 2024 12:13:27 -0500 Subject: [PATCH 012/417] Add and use _remove_invalid_keyword_argument --- tubesync/tubesync/sqlite3/base.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index f59065f4..812c5b16 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -25,7 +25,25 @@ class DatabaseWrapper(base.DatabaseWrapper): for init_cmd in cmds: cursor.execute(init_cmd.strip()) - + + def _remove_invalid_keyword_argument(self, e, filtered_params): + key = None + try: + prog = re.compile(r"^'(?P[^']+)' is an invalid keyword argument for Connection[()]{2}$") + match = prog.match(e.args[0]) + except: + raise + else: + if match: + key = match.group('key') + finally: + # This isn't a TypeError we can handle + if key is None: + raise e + # remove the invalid keyword argument + del filtered_params[key] + + def get_new_connection(self, conn_params): filter_map = { "transaction_mode": ("isolation_level", "DEFERRED"), @@ -37,16 +55,10 @@ class DatabaseWrapper(base.DatabaseWrapper): connection = None tries = len(filtered_params) while connection is None and attempt < tries: + attempt += 1 try: - attempt += 1 connection = super().get_new_connection(filtered_params) except TypeError as e: - prog = re.compile("^'(?P[^']+)' is an invalid keyword argument for Connection[()]{2}$") - match = prog.match(e.args[0]) - key = match.group('key') if match else None - if key is None: - raise e - del filtered_params[key] + self._remove_invalid_keyword_argument(e, filtered_params) return connection - From 36b395ae30f73db92b4040412ae3ede5d1097af2 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 12 Dec 2024 12:31:30 -0500 Subject: [PATCH 013/417] Don't pass the exception to _remove_invalid_keyword_argument --- tubesync/tubesync/sqlite3/base.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index 812c5b16..e4dfbcb8 100644 --- 
a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -26,22 +26,20 @@ class DatabaseWrapper(base.DatabaseWrapper): cursor.execute(init_cmd.strip()) - def _remove_invalid_keyword_argument(self, e, filtered_params): - key = None + def _remove_invalid_keyword_argument(self, params): try: prog = re.compile(r"^'(?P[^']+)' is an invalid keyword argument for Connection[()]{2}$") match = prog.match(e.args[0]) - except: - raise else: if match: key = match.group('key') - finally: - # This isn't a TypeError we can handle - if key is None: - raise e - # remove the invalid keyword argument - del filtered_params[key] + try: + # remove the invalid keyword argument + del params[key] + else: + return True + + return False def get_new_connection(self, conn_params): @@ -59,6 +57,8 @@ class DatabaseWrapper(base.DatabaseWrapper): try: connection = super().get_new_connection(filtered_params) except TypeError as e: - self._remove_invalid_keyword_argument(e, filtered_params) + if not self._remove_invalid_keyword_argument(filtered_params): + # This isn't a TypeError we can handle + raise e return connection From ba0d5ab285793bf943bcd76473e1ce657a93e9ac Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 12 Dec 2024 13:33:17 -0500 Subject: [PATCH 014/417] Regex and syntax tweaks I didn't pay close enough attention to the try grammar. --- tubesync/tubesync/sqlite3/base.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index e4dfbcb8..bee1b3f4 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -28,16 +28,15 @@ class DatabaseWrapper(base.DatabaseWrapper): def _remove_invalid_keyword_argument(self, params): try: - prog = re.compile(r"^'(?P[^']+)' is an invalid keyword argument for Connection[()]{2}$") + prog = re.compile(r"^(?P['])(?P[^']+)(?P=quote) is an invalid keyword argument for Connection\(\)$") match = prog.match(e.args[0]) - else: - if match: - key = match.group('key') - try: - # remove the invalid keyword argument - del params[key] - else: - return True + + if match: + key = match.group('key') + try: + # remove the invalid keyword argument + del params[key] + return True return False From 11789df864154d5280e21dde29651f9fc8eeb33d Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 12 Dec 2024 13:59:06 -0500 Subject: [PATCH 015/417] Pass the exception argument Fixed up the try syntax too. I really dislike try, but it's almost mandatory in Python. 
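For reference, the clause order the grammar actually accepts: `else`
is only valid after at least one `except` clause, and `finally` always
comes last. A minimal reminder, with placeholder function names:

    try:
        risky()
    except TypeError:
        handle()
    else:
        ran_without_exception()
    finally:
        always_runs()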
--- tubesync/tubesync/sqlite3/base.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index bee1b3f4..05889640 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -26,18 +26,23 @@ class DatabaseWrapper(base.DatabaseWrapper): cursor.execute(init_cmd.strip()) - def _remove_invalid_keyword_argument(self, params): + def _remove_invalid_keyword_argument(self, arg_str, params): try: prog = re.compile(r"^(?P['])(?P[^']+)(?P=quote) is an invalid keyword argument for Connection\(\)$") - match = prog.match(e.args[0]) - - if match: + match = prog.match(arg_str) + if match is None: + return False key = match.group('key') - try: - # remove the invalid keyword argument - del params[key] - return True + # remove the invalid keyword argument + del params[key] + + return True + except: + raise + + # It's unlikely that this will ever be reached, however, + # it was left here intentionally, so don't remove it. return False @@ -56,8 +61,10 @@ class DatabaseWrapper(base.DatabaseWrapper): try: connection = super().get_new_connection(filtered_params) except TypeError as e: - if not self._remove_invalid_keyword_argument(filtered_params): + e_arg = str(e.args[0]) + if not self._remove_invalid_keyword_argument(e_arg, filtered_params): # This isn't a TypeError we can handle raise e return connection + From 5ee295db0119498423a2dad04adae778612aec09 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 12 Dec 2024 14:05:23 -0500 Subject: [PATCH 016/417] Keep the argument extraction in the function --- tubesync/tubesync/sqlite3/base.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py index 05889640..ccb709cb 100644 --- a/tubesync/tubesync/sqlite3/base.py +++ b/tubesync/tubesync/sqlite3/base.py @@ -26,10 +26,10 @@ class DatabaseWrapper(base.DatabaseWrapper): cursor.execute(init_cmd.strip()) - def _remove_invalid_keyword_argument(self, arg_str, params): + def _remove_invalid_keyword_argument(self, e_args, params): try: prog = re.compile(r"^(?P['])(?P[^']+)(?P=quote) is an invalid keyword argument for Connection\(\)$") - match = prog.match(arg_str) + match = prog.match(str(e_args[0])) if match is None: return False key = match.group('key') @@ -61,8 +61,7 @@ class DatabaseWrapper(base.DatabaseWrapper): try: connection = super().get_new_connection(filtered_params) except TypeError as e: - e_arg = str(e.args[0]) - if not self._remove_invalid_keyword_argument(e_arg, filtered_params): + if not self._remove_invalid_keyword_argument(e.args, filtered_params): # This isn't a TypeError we can handle raise e return connection From a49c0c1b09b5ad7a225c434b5ffd1d7e0e7e018b Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 21 Feb 2025 07:26:53 -0500 Subject: [PATCH 017/417] Allow AV1 as a choice --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index f3c051fa..033ef45e 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -237,7 +237,7 @@ class Source(models.Model): _('source video codec'), max_length=8, db_index=True, - choices=list(reversed(YouTube_VideoCodec.choices[1:])), + choices=list(reversed(YouTube_VideoCodec.choices)), default=YouTube_VideoCodec.VP9, help_text=_('Source video codec, desired video encoding format to download (ignored if "resolution" is audio only)') ) From 
b11b906497f8522b6e99e807bf6a76cd21adf9d5 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 21 Feb 2025 07:30:26 -0500 Subject: [PATCH 018/417] Pin `django-background-tasks` It's `django-background-tasks` that limits the Django version. --- Pipfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Pipfile b/Pipfile index af67a7a1..304c6e0b 100644 --- a/Pipfile +++ b/Pipfile @@ -7,7 +7,7 @@ verify_ssl = true autopep8 = "*" [packages] -django = "~=3.2" +django = "*" django-sass-processor = "*" libsass = "*" pillow = "*" @@ -15,7 +15,7 @@ whitenoise = "*" gunicorn = "*" django-compressor = "*" httptools = "*" -django-background-tasks = "*" +django-background-tasks = "1.2.5" django-basicauth = "*" psycopg2-binary = "*" mysqlclient = "*" From c9bc7bf84e86cfe17d57563c2c2f52291b59f80b Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 21 Feb 2025 07:44:20 -0500 Subject: [PATCH 019/417] Correct the syntax --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index 304c6e0b..abbe48b9 100644 --- a/Pipfile +++ b/Pipfile @@ -15,7 +15,7 @@ whitenoise = "*" gunicorn = "*" django-compressor = "*" httptools = "*" -django-background-tasks = "1.2.5" +django-background-tasks = "==1.2.5" django-basicauth = "*" psycopg2-binary = "*" mysqlclient = "*" From 6a80758c9b6241799d0fb2a16c61f4ea96fc465f Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 21 Feb 2025 12:07:32 -0500 Subject: [PATCH 020/417] Check for the database vendor first --- tubesync/sync/views.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 2a9ce8b5..1fa57f6c 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -90,10 +90,11 @@ class DashboardView(TemplateView): data['database_connection'] = settings.DATABASE_CONNECTION_STR # Add the database filesize when using db.sqlite3 data['database_filesize'] = None - db_name = str(connection.get_connection_params()['database']) - db_path = pathlib.Path(db_name) if '/' == db_name[0] else None - if db_path and 'sqlite' == connection.vendor: - data['database_filesize'] = db_path.stat().st_size + if 'sqlite' == connection.vendor: + db_name = str(connection.get_connection_params().get('database', '')) + db_path = pathlib.Path(db_name) if '/' == db_name[0] else None + if db_path: + data['database_filesize'] = db_path.stat().st_size return data From 599d596368e3d9c3474e39c735219d80137fc0f9 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 06:53:28 -0500 Subject: [PATCH 021/417] Add scheduling delays This is an attempt to stop duplicated tasks from running at the same time. --- tubesync/sync/tasks.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index f1a40fb6..e6de4139 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -178,7 +178,7 @@ def cleanup_removed_media(source, videos): media.delete() -@background(schedule=0) +@background(schedule=300, remove_existing_tasks=True) def index_source_task(source_id): ''' Indexes media available from a Source object. @@ -311,7 +311,7 @@ def download_source_images(source_id): log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}') -@background(schedule=0) +@background(schedule=60, remove_existing_tasks=True) def download_media_metadata(media_id): ''' Downloads the metadata for a media item. 
@@ -398,7 +398,7 @@ def download_media_metadata(media_id): f'{source} / {media}: {media_id}') -@background(schedule=0) +@background(schedule=60, remove_existing_tasks=True) def download_media_thumbnail(media_id, url): ''' Downloads an image from a URL and save it as a local thumbnail attached to a @@ -436,7 +436,7 @@ def download_media_thumbnail(media_id, url): return True -@background(schedule=0) +@background(schedule=60, remove_existing_tasks=True) def download_media(media_id): ''' Downloads the media to disk and attaches it to the Media instance. @@ -559,7 +559,7 @@ def download_media(media_id): raise DownloadFailedException(err) -@background(schedule=0) +@background(schedule=300, remove_existing_tasks=True) def rescan_media_server(mediaserver_id): ''' Attempts to request a media rescan on a remote media server. @@ -574,7 +574,7 @@ def rescan_media_server(mediaserver_id): mediaserver.update() -@background(schedule=0, remove_existing_tasks=True) +@background(schedule=300, remove_existing_tasks=True) def save_all_media_for_source(source_id): ''' Iterates all media items linked to a source and saves them to @@ -615,7 +615,7 @@ def save_all_media_for_source(source_id): media.save() -@background(schedule=0, remove_existing_tasks=True) +@background(schedule=60, remove_existing_tasks=True) def rename_media(media_id): try: media = Media.objects.defer('metadata', 'thumb').get(pk=media_id) @@ -624,7 +624,7 @@ def rename_media(media_id): media.rename_files() -@background(schedule=0, remove_existing_tasks=True) +@background(schedule=300, remove_existing_tasks=True) def rename_all_media_for_source(source_id): try: source = Source.objects.get(pk=source_id) @@ -637,7 +637,7 @@ def rename_all_media_for_source(source_id): media.rename_files() -@background(schedule=0, remove_existing_tasks=True) +@background(schedule=60, remove_existing_tasks=True) def wait_for_media_premiere(media_id): hours = lambda td: 1+int((24*td.days)+(td.seconds/(60*60))) From b5dcf41097141bc78e04798ebd4d914610dc602c Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 07:05:44 -0500 Subject: [PATCH 022/417] Log not existing source pre save I think this is not only possible, but happens regularly. --- tubesync/sync/signals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 5800c5ce..2d2c6ecb 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -25,7 +25,7 @@ def source_pre_save(sender, instance, **kwargs): try: existing_source = Source.objects.get(pk=instance.pk) except Source.DoesNotExist: - # Probably not possible? 
+ log.debug(f'source_pre_save signal: no existing source: {sender} - {instance}') return existing_dirpath = existing_source.directory_path.resolve(strict=True) new_dirpath = instance.directory_path.resolve(strict=False) From 526224f1bb8b834dd0e8f9d45b90fd700e768a1b Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 07:29:01 -0500 Subject: [PATCH 023/417] Move `rename_media` task creation to `Media` post save --- tubesync/sync/signals.py | 53 ++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 5800c5ce..df6a3e31 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -84,33 +84,7 @@ def source_post_save(sender, instance, created, **kwargs): verbose_name=verbose_name.format(instance.name), remove_existing_tasks=True ) - # Check settings before any rename tasks are scheduled - rename_sources_setting = settings.RENAME_SOURCES or list() - create_rename_tasks = ( - ( - instance.directory and - instance.directory in rename_sources_setting - ) or - settings.RENAME_ALL_SOURCES - ) - if create_rename_tasks: - mqs = Media.objects.filter( - source=instance.pk, - downloaded=True, - ).defer( - 'media_file', - 'metadata', - 'thumb', - ) - for media in mqs: - verbose_name = _('Renaming media for: {}: "{}"') - rename_media( - str(media.pk), - queue=str(media.pk), - priority=16, - verbose_name=verbose_name.format(media.key, media.name), - remove_existing_tasks=True - ) + verbose_name = _('Checking all media for source "{}"') save_all_media_for_source( str(instance.pk), @@ -160,8 +134,30 @@ def media_post_save(sender, instance, created, **kwargs): can_download_changed = False # Reset the skip flag if the download cap has changed if the media has not # already been downloaded - if not instance.downloaded: + downloaded = instance.downloaded + if not downloaded: skip_changed = filter_media(instance) + else: + # Downloaded media might need to be renamed + # Check settings before any rename tasks are scheduled + media = instance + rename_sources_setting = settings.RENAME_SOURCES or list() + create_rename_task = ( + ( + media.source.directory and + media.source.directory in rename_sources_setting + ) or + settings.RENAME_ALL_SOURCES + ) + if create_rename_task: + verbose_name = _('Renaming media for: {}: "{}"') + rename_media( + str(media.pk), + queue=str(media.pk), + priority=16, + verbose_name=verbose_name.format(media.key, media.name), + remove_existing_tasks=True + ) # Recalculate the "can_download" flag, this may # need to change if the source specifications have been changed @@ -204,7 +200,6 @@ def media_post_save(sender, instance, created, **kwargs): ) existing_media_download_task = get_media_download_task(str(instance.pk)) # If the media has not yet been downloaded schedule it to be downloaded - downloaded = instance.downloaded if not (instance.media_file_exists or existing_media_download_task): # The file was deleted after it was downloaded, skip this media. if instance.can_download and instance.downloaded: From e5d124e807769755e2db5962cfe5256812a0ef14 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 08:04:44 -0500 Subject: [PATCH 024/417] Update `rename_all_media_for_source` Include the recent optimizations. 
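The optimization being copied here is the deferred loading of the
heavyweight columns: `defer()` leaves the named fields out of the
SELECT, and Django only fetches one if it is actually touched, at the
cost of an extra query per deferred access. A small sketch of the
behaviour (`media_id` is a placeholder):

    media = Media.objects.defer('metadata', 'thumb').get(pk=media_id)
    media.name      # loaded with the initial query
    media.metadata  # deferred: fetched with a second query on access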
--- tubesync/sync/tasks.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index f1a40fb6..eefcab3b 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -633,7 +633,25 @@ def rename_all_media_for_source(source_id): log.error(f'Task rename_all_media_for_source(pk={source_id}) called but no ' f'source exists with ID: {source_id}') return - for media in Media.objects.filter(source=source): + # Check that the settings allow renaming + rename_sources_setting = settings.RENAME_SOURCES or list() + create_rename_tasks = ( + ( + source.directory and + source.directory in rename_sources_setting + ) or + settings.RENAME_ALL_SOURCES + ) + if not create_rename_tasks: + return + mqs = Media.objects.all().defer( + 'metadata', + 'thumb', + ).filter( + source=source, + downloaded=True, + ) + for media in mqs: media.rename_files() From 11190ada2ddd5721d88be2c217ecf43a17e6da83 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 08:25:32 -0500 Subject: [PATCH 025/417] We still need to pin django also --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index abbe48b9..00389093 100644 --- a/Pipfile +++ b/Pipfile @@ -7,7 +7,7 @@ verify_ssl = true autopep8 = "*" [packages] -django = "*" +django = "~=3.2" django-sass-processor = "*" libsass = "*" pillow = "*" From bf221394c34b88288f633d0de84545ef5ae88ca4 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 10:27:19 -0500 Subject: [PATCH 026/417] Do not make the user wait when they are interacting --- tubesync/sync/views.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 2a9ce8b5..d8d9f941 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -121,6 +121,7 @@ class SourcesView(ListView): str(sobj.pk), queue=str(sobj.pk), repeat=0, + schedule=30, verbose_name=verbose_name.format(sobj.name)) url = reverse_lazy('sync:sources') url = append_uri_params(url, {'message': 'source-refreshed'}) From 8722a1dbe64734002644dbfeafb4cc760117cd3e Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 10:34:41 -0500 Subject: [PATCH 027/417] Match the priority of the other index tasks --- tubesync/sync/management/commands/reset-tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/management/commands/reset-tasks.py b/tubesync/sync/management/commands/reset-tasks.py index d65abfc3..7d78c09f 100644 --- a/tubesync/sync/management/commands/reset-tasks.py +++ b/tubesync/sync/management/commands/reset-tasks.py @@ -25,7 +25,7 @@ class Command(BaseCommand): str(source.pk), repeat=source.index_schedule, queue=str(source.pk), - priority=5, + priority=10, verbose_name=verbose_name.format(source.name) ) # This also chains down to call each Media objects .save() as well From 56bd3288b457e0def4968c1ad4f668d9cdf64664 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 10:38:30 -0500 Subject: [PATCH 028/417] Match the priority of the other index tasks --- tubesync/sync/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index d8d9f941..41cff24f 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -121,6 +121,7 @@ class SourcesView(ListView): str(sobj.pk), queue=str(sobj.pk), repeat=0, + priority=10, schedule=30, verbose_name=verbose_name.format(sobj.name)) url = reverse_lazy('sync:sources') @@ -861,7 +862,7 @@ class ResetTasks(FormView): 
str(source.pk), repeat=source.index_schedule, queue=str(source.pk), - priority=5, + priority=10, verbose_name=verbose_name.format(source.name) ) # This also chains down to call each Media objects .save() as well From a9db5639901c86553b28613a0a56b1ff58dafbe8 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 11:50:48 -0500 Subject: [PATCH 029/417] Be more consistent with `fmt` This was the cause of many rename operations. --- tubesync/sync/models.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index f3c051fa..6e8700ed 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -883,14 +883,19 @@ class Media(models.Model): resolution = self.downloaded_format.lower() elif self.downloaded_height: resolution = f'{self.downloaded_height}p' + if resolution: + fmt.append(resolution) if self.downloaded_format != Val(SourceResolution.AUDIO): vcodec = self.downloaded_video_codec.lower() + if vcodec: fmt.append(vcodec) acodec = self.downloaded_audio_codec.lower() - fmt.append(acodec) + if acodec: + fmt.append(acodec) if self.downloaded_format != Val(SourceResolution.AUDIO): fps = str(self.downloaded_fps) - fmt.append(f'{fps}fps') + if fps: + fmt.append(f'{fps}fps') if self.downloaded_hdr: hdr = 'hdr' fmt.append(hdr) @@ -922,13 +927,19 @@ class Media(models.Model): # Combined vformat = cformat if vformat: - resolution = vformat['format'].lower() - fmt.append(resolution) + if vformat['format']: + resolution = vformat['format'].lower() + else: + resolution = f'{vformat['height']}p' + if resolution: + fmt.append(resolution) vcodec = vformat['vcodec'].lower() - fmt.append(vcodec) + if vcodec: + fmt.append(vcodec) if aformat: acodec = aformat['acodec'].lower() - fmt.append(acodec) + if acodec: + fmt.append(acodec) if vformat: if vformat['is_60fps']: fps = '60fps' From b3a21f2e27157c31a224117b8027f3695c143424 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 11:55:14 -0500 Subject: [PATCH 030/417] fixup: quoting --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 6e8700ed..b2a43e53 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -930,7 +930,7 @@ class Media(models.Model): if vformat['format']: resolution = vformat['format'].lower() else: - resolution = f'{vformat['height']}p' + resolution = f"{vformat['height']}p" if resolution: fmt.append(resolution) vcodec = vformat['vcodec'].lower() From 9d328df671d1c2d8a21bd49fccf4c82226519ce7 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 12:08:35 -0500 Subject: [PATCH 031/417] We need to keep the repeating index task --- tubesync/sync/views.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 41cff24f..4786a5d6 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -123,6 +123,7 @@ class SourcesView(ListView): repeat=0, priority=10, schedule=30, + remove_existing_tasks=False, verbose_name=verbose_name.format(sobj.name)) url = reverse_lazy('sync:sources') url = append_uri_params(url, {'message': 'source-refreshed'}) From c5c340b58f6bc99bca15b3ce8cc2bcd9b9a0ddb4 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 12:59:06 -0500 Subject: [PATCH 032/417] Optimize `rename_files` --- tubesync/sync/models.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 
f3c051fa..11c11444 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1514,6 +1514,8 @@ class Media(models.Model): old_file_str = other_path.name new_file_str = new_stem + old_file_str[len(old_stem):] new_file_path = Path(new_prefix_path / new_file_str) + if new_file_path == other_path: + continue log.debug(f'Considering replace for: {self!s}\n\t{other_path!s}\n\t{new_file_path!s}') # it should exist, but check anyway if other_path.exists(): @@ -1525,6 +1527,8 @@ class Media(models.Model): old_file_str = fuzzy_path.name new_file_str = new_stem + old_file_str[len(fuzzy_stem):] new_file_path = Path(new_prefix_path / new_file_str) + if new_file_path == fuzzy_path: + continue log.debug(f'Considering rename for: {self!s}\n\t{fuzzy_path!s}\n\t{new_file_path!s}') # it quite possibly was renamed already if fuzzy_path.exists() and not new_file_path.exists(): @@ -1538,8 +1542,9 @@ class Media(models.Model): # try to remove empty dirs parent_dir = old_video_path.parent + stop_dir = self.source.directory_path try: - while parent_dir.is_dir(): + while parent_dir.is_relative_to(stop_dir): parent_dir.rmdir() log.info(f'Removed empty directory: {parent_dir!s}') parent_dir = parent_dir.parent From 0d7351994633b55f874b5c81255b62baf11ba4fc Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 22 Feb 2025 14:45:53 -0500 Subject: [PATCH 033/417] Fix an internal server error --- tubesync/sync/views.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 2a9ce8b5..e9499bea 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -495,8 +495,9 @@ class MediaThumbView(DetailView): def get(self, request, *args, **kwargs): media = self.get_object() - if media.thumb: - thumb = open(media.thumb.path, 'rb').read() + if media.thumb_file_exists: + thumb_path = pathlib.Path(media.thumb.path) + thumb = thumb_path.read_bytes() content_type = 'image/jpeg' else: # No thumbnail on disk, return a blank 1x1 gif From be0cbd6cd0a8d7bd4a55be8e85c54c258f27dd66 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 23 Feb 2025 17:47:54 -0500 Subject: [PATCH 034/417] Be more efficient with `Media.loaded_metadata` --- tubesync/sync/models.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index f3c051fa..1b3a1297 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -547,6 +547,9 @@ class Media(models.Model): Source. 
''' + # Used to convert seconds to datetime + posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) + # Format to use to display a URL for the media URLS = _srctype_dict('https://www.youtube.com/watch?v={key}') @@ -771,6 +774,7 @@ class Media(models.Model): def save(self, force_insert=False, force_update=False, using=None, update_fields=None): # Trigger an update of derived fields from metadata if self.metadata: + setattr(self, '_cached_metadata_dict', None) self.title = self.metadata_title[:200] self.duration = self.metadata_duration if update_fields is not None and "metadata" in update_fields: @@ -1000,20 +1004,28 @@ class Media(models.Model): @property def reduce_data(self): try: - from common.logger import log - from common.utils import json_serial - - old_mdl = len(self.metadata or "") data = json.loads(self.metadata or "{}") + if '_reduce_data_ran_at' in data.keys(): + total_seconds = data['_reduce_data_ran_at'] + ran_at = posix_epoch + timedelta(seconds=total_seconds) + if (timezone.now() - ran_at) < timedelta(hours=1): + return data + + from common.utils import json_serial compact_json = json.dumps(data, separators=(',', ':'), default=json_serial) filtered_data = filter_response(data, True) + filtered_data['_reduce_data_ran_at'] = round((timezone.now() - posix_epoch).total_seconds()) filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial) except Exception as e: + from common.logger import log log.exception('reduce_data: %s', e) else: + from common.logger import log + log.debug(f'reduce_data: running for: {self.source.name} / {self.key}') # log the results of filtering / compacting on metadata size new_mdl = len(compact_json) + old_mdl = len(self.metadata or "") if old_mdl > new_mdl: delta = old_mdl - new_mdl log.info(f'{self.key}: metadata compacted by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})') @@ -1023,16 +1035,24 @@ class Media(models.Model): log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})') if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False): self.metadata = filtered_json + return filtered_data @property def loaded_metadata(self): + data = None if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False): - self.reduce_data + data = self.reduce_data try: - data = json.loads(self.metadata) + if not data: + cached = getattr(self, '_cached_metadata_dict', None) + if cached: + data = cached + else: + data = json.loads(self.metadata or "{}") if not isinstance(data, dict): return {} + setattr(self, '_cached_metadata_dict', data) return data except Exception as e: return {} @@ -1100,7 +1120,6 @@ class Media(models.Model): if timestamp is not None: try: timestamp_float = float(timestamp) - posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) published_dt = posix_epoch + timedelta(seconds=timestamp_float) except Exception as e: log.warn(f'Could not compute published from timestamp for: {self.source} / {self} with "{e}"') From 3e2337d4a94a7d68ad5e66d8d7d6fbcba65230fb Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 23 Feb 2025 17:58:15 -0500 Subject: [PATCH 035/417] fixup: only return filtered data when appropriate --- tubesync/sync/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 1b3a1297..000f2830 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1035,7 +1035,8 @@ class Media(models.Model): log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})') if getattr(settings, 
'SHRINK_OLD_MEDIA_METADATA', False): self.metadata = filtered_json - return filtered_data + return filtered_data + return data @property From 7c2ba576f666bbc36cba08b48c7e4d0081832a04 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 23 Feb 2025 18:20:35 -0500 Subject: [PATCH 036/417] Return a copy instead of modifying the input --- tubesync/sync/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index 0646a937..514f75b1 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -1767,7 +1767,7 @@ class ResponseFilteringTestCase(TestCase): self.media.save() unfiltered = self.media.loaded_metadata - filtered = filter_response(self.media.loaded_metadata) + filtered = filter_response(self.media.loaded_metadata, True) self.assertIn('formats', unfiltered.keys()) self.assertIn('formats', filtered.keys()) # filtered 'downloader_options' From eeb1826b0538d0870f356bb31a4be1d61a170fc9 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Feb 2025 02:45:41 -0500 Subject: [PATCH 037/417] Upgrade to 1.2.8 for Django 5 support. --- Pipfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Pipfile b/Pipfile index 00389093..08c20163 100644 --- a/Pipfile +++ b/Pipfile @@ -7,7 +7,7 @@ verify_ssl = true autopep8 = "*" [packages] -django = "~=3.2" +django = "*" django-sass-processor = "*" libsass = "*" pillow = "*" @@ -15,7 +15,7 @@ whitenoise = "*" gunicorn = "*" django-compressor = "*" httptools = "*" -django-background-tasks = "==1.2.5" +django-background-tasks = ">=1.2.8" django-basicauth = "*" psycopg2-binary = "*" mysqlclient = "*" From 83203254adf65af7603c8d80483134ce0a6d2d57 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Feb 2025 02:47:37 -0500 Subject: [PATCH 038/417] Patched for version 1.2.8 --- patches/background_task/models.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/patches/background_task/models.py b/patches/background_task/models.py index 02f7164a..0e46c5b6 100644 --- a/patches/background_task/models.py +++ b/patches/background_task/models.py @@ -6,8 +6,8 @@ import logging import os import traceback -from compat import StringIO -from compat.models import GenericForeignKey +from io import StringIO +from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType from django.db import models from django.db.models import Q @@ -50,14 +50,15 @@ class TaskManager(models.Manager): if queue: qs = qs.filter(queue=queue) ready = qs.filter(run_at__lte=now, failed_at=None) - _priority_ordering = '{}priority'.format(app_settings.BACKGROUND_TASK_PRIORITY_ORDERING) + _priority_ordering = '{}priority'.format( + app_settings.BACKGROUND_TASK_PRIORITY_ORDERING) ready = ready.order_by(_priority_ordering, 'run_at') if app_settings.BACKGROUND_TASK_RUN_ASYNC: currently_failed = self.failed().count() currently_locked = self.locked(now).count() count = app_settings.BACKGROUND_TASK_ASYNC_THREADS - \ - (currently_locked - currently_failed) + (currently_locked - currently_failed) if count > 0: ready = ready[:count] else: @@ -102,7 +103,8 @@ class TaskManager(models.Manager): s = "%s%s" % (task_name, task_params) task_hash = sha1(s.encode('utf-8')).hexdigest() if remove_existing_tasks: - Task.objects.filter(task_hash=task_hash, locked_at__isnull=True).delete() + Task.objects.filter(task_hash=task_hash, + locked_at__isnull=True).delete() return Task(task_name=task_name, task_params=task_params, task_hash=task_hash, @@ 
-251,13 +253,14 @@ class Task(models.Model): self.failed_at = timezone.now() logger.warning('Marking task %s as failed', self) completed = self.create_completed_task() - task_failed.send(sender=self.__class__, task_id=self.id, completed_task=completed) + task_failed.send(sender=self.__class__, + task_id=self.id, completed_task=completed) self.delete() else: backoff = timedelta(seconds=(self.attempts ** 4) + 5) self.run_at = timezone.now() + backoff logger.warning('Rescheduling task %s for %s later at %s', self, - backoff, self.run_at) + backoff, self.run_at) task_rescheduled.send(sender=self.__class__, task=self) self.locked_by = None self.locked_at = None @@ -330,9 +333,6 @@ class Task(models.Model): db_table = 'background_task' - - - class CompletedTaskQuerySet(models.QuerySet): def created_by(self, creator): @@ -389,7 +389,8 @@ class CompletedTask(models.Model): # when the task should be run run_at = models.DateTimeField(db_index=True) - repeat = models.BigIntegerField(choices=Task.REPEAT_CHOICES, default=Task.NEVER) + repeat = models.BigIntegerField( + choices=Task.REPEAT_CHOICES, default=Task.NEVER) repeat_until = models.DateTimeField(null=True, blank=True) # the "name" of the queue this is to be run on From ce68ba9a01e77367c3eb24ddfa16e78b5ba71162 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Feb 2025 02:51:36 -0500 Subject: [PATCH 039/417] Drop Python 3.7 as unsupported --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index faf25319..c1450eae 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 - name: Install Python ${{ matrix.python-version }} From b4c4825c2ab545b89148eae6c2e2fed3420eeab5 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Feb 2025 04:30:59 -0500 Subject: [PATCH 040/417] Remove `redis-server` from Dockerfile --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c54bf9c8..7376bbb5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -276,7 +276,6 @@ RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ pkgconf \ python3 \ python3-wheel \ - redis-server \ curl \ less \ && \ From 0e4c5b10c95f2fe628770e805fc21b29f4d4ce37 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Feb 2025 04:32:26 -0500 Subject: [PATCH 041/417] Delete config/root/etc/redis/redis.conf --- config/root/etc/redis/redis.conf | 46 -------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 config/root/etc/redis/redis.conf diff --git a/config/root/etc/redis/redis.conf b/config/root/etc/redis/redis.conf deleted file mode 100644 index 8e411e80..00000000 --- a/config/root/etc/redis/redis.conf +++ /dev/null @@ -1,46 +0,0 @@ -bind 127.0.0.1 -protected-mode yes -port 6379 -tcp-backlog 511 -timeout 0 -tcp-keepalive 300 -daemonize no -supervised no -loglevel notice -logfile "" -databases 1 -always-show-logo no -save "" -dir /var/lib/redis -maxmemory 64mb -maxmemory-policy noeviction -lazyfree-lazy-eviction no -lazyfree-lazy-expire no -lazyfree-lazy-server-del no -replica-lazy-flush no -lazyfree-lazy-user-del no -oom-score-adj no -oom-score-adj-values 0 200 800 -appendonly no -appendfsync no -lua-time-limit 5000 -slowlog-log-slower-than 10000 -slowlog-max-len 128 -latency-monitor-threshold 0 
-notify-keyspace-events ""
-hash-max-ziplist-entries 512
-hash-max-ziplist-value 64
-list-max-ziplist-size -2
-list-compress-depth 0
-set-max-intset-entries 512
-zset-max-ziplist-entries 128
-zset-max-ziplist-value 64
-hll-sparse-max-bytes 3000
-stream-node-max-bytes 4096
-stream-node-max-entries 100
-activerehashing yes
-client-output-buffer-limit normal 0 0 0
-client-output-buffer-limit replica 256mb 64mb 60
-client-output-buffer-limit pubsub 32mb 8mb 60
-hz 10
-dynamic-hz yes

From 44910edbb17f31d5e58f9bb7cf2fa687af05f579 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:33:00 -0500
Subject: [PATCH 042/417] Delete config/root/etc/s6-overlay/s6-rc.d/redis/run

---
 config/root/etc/s6-overlay/s6-rc.d/redis/run | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100755 config/root/etc/s6-overlay/s6-rc.d/redis/run

diff --git a/config/root/etc/s6-overlay/s6-rc.d/redis/run b/config/root/etc/s6-overlay/s6-rc.d/redis/run
deleted file mode 100755
index 09edec81..00000000
--- a/config/root/etc/s6-overlay/s6-rc.d/redis/run
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/command/with-contenv bash
-
-exec s6-setuidgid redis \
-    /usr/bin/redis-server /etc/redis/redis.conf

From e0b98f34b7ac7bb61b3552179ebdd6d0bfe7cf64 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:33:14 -0500
Subject: [PATCH 043/417] Delete
 config/root/etc/s6-overlay/s6-rc.d/redis/dependencies

---
 config/root/etc/s6-overlay/s6-rc.d/redis/dependencies | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 config/root/etc/s6-overlay/s6-rc.d/redis/dependencies

diff --git a/config/root/etc/s6-overlay/s6-rc.d/redis/dependencies b/config/root/etc/s6-overlay/s6-rc.d/redis/dependencies
deleted file mode 100644
index e69de29b..00000000

From bd5dbbd295c781ab269aa94fb332e5ef086afa87 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:33:28 -0500
Subject: [PATCH 044/417] Delete config/root/etc/s6-overlay/s6-rc.d/redis/type

---
 config/root/etc/s6-overlay/s6-rc.d/redis/type | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 config/root/etc/s6-overlay/s6-rc.d/redis/type

diff --git a/config/root/etc/s6-overlay/s6-rc.d/redis/type b/config/root/etc/s6-overlay/s6-rc.d/redis/type
deleted file mode 100644
index 1780f9f4..00000000
--- a/config/root/etc/s6-overlay/s6-rc.d/redis/type
+++ /dev/null
@@ -1 +0,0 @@
-longrun
\ No newline at end of file

From 170671d4ec38faf66c3b33277cd30bd1d0c4db14 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:37:25 -0500
Subject: [PATCH 045/417] Delete
 config/root/etc/s6-overlay/s6-rc.d/celery-worker/dependencies

---
 config/root/etc/s6-overlay/s6-rc.d/celery-worker/dependencies | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 config/root/etc/s6-overlay/s6-rc.d/celery-worker/dependencies

diff --git a/config/root/etc/s6-overlay/s6-rc.d/celery-worker/dependencies b/config/root/etc/s6-overlay/s6-rc.d/celery-worker/dependencies
deleted file mode 100644
index 283e1305..00000000
--- a/config/root/etc/s6-overlay/s6-rc.d/celery-worker/dependencies
+++ /dev/null
@@ -1 +0,0 @@
-gunicorn
\ No newline at end of file

From 1ba97749df5776bf1f0602f46fb995ba3540574a Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:37:37 -0500
Subject: [PATCH 046/417] Delete
 config/root/etc/s6-overlay/s6-rc.d/celery-worker/run

---
 .../etc/s6-overlay/s6-rc.d/celery-worker/run  | 24 -------------------
 1 file changed, 24 deletions(-)
 delete mode 100755 config/root/etc/s6-overlay/s6-rc.d/celery-worker/run

diff --git a/config/root/etc/s6-overlay/s6-rc.d/celery-worker/run b/config/root/etc/s6-overlay/s6-rc.d/celery-worker/run
deleted file mode 100755
index 04e2a32c..00000000
--- a/config/root/etc/s6-overlay/s6-rc.d/celery-worker/run
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/with-contenv bash
-
-UMASK_SET=${UMASK_SET:-022}
-umask "$UMASK_SET"
-
-cd /app || exit
-
-PIDFILE=/run/app/celery-worker.pid
-
-if [ -f "${PIDFILE}" ]
-then
-    PID=$(cat $PIDFILE)
-    echo "Unexpected PID file exists at ${PIDFILE} with PID: ${PID}"
-    if kill -0 $PID
-    then
-        echo "Killing old gunicorn process with PID: ${PID}"
-        kill -9 $PID
-    fi
-    echo "Removing stale PID file: ${PIDFILE}"
-    rm ${PIDFILE}
-fi
-
-#exec s6-setuidgid app \
-#    /usr/local/bin/celery --workdir /app -A tubesync worker --pidfile ${PIDFILE} -l INFO

From 3d1cc110ace3625f50ee7d5a3e022a97fad90af0 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:37:46 -0500
Subject: [PATCH 047/417] Delete
 config/root/etc/s6-overlay/s6-rc.d/celery-worker/type

---
 config/root/etc/s6-overlay/s6-rc.d/celery-worker/type | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 config/root/etc/s6-overlay/s6-rc.d/celery-worker/type

diff --git a/config/root/etc/s6-overlay/s6-rc.d/celery-worker/type b/config/root/etc/s6-overlay/s6-rc.d/celery-worker/type
deleted file mode 100644
index 1780f9f4..00000000
--- a/config/root/etc/s6-overlay/s6-rc.d/celery-worker/type
+++ /dev/null
@@ -1 +0,0 @@
-longrun
\ No newline at end of file

From b474e9e817a2ae46b69f5242d24d2975ace7b060 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:38:45 -0500
Subject: [PATCH 048/417] Delete
 config/root/etc/s6-overlay/s6-rc.d/celery-beat/run

---
 .../etc/s6-overlay/s6-rc.d/celery-beat/run    | 25 -------------------
 1 file changed, 25 deletions(-)
 delete mode 100755 config/root/etc/s6-overlay/s6-rc.d/celery-beat/run

diff --git a/config/root/etc/s6-overlay/s6-rc.d/celery-beat/run b/config/root/etc/s6-overlay/s6-rc.d/celery-beat/run
deleted file mode 100755
index 46b03a67..00000000
--- a/config/root/etc/s6-overlay/s6-rc.d/celery-beat/run
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/with-contenv bash
-
-UMASK_SET=${UMASK_SET:-022}
-umask "$UMASK_SET"
-
-cd /app || exit
-
-PIDFILE=/run/app/celery-beat.pid
-SCHEDULE=/tmp/tubesync-celerybeat-schedule
-
-if [ -f "${PIDFILE}" ]
-then
-    PID=$(cat $PIDFILE)
-    echo "Unexpected PID file exists at ${PIDFILE} with PID: ${PID}"
-    if kill -0 $PID
-    then
-        echo "Killing old gunicorn process with PID: ${PID}"
-        kill -9 $PID
-    fi
-    echo "Removing stale PID file: ${PIDFILE}"
-    rm ${PIDFILE}
-fi
-
-#exec s6-setuidgid app \
-#    /usr/local/bin/celery --workdir /app -A tubesync beat --pidfile ${PIDFILE} -s ${SCHEDULE}

From 3f93d0ae34f572b1f8c8cae16ab9bf3d5077846c Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:38:53 -0500
Subject: [PATCH 049/417] Delete
 config/root/etc/s6-overlay/s6-rc.d/celery-beat/dependencies

---
 config/root/etc/s6-overlay/s6-rc.d/celery-beat/dependencies | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 config/root/etc/s6-overlay/s6-rc.d/celery-beat/dependencies

diff --git a/config/root/etc/s6-overlay/s6-rc.d/celery-beat/dependencies b/config/root/etc/s6-overlay/s6-rc.d/celery-beat/dependencies
deleted file mode 100644
index 283e1305..00000000
--- a/config/root/etc/s6-overlay/s6-rc.d/celery-beat/dependencies
+++ /dev/null
@@ -1 +0,0 @@
-gunicorn
\ No newline at end of file

From a111da9104dbe296a584ef2139f55e9aeab6bfeb Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:39:04 -0500
Subject: [PATCH 050/417] Delete
 config/root/etc/s6-overlay/s6-rc.d/celery-beat/type

---
 config/root/etc/s6-overlay/s6-rc.d/celery-beat/type | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 config/root/etc/s6-overlay/s6-rc.d/celery-beat/type

diff --git a/config/root/etc/s6-overlay/s6-rc.d/celery-beat/type b/config/root/etc/s6-overlay/s6-rc.d/celery-beat/type
deleted file mode 100644
index 1780f9f4..00000000
--- a/config/root/etc/s6-overlay/s6-rc.d/celery-beat/type
+++ /dev/null
@@ -1 +0,0 @@
-longrun
\ No newline at end of file

From 2f044c36142e9b736af3e386fa32575e6086c241 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:54:53 -0500
Subject: [PATCH 051/417] Remove `hiredis` & `redis` from Pipfile

---
 Pipfile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Pipfile b/Pipfile
index 00389093..61d66a04 100644
--- a/Pipfile
+++ b/Pipfile
@@ -20,7 +20,5 @@ django-basicauth = "*"
 psycopg2-binary = "*"
 mysqlclient = "*"
 yt-dlp = "*"
-redis = "*"
-hiredis = "*"
 requests = {extras = ["socks"], version = "*"}
 emoji = "*"

From 822db9d0395599a77c30e820a28899374fc16077 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 04:58:56 -0500
Subject: [PATCH 052/417] Delete tubesync/tubesync/celery.py

---
 tubesync/tubesync/celery.py | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 tubesync/tubesync/celery.py

diff --git a/tubesync/tubesync/celery.py b/tubesync/tubesync/celery.py
deleted file mode 100644
index aad3092f..00000000
--- a/tubesync/tubesync/celery.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import os
-from celery import Celery
-
-
-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'tubesync.settings')
-REDIS_CONNECTION = os.getenv('REDIS_CONNECTION', 'redis://localhost:6379/0')
-
-
-app = Celery('tubesync')
-app.config_from_object('django.conf:settings', namespace='CELERY')
-app.autodiscover_tasks()
-app.conf.broker_url = REDIS_CONNECTION
-app.conf.beat_schedule = {
-    '10-second-beat': {
-        'task': 'sync.tasks.housekeeping_task',
-        'schedule': 60.0,
-        'args': ()
-    },
-}

From 7ebd23c070549b580869ef88495c21990e3ab96b Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 06:27:23 -0500
Subject: [PATCH 053/417] Prevent unintended changes to the cached dictionary

---
 tubesync/sync/models.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 4db52870..86aa4e10 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -2,10 +2,11 @@ import os
 import uuid
 import json
 import re
-from xml.etree import ElementTree
 from collections import OrderedDict
+from copy import deepcopy
 from datetime import datetime, timedelta, timezone as tz
 from pathlib import Path
+from xml.etree import ElementTree
 from django.conf import settings
 from django.db import models
 from django.core.exceptions import SuspiciousOperation
@@ -1058,7 +1059,7 @@ class Media(models.Model):
         if not data:
             cached = getattr(self, '_cached_metadata_dict', None)
             if cached:
-                data = cached
+                return deepcopy(cached)
             else:
                 data = json.loads(self.metadata or "{}")
         if not isinstance(data, dict):
             return {}
From 1ad90ad62886ff472dc79e9b678ead913419f92b Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 10:11:06 -0500
Subject: [PATCH 054/417] Access `posix_epoch` correctly

Fixes #768
---
 tubesync/sync/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 86aa4e10..a1e2846d 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1018,7 +1018,7 @@ class Media(models.Model):
             data = json.loads(self.metadata or "{}")
             if '_reduce_data_ran_at' in data.keys():
                 total_seconds = data['_reduce_data_ran_at']
-                ran_at = posix_epoch + timedelta(seconds=total_seconds)
+                ran_at = self.posix_epoch + timedelta(seconds=total_seconds)
                 if (timezone.now() - ran_at) < timedelta(hours=1):
                     return data
 
@@ -1132,7 +1132,7 @@ class Media(models.Model):
         if timestamp is not None:
             try:
                 timestamp_float = float(timestamp)
-                published_dt = posix_epoch + timedelta(seconds=timestamp_float)
+                published_dt = self.posix_epoch + timedelta(seconds=timestamp_float)
             except Exception as e:
                 log.warn(f'Could not compute published from timestamp for: {self.source} / {self} with "{e}"')
         return published_dt
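For reference, the epoch arithmetic used here is shown standalone below; this is a minimal sketch that assumes `posix_epoch` resolves to a timezone-aware UNIX epoch datetime (the helper name is illustrative):

    from datetime import datetime, timedelta, timezone

    # Assumed shape of the property the fix now reaches via `self.`
    posix_epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)

    def published_from(timestamp):
        # Equivalent to datetime.fromtimestamp(timestamp, tz=timezone.utc)
        return posix_epoch + timedelta(seconds=float(timestamp))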
From 91523edc219d49c25309290d8988e5feaf5084cf Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 10:40:54 -0500
Subject: [PATCH 055/417] Delete tubesync/tubesync/sqlite3/base.py

---
 tubesync/tubesync/sqlite3/base.py | 69 -------------------------------
 1 file changed, 69 deletions(-)
 delete mode 100644 tubesync/tubesync/sqlite3/base.py

diff --git a/tubesync/tubesync/sqlite3/base.py b/tubesync/tubesync/sqlite3/base.py
deleted file mode 100644
index ccb709cb..00000000
--- a/tubesync/tubesync/sqlite3/base.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import re
-from django.db.backends.sqlite3 import base
-
-
-class DatabaseWrapper(base.DatabaseWrapper):
-
-    def _start_transaction_under_autocommit(self):
-        conn_params = self.get_connection_params()
-        transaction_modes = frozenset(["DEFERRED", "EXCLUSIVE", "IMMEDIATE"])
-
-        sql_statement = "BEGIN TRANSACTION"
-        if "transaction_mode" in conn_params:
-            tm = str(conn_params["transaction_mode"]).upper().strip()
-            if tm in transaction_modes:
-                sql_statement = f"BEGIN {tm} TRANSACTION"
-        self.cursor().execute(sql_statement)
-
-
-    def init_connection_state(self):
-        conn_params = self.get_connection_params()
-        if "init_command" in conn_params:
-            ic = str(conn_params["init_command"])
-            cmds = ic.split(';')
-            with self.cursor() as cursor:
-                for init_cmd in cmds:
-                    cursor.execute(init_cmd.strip())
-
-
-    def _remove_invalid_keyword_argument(self, e_args, params):
-        try:
-            prog = re.compile(r"^(?P<quote>['])(?P<key>[^']+)(?P=quote) is an invalid keyword argument for Connection\(\)$")
-            match = prog.match(str(e_args[0]))
-            if match is None:
-                return False
-            key = match.group('key')
-
-            # remove the invalid keyword argument
-            del params[key]
-
-            return True
-        except:
-            raise
-
-        # It's unlikely that this will ever be reached, however,
-        # it was left here intentionally, so don't remove it.
-        return False
-
-
-    def get_new_connection(self, conn_params):
-        filter_map = {
-            "transaction_mode": ("isolation_level", "DEFERRED"),
-        }
-        filtered_params = {k: v for (k,v) in conn_params.items() if k not in filter_map}
-        filtered_params.update({v[0]: conn_params.get(k, v[1]) for (k,v) in filter_map.items()})
-
-        attempt = 0
-        connection = None
-        tries = len(filtered_params)
-        while connection is None and attempt < tries:
-            attempt += 1
-            try:
-                connection = super().get_new_connection(filtered_params)
-            except TypeError as e:
-                if not self._remove_invalid_keyword_argument(e.args, filtered_params):
-                    # This isn't a TypeError we can handle
                    raise e
-        return connection

From c9f4a5dc1757b6ce4ecd5323a1a0b495953dc7fc Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 10:42:30 -0500
Subject: [PATCH 056/417] Use the standard `sqlite3` engine from Django 5.1

---
 tubesync/tubesync/local_settings.py.container | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container
index 3659b289..659fb074 100644
--- a/tubesync/tubesync/local_settings.py.container
+++ b/tubesync/tubesync/local_settings.py.container
@@ -44,7 +44,7 @@ if database_dict:
 else:
     DATABASES = {
         'default': {
-            'ENGINE': 'tubesync.sqlite3',
+            'ENGINE': 'django.db.backends.sqlite3',
             'NAME': CONFIG_BASE_DIR / 'db.sqlite3',
             "OPTIONS": {
                 "transaction_mode": "IMMEDIATE",
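Dropping the wrapper works because Django 5.1's bundled SQLite backend understands both keys natively; a minimal sketch of the settings shape this relies on (the database name is illustrative):

    # Django 5.1+ consumes these OPTIONS keys itself; on Django 3.2 they
    # would reach sqlite3.Connection() and raise TypeError.
    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.sqlite3',
            'NAME': 'db.sqlite3',
            'OPTIONS': {
                'transaction_mode': 'IMMEDIATE',  # BEGIN IMMEDIATE TRANSACTION
                'init_command': 'PRAGMA legacy_alter_table = OFF;',
            },
        },
    }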
From fd423fba88bda3029d8e9283c43976fc38d6c693 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Feb 2025 14:57:53 -0500
Subject: [PATCH 057/417] Check for the cached dictionary first

---
 tubesync/sync/models.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index a1e2846d..de874687 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1014,26 +1014,26 @@ class Media(models.Model):
 
     @property
     def reduce_data(self):
+        now = timezone.now()
         try:
             data = json.loads(self.metadata or "{}")
             if '_reduce_data_ran_at' in data.keys():
                 total_seconds = data['_reduce_data_ran_at']
                 ran_at = self.posix_epoch + timedelta(seconds=total_seconds)
-                if (timezone.now() - ran_at) < timedelta(hours=1):
+                if (now - ran_at) < timedelta(hours=1):
                     return data
 
             from common.utils import json_serial
             compact_json = json.dumps(data, separators=(',', ':'), default=json_serial)
             filtered_data = filter_response(data, True)
-            filtered_data['_reduce_data_ran_at'] = round((timezone.now() - posix_epoch).total_seconds())
+            filtered_data['_reduce_data_ran_at'] = round((now - self.posix_epoch).total_seconds())
             filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial)
         except Exception as e:
             from common.logger import log
             log.exception('reduce_data: %s', e)
         else:
             from common.logger import log
-            log.debug(f'reduce_data: running for: {self.source.name} / {self.key}')
             # log the results of filtering / compacting on metadata size
             new_mdl = len(compact_json)
             old_mdl = len(self.metadata or "")
@@ -1052,16 +1052,15 @@ class Media(models.Model):
 
     @property
     def loaded_metadata(self):
+        cached = getattr(self, '_cached_metadata_dict', None)
+        if cached:
+            return deepcopy(cached)
         data = None
         if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False):
             data = self.reduce_data
         try:
             if not data:
-                cached = getattr(self, '_cached_metadata_dict', None)
-                if cached:
-                    return deepcopy(cached)
-                else:
-                    data = json.loads(self.metadata or "{}")
+                data = json.loads(self.metadata or "{}")
             if not isinstance(data, dict):
                 return {}
             setattr(self, '_cached_metadata_dict', data)

From 921e053fc32fcd71a302b8a459489997bc4ca996 Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 25 Feb 2025 03:34:02 -0500
Subject: [PATCH 058/417] Remove unnecessary `chmod` executions

Setting the mode to the same value twice is only wasted effort.
---
 config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run
index 4ac5ff8e..a6d8bbae 100755
--- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run
+++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run
@@ -13,12 +13,11 @@ chown -R app:app /config
 chmod -R 0755 /config
 chown -R root:app /app
 chmod -R 0750 /app
+chmod 0755 /app/*.py /app/*.sh
+find /app -mindepth 2 -type f -execdir chmod 640 '{}' +
 chown -R app:app /app/common/static
-chmod -R 0750 /app/common/static
 chown -R app:app /app/static
-chmod -R 0750 /app/static
-find /app -type f ! -iname healthcheck.py -exec chmod 640 {} \;
-chmod 0755 /app/healthcheck.py
+
 
 # Optionally reset the download dir permissions
 if [ "${TUBESYNC_RESET_DOWNLOAD_DIR:=True}" == "True" ]

From fff42cec09d33add878ceeb7c5fa28aa57150514 Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 25 Feb 2025 03:36:11 -0500
Subject: [PATCH 059/417] fixup: remove the extra blank line

---
 config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run | 1 -
 1 file changed, 1 deletion(-)

diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run
index a6d8bbae..aa234f46 100755
--- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run
+++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run
@@ -18,7 +18,6 @@ find /app -mindepth 2 -type f -execdir chmod 640 '{}' +
 chown -R app:app /app/common/static
 chown -R app:app /app/static
 
-
 # Optionally reset the download dir permissions
 if [ "${TUBESYNC_RESET_DOWNLOAD_DIR:=True}" == "True" ]
 then

From 533c1631b729a506bcc5eb36a63d6e2099ad71ee Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 25 Feb 2025 04:13:02 -0500
Subject: [PATCH 060/417] Store `Media.downloaded_filesize` for imported files

Fixes #69
---
 .../sync/management/commands/import-existing-media.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tubesync/sync/management/commands/import-existing-media.py b/tubesync/sync/management/commands/import-existing-media.py
index 6b723e70..fd6a800d 100644
--- a/tubesync/sync/management/commands/import-existing-media.py
+++ b/tubesync/sync/management/commands/import-existing-media.py
@@ -16,7 +16,7 @@ class Command(BaseCommand):
         log.info('Building directory to Source map...')
         dirmap = {}
         for s in Source.objects.all():
-            dirmap[s.directory_path] = s
+            dirmap[str(s.directory_path)] = s
         log.info(f'Scanning sources...')
         file_extensions = list(FileExtension.values) + self.extra_extensions
         for sourceroot, source in dirmap.items():
@@ -38,7 +38,8 @@ class Command(BaseCommand):
                 ext = ext.strip().lower()
                 if ext not in file_extensions:
                     continue
-                on_disk.append(str(rootpath / filename))
+                filepath = Path(rootpath / filename).resolve(strict=True)
+                on_disk.append(str(filepath))
         filemap = {}
         for item in media:
             for filepath in on_disk:
@@ -50,7 +51,8 @@ class Command(BaseCommand):
         for filepath, item in filemap.items():
             log.info(f'Matched on-disk file: {filepath} '
                      f'to media item: {item.source} / {item}')
-            item.media_file.name = filepath
+            item.media_file.name = str(Path(filepath).relative_to(item.media_file.storage.location))
             item.downloaded = True
+            item.downloaded_filesize = Path(filepath).stat().st_size
             item.save()
         log.info('Done')

From b0ea486302dcdac20829150334ef037efc56fe68 Mon Sep 17 00:00:00 2001
From: tcely
Date: Wed, 26 Feb 2025 07:39:23 -0500
Subject: [PATCH 061/417] Prevent unintended unhealthy restarts

When I intentionally set a test container to be down, I really don't
want it to be restarted because it was "unhealthy" for too long.
---
 tubesync/healthcheck.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tubesync/healthcheck.py b/tubesync/healthcheck.py
index 5bc127b0..0826b7bb 100755
--- a/tubesync/healthcheck.py
+++ b/tubesync/healthcheck.py
@@ -30,6 +30,9 @@ def do_heatlhcheck(url):
 
 
 if __name__ == '__main__':
+    # if it is marked as intentionally down, nothing else matters
+    if os.path.exists('/run/service/gunicorn/down'):
+        sys.exit(0)
     try:
         url = sys.argv[1]
     except IndexError:
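The check leans on the s6 convention that a `down` file marks a service as intentionally stopped; a small sketch of the same test on its own (the path matches the one used in the patch):

    import os
    import sys

    # When an operator downs gunicorn (for example with
    # `s6-svc -d /run/service/gunicorn` after creating this marker file),
    # report healthy so Docker does not restart the container.
    if os.path.exists('/run/service/gunicorn/down'):
        sys.exit(0)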
From e2dd61af66bd88c18eda1b7a96689e0476a2a510 Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 27 Feb 2025 08:43:14 -0500
Subject: [PATCH 062/417] Pass through forwarded Host & Port

---
 config/root/etc/nginx/nginx.conf | 36 +++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/config/root/etc/nginx/nginx.conf b/config/root/etc/nginx/nginx.conf
index f09c02e1..1ccc5046 100644
--- a/config/root/etc/nginx/nginx.conf
+++ b/config/root/etc/nginx/nginx.conf
@@ -50,6 +50,38 @@ http {
     gzip_http_version 1.1;
     gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
 
+    # X-Forwarded-Host (pass-through, or set)
+    map $http_x_forwarded_host $x_forwarded_host {
+        default $http_x_forwarded_host;
+        "" $http_host;
+    }
+
+    # X-Forwarded-Proto (pass-through, or set)
+    map $http_x_forwarded_proto $x_forwarded_proto {
+        default $http_x_forwarded_proto;
+        "" $scheme;
+    }
+
+    # Set the default port based on X-Forwarded-Proto
+    map $x_forwarded_proto $default_http_port {
+        default 80;
+        "https" 443;
+    }
+
+    # Extract the remote port from the HTTP Host header.
+    # Uses default_http_port from above,
+    # when no port was found in the header.
+    map $http_host $x_remote_port {
+        default $default_http_port;
+        "~^[^\:]+:(?<p>\d+)$" $p;
+    }
+
+    # X-Forwarded-Port (pass-through, or set)
+    map $http_x_forwarded_port $x_forwarded_port {
+        default $http_x_forwarded_port;
+        "" $x_remote_port;
+    }
+
     # Site
     server {
 
@@ -72,7 +104,9 @@
         location / {
             proxy_pass http://127.0.0.1:8080;
             proxy_set_header Host localhost;
-            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_set_header X-Forwarded-Host $x_forwarded_host;
+            proxy_set_header X-Forwarded-Port $x_forwarded_port;
+            proxy_set_header X-Forwarded-Proto $x_forwarded_proto;
             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
             proxy_set_header X-Real-IP $remote_addr;
             proxy_redirect off;

From beeaafe2464f310104cab3e6393af5bb20e5bc25 Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 27 Feb 2025 08:51:09 -0500
Subject: [PATCH 063/417] Use the values from `nginx` for CSRF Origin checks

---
 tubesync/tubesync/settings.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py
index c9332fcd..a9f4061c 100644
--- a/tubesync/tubesync/settings.py
+++ b/tubesync/tubesync/settings.py
@@ -119,6 +119,8 @@ Disallow: /
 '''.strip()
 
 
+USE_X_FORWARDED_HOST = True
+USE_X_FORWARDED_PORT = True
 X_FRAME_OPTIONS = 'SAMEORIGIN'

From e04562d8befbd1d60d8f8fb21b7906a7138b8c9b Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 27 Feb 2025 08:59:21 -0500
Subject: [PATCH 064/417] Add the port to the Host header

Without this, port 80 may be assumed, which would be incorrect.
---
 config/root/etc/nginx/nginx.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/root/etc/nginx/nginx.conf b/config/root/etc/nginx/nginx.conf
index 1ccc5046..8a132d34 100644
--- a/config/root/etc/nginx/nginx.conf
+++ b/config/root/etc/nginx/nginx.conf
@@ -103,7 +103,7 @@ http {
         # Authentication and proxying
         location / {
             proxy_pass http://127.0.0.1:8080;
-            proxy_set_header Host localhost;
+            proxy_set_header Host localhost:8080;
             proxy_set_header X-Forwarded-Host $x_forwarded_host;
             proxy_set_header X-Forwarded-Port $x_forwarded_port;
             proxy_set_header X-Forwarded-Proto $x_forwarded_proto;

From fd4f917b057a5d5f2275ba8e1b5d57ca1d4d6167 Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 27 Feb 2025 10:14:33 -0500
Subject: [PATCH 065/417] Pass days to indexer function

---
 tubesync/sync/models.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index de874687..16736a8e 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -508,7 +508,10 @@ class Source(models.Model):
         indexer = self.INDEXERS.get(self.source_type, None)
         if not callable(indexer):
             raise Exception(f'Source type f"{self.source_type}" has no indexer')
-        response = indexer(self.get_index_url(type=type))
+        days = None
+        if self.download_cap_date:
+            days = timedelta(seconds=self.download_cap).days
+        response = indexer(self.get_index_url(type=type), days=days)
         if not isinstance(response, dict):
             return []
         entries = response.get('entries', [])

From ddc9588cbf860235e2463ae950783a02384b69e2 Mon Sep 17 00:00:00 2001
From: tcely
Date: Thu, 27 Feb 2025 10:50:00 -0500
Subject: [PATCH 066/417] Set `daterange` for `yt-dlp` when indexing

---
 tubesync/sync/youtube.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index 5d0cf19e..95eebb8a 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -130,12 +130,21 @@ def _subscriber_only(msg='', response=None):
     return False
 
 
-def get_media_info(url):
+def get_media_info(url, days=None):
     '''
         Extracts information from a YouTube URL and returns it as a dict. For
        a channel or playlist this returns a dict of all the videos on the
        channel or playlist as well as associated metadata.
     '''
+    start = None
+    if days is not None:
+        try:
+            days = int(str(days), 10)
+        except Exception as e:
+            days = None
+        start = (
+            f'yesterday-{days!s}days' if days else None
+        )
     opts = get_yt_opts()
     opts.update({
         'ignoreerrors': False, # explicitly set this to catch exceptions
@@ -145,6 +154,7 @@
         'logger': log,
         'extract_flat': True,
         'check_formats': True,
+        'daterange': yt_dlp.utils.DateRange(start=start),
         'extractor_args': {
             'youtube': {'formats': ['missing_pot']},
             'youtubetab': {'approximate_date': ['true']},
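A minimal sketch of the `DateRange` semantics relied on here: yt-dlp accepts relative expressions such as 'yesterday-7days' for the inclusive start of the range, and entries dated before it are skipped during indexing (the dates below are illustrative):

    import yt_dlp

    date_range = yt_dlp.utils.DateRange(start='yesterday-7days')
    print('20200101' in date_range)  # False: earlier than the range start
    # An unbounded range (start=None) accepts every date, which matches
    # the days=None default above.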
From b78075557afae842d1bd375b6ac567c761bd3a83 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 01:32:32 -0500
Subject: [PATCH 067/417] Add `pagination` for scheduled tasks

---
 tubesync/sync/templates/sync/tasks.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html
index 58e37f58..8f137a90 100644
--- a/tubesync/sync/templates/sync/tasks.html
+++ b/tubesync/sync/templates/sync/tasks.html
@@ -74,6 +74,7 @@
 {% endfor %}
+{% include 'pagination.html' with pagination=sources.paginator filter=source.pk %}

From 2d4e23a8d7cb6d2a8b88a0fc416aa0871e77a985 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 01:58:06 -0500
Subject: [PATCH 068/417] Pagination for scheduled tasks

Keep showing running and errors on every page.
---
 tubesync/sync/views.py | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py
index cbba2f4d..16dd1c8a 100644
--- a/tubesync/sync/views.py
+++ b/tubesync/sync/views.py
@@ -743,6 +743,7 @@ class TasksView(ListView):
 
     template_name = 'sync/tasks.html'
     context_object_name = 'tasks'
+    paginate_by = settings.TASKS_PER_PAGE
     messages = {
         'reset': _('All tasks have been reset'),
     }
@@ -757,17 +758,24 @@ class TasksView(ListView):
         return super().dispatch(request, *args, **kwargs)
 
     def get_queryset(self):
-        return Task.objects.all().order_by('run_at')
+        _ordering = getattr(settings,
+            'BACKGROUND_TASK_PRIORITY_ORDERING',
+            'DESC'
+        )
+        return Task.objects.all().order_by(
+            f"{'-' if 'ASC' != _ordering else ''}priority",
+            'run_at'
+        )
 
     def get_context_data(self, *args, **kwargs):
         data = super().get_context_data(*args, **kwargs)
         data['message'] = self.message
+        queryset = self.get_queryset()
         data['running'] = []
         data['errors'] = []
         data['scheduled'] = []
-        queryset = self.get_queryset()
         now = timezone.now()
-        for task in queryset:
+        for task in queryset.filter(locked_at__isnull=False):
             # There was broken logic in `Task.objects.locked()`, work around it.
             # With that broken logic, the tasks never resume properly.
             # This check unlocks the tasks without a running process.
@@ -795,6 +803,21 @@ class TasksView(ListView):
                 data['errors'].append(task)
             else:
                 data['scheduled'].append(task)
+        for task in data['tasks']:
+            obj, url = map_task_to_instance(task)
+            if not obj:
+                continue
+            already_added = (
+                task in data['running'] or
+                task in data['errors'] or
+                task in data['scheduled']
+            )
+            if already_added:
+                continue
+            setattr(task, 'instance', obj)
+            setattr(task, 'url', url)
+            setattr(task, 'run_now', task.run_at < now)
+            data['scheduled'].append(task)
         return data

From b61061d3c155db4d07e5509677b532c0982f8d7c Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 02:10:20 -0500
Subject: [PATCH 069/417] Keep the rows distinct

---
 tubesync/sync/templates/sync/tasks.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html
index 8f137a90..f0a87493 100644
--- a/tubesync/sync/templates/sync/tasks.html
+++ b/tubesync/sync/templates/sync/tasks.html
@@ -74,8 +74,8 @@
 {% endfor %}
-{% include 'pagination.html' with pagination=sources.paginator filter=source.pk %}
+{% include 'pagination.html' with pagination=sources.paginator filter=source.pk %}
 Completed
[The HTML markup around these template lines did not survive extraction; the include was moved relative to the surrounding tags, which is why the removed and added lines read identically here.]
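The unlock workaround above hinges on `locked_by_pid_running()` distinguishing a dead PID (`False`) from no recorded PID at all (`None`); a sketch of the usual liveness test behind such a check (the helper name is illustrative):

    import os

    def pid_exists(pid):
        # Signal 0 performs error checking only; no signal is delivered,
        # so this tests whether the PID exists without touching the process.
        try:
            os.kill(int(pid), 0)
        except OSError:
            return False
        return True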
From 63f3df1d5ce437ee6dfb345d49b0ad48f589bc77 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 12:45:36 -0500
Subject: [PATCH 070/417] Clean up __pycache__ directories

---
 Dockerfile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 7376bbb5..bccfca16 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -297,6 +297,9 @@ RUN --mount=type=tmpfs,target=/cache \
     --mount=type=cache,id=apt-cache-cache,sharing=locked,target=/var/cache/apt \
     --mount=type=bind,source=Pipfile,target=/app/Pipfile \
     set -x && \
+    # Remove __pycache__ directories from image
+    PYTHONPYCACHEPREFIX=/tmp/pycache && \
+    export PYTHONPYCACHEPREFIX && \
     # Update from the network and keep cache
     rm -f /etc/apt/apt.conf.d/docker-clean && \
    apt-get update && \
@@ -349,9 +352,14 @@ COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings.py
 RUN set -x && \
     # Make absolutely sure we didn't accidentally bundle a SQLite dev database
     rm -rf /app/db.sqlite3 && \
+    # Remove __pycache__ directories from image
+    PYTHONPYCACHEPREFIX=/tmp/pycache && \
+    export PYTHONPYCACHEPREFIX && \
     # Run any required app commands
     /usr/bin/python3 -B /app/manage.py compilescss && \
     /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \
+    # Clean up
+    rm -v -rf /tmp/* && \
     # Create config, downloads and run dirs
     mkdir -v -p /run/app && \
     mkdir -v -p /config/media && \

From fcc37100c32d1a8232ff7b5f60b09d1e51c6350b Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 13:44:19 -0500
Subject: [PATCH 071/417] `python -B` is well-behaved

Use the `tmpfs` mount on `/cache` instead of `/tmp` to make this faster.
---
 Dockerfile | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index bccfca16..b19174bd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -298,7 +298,7 @@ RUN --mount=type=tmpfs,target=/cache \
     --mount=type=bind,source=Pipfile,target=/app/Pipfile \
     set -x && \
     # Remove __pycache__ directories from image
-    PYTHONPYCACHEPREFIX=/tmp/pycache && \
+    PYTHONPYCACHEPREFIX=/cache/pycache && \
     export PYTHONPYCACHEPREFIX && \
     # Update from the network and keep cache
     rm -f /etc/apt/apt.conf.d/docker-clean && \
     apt-get update && \
@@ -352,14 +352,9 @@ COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings.py
 RUN set -x && \
     # Make absolutely sure we didn't accidentally bundle a SQLite dev database
     rm -rf /app/db.sqlite3 && \
-    # Remove __pycache__ directories from image
-    PYTHONPYCACHEPREFIX=/tmp/pycache && \
-    export PYTHONPYCACHEPREFIX && \
     # Run any required app commands
     /usr/bin/python3 -B /app/manage.py compilescss && \
     /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \
-    # Clean up
-    rm -v -rf /tmp/* && \
     # Create config, downloads and run dirs
     mkdir -v -p /run/app && \
     mkdir -v -p /config/media && \

From 0786933c5f6dbc41716eaa6990708a5c5ba80489 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 14:04:29 -0500
Subject: [PATCH 072/417] Tweak the comment

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index b19174bd..ffaeaff3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -297,7 +297,7 @@ RUN --mount=type=tmpfs,target=/cache \
     --mount=type=cache,id=apt-cache-cache,sharing=locked,target=/var/cache/apt \
     --mount=type=bind,source=Pipfile,target=/app/Pipfile \
     set -x && \
-    # Remove __pycache__ directories from image
+    # Remove __pycache__ directories from the image
     PYTHONPYCACHEPREFIX=/cache/pycache && \
     export PYTHONPYCACHEPREFIX && \
     # Update from the network and keep cache
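A quick sketch of what the variable does, assuming Python 3.8+ as used in the image; the printed value is whatever the build environment set, which is how the stages above keep bytecode caches out of the final layers:

    import sys

    # PYTHONPYCACHEPREFIX (or `python -X pycache_prefix=...`) mirrors into
    # sys.pycache_prefix; .pyc files are then written under that tree
    # instead of per-package __pycache__ directories.
    print(sys.pycache_prefix)  # e.g. '/cache/pycache' during the build stage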
From c81f0854e08636cbd719382731ba87f501c3add8 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 14:10:07 -0500
Subject: [PATCH 073/417] Add `scheduled_total` to the context

---
 tubesync/sync/views.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py
index 16dd1c8a..b0fb13ba 100644
--- a/tubesync/sync/views.py
+++ b/tubesync/sync/views.py
@@ -774,6 +774,7 @@ class TasksView(ListView):
         data['running'] = []
         data['errors'] = []
         data['scheduled'] = []
+        data['scheduled_total'] = queryset.filter(locked_at__isnull=True).count()
         now = timezone.now()
         for task in queryset.filter(locked_at__isnull=False):
             # There was broken logic in `Task.objects.locked()`, work around it.

From 174d9d46e83046260add289751c6f49c892326f2 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 14:11:15 -0500
Subject: [PATCH 074/417] Use `scheduled_total` instead of scheduled for this
 page

---
 tubesync/sync/templates/sync/tasks.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html
index f0a87493..ecb00116 100644
--- a/tubesync/sync/templates/sync/tasks.html
+++ b/tubesync/sync/templates/sync/tasks.html
@@ -56,7 +56,7 @@
-{{ scheduled|length }} Scheduled
+{{ scheduled_total }} Scheduled
 Tasks which are scheduled to run in the future or are waiting in a queue
 to be processed. They can be waiting for an available worker to run immediately, or

From 4f32e6304747abdbbbd33d3ab03cabe4683210f3 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 14:14:57 -0500
Subject: [PATCH 075/417] Switch to `total_scheduled` in tasks.html

---
 tubesync/sync/templates/sync/tasks.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html
index ecb00116..de65c787 100644
--- a/tubesync/sync/templates/sync/tasks.html
+++ b/tubesync/sync/templates/sync/tasks.html
@@ -56,7 +56,7 @@
-{{ scheduled_total }} Scheduled
+{{ total_scheduled }} Scheduled
 Tasks which are scheduled to run in the future or are waiting in a queue
 to be processed. They can be waiting for an available worker to run immediately, or

From b2c1748d622a2607fbabed19292943cf6d122421 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 14:16:11 -0500
Subject: [PATCH 076/417] Switch to `total_scheduled` in views.py

---
 tubesync/sync/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py
index b0fb13ba..c421c1cf 100644
--- a/tubesync/sync/views.py
+++ b/tubesync/sync/views.py
@@ -774,7 +774,7 @@ class TasksView(ListView):
         data['running'] = []
         data['errors'] = []
         data['scheduled'] = []
-        data['scheduled_total'] = queryset.filter(locked_at__isnull=True).count()
+        data['total_scheduled'] = queryset.filter(locked_at__isnull=True).count()
         now = timezone.now()
         for task in queryset.filter(locked_at__isnull=False):
             # There was broken logic in `Task.objects.locked()`, work around it.

From fdbea30f74cda4c44ac63bf834a8c0c4704c2c8e Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 14:25:45 -0500
Subject: [PATCH 077/417] Improve readability

---
 tubesync/sync/views.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py
index c421c1cf..7ce4cdd5 100644
--- a/tubesync/sync/views.py
+++ b/tubesync/sync/views.py
@@ -758,12 +758,14 @@ class TasksView(ListView):
         return super().dispatch(request, *args, **kwargs)
 
     def get_queryset(self):
-        _ordering = getattr(settings,
+        order = getattr(settings,
             'BACKGROUND_TASK_PRIORITY_ORDERING',
             'DESC'
         )
+        prefix = '-' if 'ASC' != order else ''
+        _priority = f'{prefix}priority'
         return Task.objects.all().order_by(
-            f"{'-' if 'ASC' != _ordering else ''}priority",
+            _priority,
             'run_at'
         )

From b31e26b964c54537a5fb6f9ccdd154259ab7b4eb Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 15:15:35 -0500
Subject: [PATCH 078/417] Allow filtering tasks by source

---
 tubesync/sync/views.py | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py
index 7ce4cdd5..86ea4717 100644
--- a/tubesync/sync/views.py
+++ b/tubesync/sync/views.py
@@ -745,40 +745,61 @@ class TasksView(ListView):
     context_object_name = 'tasks'
     paginate_by = settings.TASKS_PER_PAGE
     messages = {
+        'filter': _('Viewing tasks filtered for source: {name}'),
         'reset': _('All tasks have been reset'),
     }
 
     def __init__(self, *args, **kwargs):
+        self.filter_source = None
         self.message = None
         super().__init__(*args, **kwargs)
 
     def dispatch(self, request, *args, **kwargs):
         message_key = request.GET.get('message', '')
         self.message = self.messages.get(message_key, '')
+        filter_by = request.GET.get('filter', '')
+        if filter_by:
+            try:
+                self.filter_source = Source.objects.get(pk=filter_by)
+            except Source.DoesNotExist:
+                self.filter_source = None
+            if not message_key or 'filter' == message_key:
+                message = self.messages.get('filter', '')
+                self.message = message.format(
+                    name=self.filter_source.name
+                )
+
         return super().dispatch(request, *args, **kwargs)
 
     def get_queryset(self):
+        qs = Task.objects.all()
+        if self.filter_source:
+            qs = qs.filter(queue=str(self.filter_source.pk))
         order = getattr(settings,
             'BACKGROUND_TASK_PRIORITY_ORDERING',
             'DESC'
        )
         prefix = '-' if 'ASC' != order else ''
         _priority = f'{prefix}priority'
-        return Task.objects.all().order_by(
+        return qs.order_by(
             _priority,
             'run_at'
         )
 
     def get_context_data(self, *args, **kwargs):
         data = super().get_context_data(*args, **kwargs)
+        now = timezone.now()
+        qs = Task.objects.all()
+
+        # Add to context data from ListView
         data['message'] = self.message
-        queryset = self.get_queryset()
+        data['source'] = self.filter_source
         data['running'] = []
         data['errors'] = []
         data['scheduled'] = []
-        data['total_scheduled'] = queryset.filter(locked_at__isnull=True).count()
-        now = timezone.now()
-        for task in queryset.filter(locked_at__isnull=False):
+        data['total_scheduled'] = qs.filter(locked_at__isnull=True).count()
+
+        for task in qs.filter(locked_at__isnull=False):
             # There was broken logic in `Task.objects.locked()`, work around it.
             # With that broken logic, the tasks never resume properly.
             # This check unlocks the tasks without a running process.
@@ -806,6 +827,7 @@ class TasksView(ListView):
                 data['errors'].append(task)
             else:
                 data['scheduled'].append(task)
+
         for task in data['tasks']:
             obj, url = map_task_to_instance(task)
             if not obj:
@@ -821,6 +843,7 @@ class TasksView(ListView):
             setattr(task, 'url', url)
             setattr(task, 'run_now', task.run_at < now)
             data['scheduled'].append(task)
+
         return data
 
 
@@ -850,10 +873,10 @@ class CompletedTasksView(ListView):
         return super().dispatch(request, *args, **kwargs)
 
     def get_queryset(self):
-        q = CompletedTask.objects.all()
+        qs = CompletedTask.objects.all()
         if self.filter_source:
-            q = q.filter(queue=str(self.filter_source.pk))
-        return q.order_by('-run_at')
+            qs = qs.filter(queue=str(self.filter_source.pk))
+        return qs.order_by('-run_at')
 
     def get_context_data(self, *args, **kwargs):
         data = super().get_context_data(*args, **kwargs)
@@ -862,11 +885,10 @@ class CompletedTasksView(ListView):
             error_message = get_error_message(task)
             setattr(task, 'error_message', error_message)
         data['message'] = ''
-        data['source'] = None
+        data['source'] = self.filter_source
         if self.filter_source:
             message = str(self.messages.get('filter', ''))
             data['message'] = message.format(name=self.filter_source.name)
-            data['source'] = self.filter_source
         return data
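The filter works because these views treat the task's queue name as the source primary key, so matching is a plain string comparison; a minimal sketch (assuming a `Source` instance named `source` and the models imported as in views.py):

    from background_task.models import Task

    # Tasks scheduled for one source, ordered the same way as the view.
    tasks_for_source = Task.objects.filter(queue=str(source.pk)).order_by('run_at')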
From ff6a858783705bb0587423be28cd23ba5279e8ad Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 15:41:33 -0500
Subject: [PATCH 079/417] Humanize the numbers for each category

This matters mostly for scheduled counts higher than 1,000.
---
 tubesync/sync/templates/sync/tasks.html | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html
index de65c787..12753ae8 100644
--- a/tubesync/sync/templates/sync/tasks.html
+++ b/tubesync/sync/templates/sync/tasks.html
@@ -1,4 +1,4 @@
-{% extends 'base.html' %}
+{% extends 'base.html' %}{% load humanize %}
 
 {% block headtitle %}Tasks{% endblock %}
 
@@ -17,7 +17,7 @@
 {% include 'infobox.html' with message=message %}
-{{ running|length }} Running
+{{ running|length|intcomma }} Running
 Running tasks are tasks which currently being worked on right now.
@@ -35,7 +35,7 @@
-{{ errors|length }} Error{{ errors|length|pluralize }}
+{{ errors|length|intcomma }} Error{{ errors|length|pluralize }}
 Tasks which generated an error are shown here. Tasks are retried a couple
 of times, so if there was an intermittent error such as a download got interrupted
@@ -56,7 +56,7 @@
-{{ total_scheduled }} Scheduled
+{{ total_scheduled|intcomma }} Scheduled
 Tasks which are scheduled to run in the future or are waiting in a queue
 to be processed. They can be waiting for an available worker to run immediately, or
[The HTML markup surrounding these template lines did not survive extraction; only the recoverable diff lines are kept.]

From a993240b969847925cf5e451599d5d385bfd7704 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 15:50:31 -0500
Subject: [PATCH 080/417] Better display of large numbers

---
 tubesync/sync/templates/sync/dashboard.html | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tubesync/sync/templates/sync/dashboard.html b/tubesync/sync/templates/sync/dashboard.html
index f25f36fc..af342800 100644
--- a/tubesync/sync/templates/sync/dashboard.html
+++ b/tubesync/sync/templates/sync/dashboard.html
@@ -24,9 +24,9 @@
@@ -35,9 +35,9 @@
@@ -46,9 +46,9 @@
[The changed lines in these three hunks were HTML markup that did not survive extraction; only the hunk headers remain.]
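`intcomma` is the humanize filter behind these template changes; it requires 'django.contrib.humanize' in INSTALLED_APPS for `{% load humanize %}` to work, and it can be sanity-checked directly from Python:

    from django.contrib.humanize.templatetags.humanize import intcomma

    print(intcomma(1234567))  # '1,234,567'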
From b38237803ae0d3709deadc0ac038ddf853f53c1c Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 16:24:01 -0500
Subject: [PATCH 081/417] Do not lose track of downloaded files

---
 tubesync/sync/models.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 16736a8e..a3ae6ed7 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1544,14 +1544,19 @@ class Media(models.Model):
 
         # move and change names to match stem
         for other_path in other_paths:
+            # it should exist, but check anyway
+            if not other_path.exists():
+                continue
+
             old_file_str = other_path.name
             new_file_str = new_stem + old_file_str[len(old_stem):]
             new_file_path = Path(new_prefix_path / new_file_str)
             if new_file_path == other_path:
                 continue
             log.debug(f'Considering replace for: {self!s}\n\t{other_path!s}\n\t{new_file_path!s}')
-            # it should exist, but check anyway
-            if other_path.exists():
+            # do not move the file we just updated in the database
+            # doing that loses track of the `Media.media_file` entirely
+            if not new_video_path.samefile(other_path):
                 log.debug(f'{self!s}: {other_path!s} => {new_file_path!s}')
                 other_path.replace(new_file_path)

From 0703032fcfe84efaf2651447c1efa0bfd065dd78 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 16:51:53 -0500
Subject: [PATCH 082/417] Be more user-friendly when sorting tasks

Sort by time first, then by priority for tasks scheduled at the same time.
---
 tubesync/sync/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py
index 86ea4717..66c17595 100644
--- a/tubesync/sync/views.py
+++ b/tubesync/sync/views.py
@@ -782,8 +782,8 @@ class TasksView(ListView):
         prefix = '-' if 'ASC' != order else ''
         _priority = f'{prefix}priority'
         return qs.order_by(
+            'run_at',
             _priority,
-            'run_at'
         )

From fd0743986b7371a9db4ea034ab9baa8089f46858 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 17:41:59 -0500
Subject: [PATCH 083/417] Restrict `PYTHONPYCACHEPREFIX` to `pipenv`

It turns out `apt` and friends don't play nicely with that variable.
---
 Dockerfile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index ffaeaff3..5ee70af1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -297,9 +297,6 @@ RUN --mount=type=tmpfs,target=/cache \
     --mount=type=cache,id=apt-cache-cache,sharing=locked,target=/var/cache/apt \
     --mount=type=bind,source=Pipfile,target=/app/Pipfile \
     set -x && \
-    # Remove __pycache__ directories from the image
-    PYTHONPYCACHEPREFIX=/cache/pycache && \
-    export PYTHONPYCACHEPREFIX && \
     # Update from the network and keep cache
     rm -f /etc/apt/apt.conf.d/docker-clean && \
     apt-get update && \
@@ -325,6 +322,7 @@ RUN --mount=type=tmpfs,target=/cache \
     HOME="/tmp/${HOME#/}" \
     XDG_CACHE_HOME='/cache' \
     PIPENV_VERBOSITY=64 \
+    PYTHONPYCACHEPREFIX=/cache/pycache \
     pipenv install --system --skip-lock && \
     # Clean up
     apt-get -y autoremove --purge \

From f41faa444d6c768fb2d5dc8dc3b10edcca656046 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 20:45:21 -0500
Subject: [PATCH 084/417] Rename to a temporary path then final destination

Fixes #798
---
 tubesync/sync/signals.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index a7ef1b5e..594a82a8 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -34,8 +34,10 @@ def source_pre_save(sender, instance, **kwargs):
         not new_dirpath.exists()
     )
     if rename_source_directory:
+        tmp_dirpath = existing_dirpath.parent / ('.tmp.' + existing_dirpath.name)
+        existing_dirpath.rename(tmp_dirpath)
         mkdir_p(new_dirpath.parent)
-        existing_dirpath.rename(new_dirpath)
+        tmp_dirpath.rename(new_dirpath)
     recreate_index_source_task = (
         existing_source.name != instance.name or
         existing_source.index_schedule != instance.index_schedule

From 8c465f0afe435dd2e36050ed3c2bced8c2af79b1 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 22:51:52 -0500
Subject: [PATCH 085/417] Handle more cases

This should now handle:
- adding a sub-directory
- removing a sub-directory
---
 tubesync/sync/signals.py | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index 594a82a8..a2ad1cb6 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+from tempfile import TemporaryDirectory
 from django.conf import settings
 from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
 from django.dispatch import receiver
@@ -29,15 +30,35 @@ def source_pre_save(sender, instance, **kwargs):
         return
     existing_dirpath = existing_source.directory_path.resolve(strict=True)
     new_dirpath = instance.directory_path.resolve(strict=False)
-    rename_source_directory = (
-        existing_dirpath != new_dirpath and
-        not new_dirpath.exists()
-    )
-    if rename_source_directory:
-        tmp_dirpath = existing_dirpath.parent / ('.tmp.' + existing_dirpath.name)
-        existing_dirpath.rename(tmp_dirpath)
-        mkdir_p(new_dirpath.parent)
-        tmp_dirpath.rename(new_dirpath)
+    if existing_dirpath != new_dirpath:
+        path_name = lambda p: p.name
+        relative_dir = existing_source.directory
+        rd_parents = Path(relative_dir).parents
+        rd_parents_set = set(map(path_name, rd_parents))
+        ad_parents = existing_dirpath.parents
+        ad_parents_set = set(map(path_name, ad_parents))
+        # the names in the relative path are also in the absolute path
+        parents_count = len(ad_parents_set.intersection(rd_parents_set))
+        work_directory = existing_dirpath
+        while parents_count > 0:
+            work_directory = work_directory.parent
+            parents_count -= 1
+        with TemporaryDirectory(suffix='.'+new_dirpath.name, prefix='.tmp.', dir=work_directory) as tmp_dir:
+            tmp_dirpath = Path(tmp_dir)
+            existed = None
+            if new_dirpath.exists():
+                existed = tmp_dirpath / 'existed'
+                new_dirpath.rename(existed)
+            previous = tmp_dirpath / 'previous'
+            existing_dirpath.rename(previous)
+            mkdir_p(new_dirpath.parent)
+            previous.rename(new_dirpath)
+            if existed and existed.is_dir():
+                existed.rename(new_dirpath / '.existed')
+                # TODO: merge the contents of the directories
+                pass
+            elif existed:
+                existed.rename(new_dirpath / '.existed')
     recreate_index_source_task = (
         existing_source.name != instance.name or
         existing_source.index_schedule != instance.index_schedule

From 844fce3c9073b7e7840c6b297693e9b0cee38489 Mon Sep 17 00:00:00 2001
From: tcely
Date: Fri, 28 Feb 2025 22:59:49 -0500
Subject: [PATCH 086/417] Avoid the while loop

---
 tubesync/sync/signals.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index a2ad1cb6..f0669f1b 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -40,9 +40,8 @@ def source_pre_save(sender, instance, **kwargs):
         # the names in the relative path are also in the absolute path
         parents_count = len(ad_parents_set.intersection(rd_parents_set))
         work_directory = existing_dirpath
-        while parents_count > 0:
+        for _ in range(parents_count, 0, -1):
             work_directory = work_directory.parent
-            parents_count -= 1
         with TemporaryDirectory(suffix='.'+new_dirpath.name, prefix='.tmp.', dir=work_directory) as tmp_dir:
             tmp_dirpath = Path(tmp_dir)
             existed = None

From d1f6c8184a89e903d0a3fd8e179679b81d2ce911 Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 00:01:50 -0500
Subject: [PATCH 087/417] Merge directories and remove empty directory

---
 tubesync/sync/signals.py | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index f0669f1b..11857f95 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -28,6 +28,7 @@ def source_pre_save(sender, instance, **kwargs):
     except Source.DoesNotExist:
         log.debug(f'source_pre_save signal: no existing source: {sender} - {instance}')
         return
+
     existing_dirpath = existing_source.directory_path.resolve(strict=True)
     new_dirpath = instance.directory_path.resolve(strict=False)
     if existing_dirpath != new_dirpath:
@@ -42,22 +43,34 @@ def source_pre_save(sender, instance, **kwargs):
         work_directory = existing_dirpath
         for _ in range(parents_count, 0, -1):
             work_directory = work_directory.parent
-        with TemporaryDirectory(suffix='.'+new_dirpath.name, prefix='.tmp.', dir=work_directory) as tmp_dir:
+        with TemporaryDirectory(suffix=('.'+new_dirpath.name), prefix='.tmp.', dir=work_directory) as tmp_dir:
             tmp_dirpath = Path(tmp_dir)
             existed = None
             if new_dirpath.exists():
-                existed = tmp_dirpath / 'existed'
-                new_dirpath.rename(existed)
-            previous = tmp_dirpath / 'previous'
-            existing_dirpath.rename(previous)
+                existed = new_dirpath.rename(tmp_dirpath / 'existed')
+            previous = existing_dirpath.rename(tmp_dirpath / 'previous')
             mkdir_p(new_dirpath.parent)
             previous.rename(new_dirpath)
+            existing_dirpath = previous = None
             if existed and existed.is_dir():
-                existed.rename(new_dirpath / '.existed')
-                # TODO: merge the contents of the directories
-                pass
+                existed = existed.rename(new_dirpath / '.existed')
+                for entry_path in existed.iterdir():
+                    try:
+                        target = new_dirpath / entry_path.name
+                        if not target.exists():
+                            entry_path = entry_path.rename(target)
+                    except Exception as e:
+                        log.exception(e)
+                try:
+                    existed.rmdir()
+                except Exception as e:
+                    log.exception(e)
             elif existed:
-                existed.rename(new_dirpath / '.existed')
+                try:
+                    existed = existed.rename(new_dirpath / ('.existed-' + new_dirpath.name))
+                except Exception as e:
+                    log.exception(e)
+
     recreate_index_source_task = (
         existing_source.name != instance.name or
         existing_source.index_schedule != instance.index_schedule
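The rewritten signal leans on `Path.rename()` returning the destination path (Python 3.8+), which is what allows results like `existed` and `previous` to be chained and reassigned; a small self-contained sketch:

    from pathlib import Path
    from tempfile import TemporaryDirectory

    with TemporaryDirectory() as tmp:
        src = Path(tmp) / 'previous'
        src.mkdir()
        # rename() returns the new Path, so the old name can be discarded
        moved = src.rename(Path(tmp) / 'renamed')
        print(moved.name)  # 'renamed'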
From 8b611aa71b2df8de31efa2c5a4fd863d9f908f3d Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 00:12:17 -0500
Subject: [PATCH 088/417] There is a `_` already

---
 tubesync/sync/signals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index 11857f95..508b82b9 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -41,7 +41,7 @@ def source_pre_save(sender, instance, **kwargs):
         # the names in the relative path are also in the absolute path
         parents_count = len(ad_parents_set.intersection(rd_parents_set))
         work_directory = existing_dirpath
-        for _ in range(parents_count, 0, -1):
+        for _count in range(parents_count, 0, -1):
             work_directory = work_directory.parent
         with TemporaryDirectory(suffix=('.'+new_dirpath.name), prefix='.tmp.', dir=work_directory) as tmp_dir:
             tmp_dirpath = Path(tmp_dir)

From 3411bac78ad27a0f2b67a010991a288d6de86c5c Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 02:54:56 -0500
Subject: [PATCH 089/417] Set `Media.download_date` for imported files

---
 .../management/commands/import-existing-media.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tubesync/sync/management/commands/import-existing-media.py b/tubesync/sync/management/commands/import-existing-media.py
index fd6a800d..66f6648c 100644
--- a/tubesync/sync/management/commands/import-existing-media.py
+++ b/tubesync/sync/management/commands/import-existing-media.py
@@ -1,4 +1,5 @@
 import os
+from datetime import timedelta
 from pathlib import Path
 from django.core.management.base import BaseCommand, CommandError
 from common.logger import log
@@ -54,5 +55,16 @@ class Command(BaseCommand):
             item.media_file.name = str(Path(filepath).relative_to(item.media_file.storage.location))
             item.downloaded = True
             item.downloaded_filesize = Path(filepath).stat().st_size
+            # set a reasonable download date
+            date = item.posix_epoch + timedelta(seconds=Path(filepath).stat().st_mtime)
+            if item.published and item.published > date:
+                date = item.published
+            if item.has_metadata:
+                metadata_date = item.posix_epoch + timedelta(seconds=item.loaded_metadata.get('epoch', 0))
+                if metadata_date and metadata_date > date:
+                    date = metadata_date
+            if item.download_date and item.download_date > date:
+                date = item.download_date
+            item.download_date = date
             item.save()
         log.info('Done')

From 14c3b99ef18dc79a9b9b8cda567c30be263a40d2 Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 04:06:58 -0500
Subject: [PATCH 090/417] Be better about errors during the renaming process

---
 tubesync/sync/signals.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index 508b82b9..9f067467 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -46,12 +46,18 @@ def source_pre_save(sender, instance, **kwargs):
         with TemporaryDirectory(suffix=('.'+new_dirpath.name), prefix='.tmp.', dir=work_directory) as tmp_dir:
             tmp_dirpath = Path(tmp_dir)
             existed = None
-            if new_dirpath.exists():
-                existed = new_dirpath.rename(tmp_dirpath / 'existed')
             previous = existing_dirpath.rename(tmp_dirpath / 'previous')
-            mkdir_p(new_dirpath.parent)
-            previous.rename(new_dirpath)
-            existing_dirpath = previous = None
+            try:
+                if new_dirpath.exists():
+                    existed = new_dirpath.rename(tmp_dirpath / 'existed')
+                mkdir_p(new_dirpath.parent)
+                previous.rename(new_dirpath)
+            except Exception:
+                # try to preserve the directory, if anything went wrong
+                previous.rename(existing_dirpath)
+                raise
+            else:
+                existing_dirpath = previous = None
             if existed and existed.is_dir():
                 existed = existed.rename(new_dirpath / '.existed')
                 for entry_path in existed.iterdir():

From 7ed7527815c59e8a095501a7542495f191671a38 Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 07:43:30 -0500
Subject: [PATCH 091/417] Set `Media.skip` to `False`

This handles a bad interaction between files that were deleted by the
user externally, and so need to be skipped, and files that were only
temporarily indistinguishable from that case.

The source directory changing invalidates the absolute paths, but those
should be corrected later. Leaving skip set causes display problems.
---
 tubesync/sync/models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index a3ae6ed7..38c0d332 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1537,7 +1537,8 @@ class Media(models.Model):
 
         # update the media_file in the db
         self.media_file.name = str(new_video_path.relative_to(self.media_file.storage.location))
-        self.save()
+        self.skip = False
+        self.save(update_fields=('media_file', 'skip'))
         log.info(f'Updated "media_file" in the database for: {self!s}')
 
         (new_prefix_path, new_stem) = directory_and_stem(new_video_path)

From bf90a191a8ae1f5f36c4e51d69870132ca3a3dc8 Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 08:39:03 -0500
Subject: [PATCH 092/417] Save the existing file to the `media_file` field

The computed file path is adjusted for the new source directory
location, so use this to update the old path to the new path after a
source directory is renamed.
---
 tubesync/sync/models.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 38c0d332..4f926deb 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -775,6 +775,11 @@ class Media(models.Model):
     )
 
     def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
+        # Correct the path after a source is renamed
+        if not self.media_file_exists and self.filepath.exists():
+            self.media_file.name = str(self.filepath.relative_to(self.media_file.storage.location))
+            if update_fields is not None:
+                update_fields = {'media_file',}.union(update_fields)
         # Trigger an update of derived fields from metadata
         if self.metadata:
             setattr(self, '_cached_metadata_dict', None)

From c497a74ebe4e638c13a54e59cc00b158fd0ff5c0 Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 08:43:34 -0500
Subject: [PATCH 093/417] Don't skip when either path exists

---
 tubesync/sync/signals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index 9f067467..c03a4f72 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -241,7 +241,7 @@ def media_post_save(sender, instance, created, **kwargs):
     )
     existing_media_download_task = get_media_download_task(str(instance.pk))
     # If the media has not yet been downloaded schedule it to be downloaded
-    if not (instance.media_file_exists or existing_media_download_task):
+    if not (instance.media_file_exists or instance.filepath.exists() or existing_media_download_task):
         # The file was deleted after it was downloaded, skip this media.
         if instance.can_download and instance.downloaded:
             skip_changed = True != instance.skip

From b6efd9dc5e8f53179aa7b3de76ac927399b2b1ac Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 09:00:36 -0500
Subject: [PATCH 094/417] Handle files that weren't renamed too

The filename may be different, so also try the old name in the new
directory.
---
 tubesync/sync/models.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 4f926deb..40ad98af 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -776,10 +776,22 @@ class Media(models.Model):
 
     def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
         # Correct the path after a source is renamed
-        if not self.media_file_exists and self.filepath.exists():
-            self.media_file.name = str(self.filepath.relative_to(self.media_file.storage.location))
-            if update_fields is not None:
-                update_fields = {'media_file',}.union(update_fields)
+        if self.downloaded and not self.media_file_exists:
+            fp_list = list(
+                self.filepath,
+                self.filepath.parent / Path(self.media_file.path).name,
+            )
+            for filepath in fp_list:
+                if filepath.exists():
+                    self.media_file.name = str(
+                        filepath.relative_to(
+                            self.media_file.storage.location
+                        )
+                    )
+                    self.skip = False
+                    if update_fields is not None:
+                        update_fields = {'media_file', 'skip'}.union(update_fields)
+                    break
 
         # Trigger an update of derived fields from metadata
         if self.metadata:

From 189569cc824816d14eb51b0063f1965a8e750d32 Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 09:33:22 -0500
Subject: [PATCH 095/417] Use `media_file` only when it is set

---
 tubesync/sync/models.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 40ad98af..0cc2de5e 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -777,10 +777,9 @@ class Media(models.Model):
     def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
         # Correct the path after a source is renamed
         if self.downloaded and not self.media_file_exists:
-            fp_list = list(
-                self.filepath,
-                self.filepath.parent / Path(self.media_file.path).name,
-            )
+            fp_list = list(self.filepath)
+            if self.media_file:
+                fp_list.append(self.filepath.parent / Path(self.media_file.path).name)
             for filepath in fp_list:
                 if filepath.exists():
                     self.media_file.name = str(
@@ -791,7 +790,7 @@ class Media(models.Model):
                     self.skip = False
                     if update_fields is not None:
                         update_fields = {'media_file', 'skip'}.union(update_fields)
-                    break
+
         # Trigger an update of derived fields from metadata
         if self.metadata:
             setattr(self, '_cached_metadata_dict', None)

From b520d75f20726bce99be68d40eb40dde386bb034 Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 09:39:54 -0500
Subject: [PATCH 096/417] Fixes from tests

---
 tubesync/sync/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 0cc2de5e..36417b1f 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -776,8 +776,8 @@ class Media(models.Model):
 
     def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
         # Correct the path after a source is renamed
-        if self.downloaded and not self.media_file_exists:
-            fp_list = list(self.filepath)
+        if self.created and self.downloaded and not self.media_file_exists:
+            fp_list = list((self.filepath,))
             if self.media_file:
                 fp_list.append(self.filepath.parent / Path(self.media_file.path).name)
             for filepath in fp_list:

From e4a6c54764a1fb20799d005e1df439f809540a9c Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 1 Mar 2025 19:15:13 -0500
Subject: [PATCH 097/417] Don't just log an unhandled exception

Raising these is important to trigger task failure and rescheduling.
---
 tubesync/sync/tasks.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 3c93ed76..a6bd6d84 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -331,7 +331,7 @@ def download_media_metadata(media_id):
         metadata = media.index_metadata()
     except YouTubeError as e:
         e_str = str(e)
-        log_exception = True
+        raise_exception = True
         if ': Premieres in ' in e_str:
             now = timezone.now()
             published_datetime = None
@@ -367,9 +367,9 @@ def download_media_metadata(media_id):
                 verbose_name=verbose_name.format(media.key, published_datetime.isoformat(' ', 'seconds')),
                 remove_existing_tasks=True,
             )
-            log_exception = False
-        if log_exception:
-            log.exception(e)
+            raise_exception = False
+        if raise_exception:
+            raise
         log.debug(str(e))
         return
     response = metadata
-          {{ errors|length|intcomma }} Error{{ errors|length|pluralize }}
+          {{ total_errors|intcomma }} Total Error{{ total_errors|pluralize }} ({{ errors|length|intcomma }} on this page)
Tasks which generated an error are shown here. Tasks are retried a couple of times, so if there was an intermittent error such as a download got interrupted From e604a5e2426b8c6a4b8d93db3f23947d0c7948dd Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 2 Mar 2025 00:51:35 -0500 Subject: [PATCH 100/417] Reduce logic and adjust queries --- tubesync/sync/views.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 0924ded9..48ce2919 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -796,11 +796,11 @@ class TasksView(ListView): data['source'] = self.filter_source data['running'] = [] data['errors'] = [] - data['total_errors'] = qs.exclude(last_error='').count() + data['total_errors'] = qs.filter(attempts__gt=0, locked_by__isnull=True).count() data['scheduled'] = [] - data['total_scheduled'] = qs.filter(locked_at__isnull=True).count() + data['total_scheduled'] = qs.filter(locked_by__isnull=True).count() - for task in qs.filter(locked_at__isnull=False): + for task in qs.filter(locked_by__isnull=False): # There was broken logic in `Task.objects.locked()`, work around it. # With that broken logic, the tasks never resume properly. # This check unlocks the tasks without a running process. @@ -808,11 +808,15 @@ class TasksView(ListView): # - `True`: locked and PID exists # - `False`: locked and PID does not exist # - `None`: not `locked_by`, so there was no PID to check - if task.locked_by_pid_running() is False: + locked_by_pid_running = task.locked_by_pid_running() + if locked_by_pid_running is False: task.locked_by = None # do not wait for the task to expire task.locked_at = None task.save() + continue + elif not locked_by_pid_running: + continue obj, url = map_task_to_instance(task) if not obj: # Orphaned task, ignore it (it will be deleted when it fires) @@ -820,26 +824,14 @@ class TasksView(ListView): setattr(task, 'instance', obj) setattr(task, 'url', url) setattr(task, 'run_now', task.run_at < now) - if task.locked_by_pid_running(): - data['running'].append(task) - elif task.has_error(): - error_message = get_error_message(task) - setattr(task, 'error_message', error_message) - data['errors'].append(task) - else: - data['scheduled'].append(task) + data['running'].append(task) for task in data['tasks']: + if task in data['running']: + continue obj, url = map_task_to_instance(task) if not obj: continue - already_added = ( - task in data['running'] or - task in data['errors'] or - task in data['scheduled'] - ) - if already_added: - continue setattr(task, 'instance', obj) setattr(task, 'url', url) setattr(task, 'run_now', task.run_at < now) From dc15b2e4ff17a09b59b7edd5d2e3e7da88652ca2 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 2 Mar 2025 01:06:32 -0500 Subject: [PATCH 101/417] Add the page count for scheduled also --- tubesync/sync/templates/sync/tasks.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html index e18c358e..9aa61d04 100644 --- a/tubesync/sync/templates/sync/tasks.html +++ b/tubesync/sync/templates/sync/tasks.html @@ -49,14 +49,14 @@ Task will be retried at {{ task.run_at|date:'Y-m-d H:i:s' }} {% empty %} - There are no tasks with errors. + There are no tasks with errors on this page. {% endfor %}

-          {{ total_scheduled|intcomma }} Scheduled
+          {{ total_scheduled|intcomma }} Scheduled ({{ scheduled|length|intcomma }} on this page)
Tasks which are scheduled to run in the future or are waiting in a queue to be processed. They can be waiting for an available worker to run immediately, or @@ -70,7 +70,7 @@ Task will run {% if task.run_now %}immediately{% else %}at {{ task.run_at|date:'Y-m-d H:i:s' }}{% endif %} {% empty %} - There are no scheduled tasks. + There are no scheduled tasks on this page. {% endfor %}
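Taken together, patches 098 through 101 count the totals at the database level while only classifying the tasks on the current page. A rough sketch of that split; the function, its arguments, and the pid_is_running callable are illustrative, not the view code itself:

    def partition_tasks(all_tasks_qs, page_tasks, pid_is_running):
        # totals cover every page; the buckets only hold this page
        totals = {
            'errors': all_tasks_qs.filter(attempts__gt=0, locked_by__isnull=True).count(),
            'scheduled': all_tasks_qs.filter(locked_by__isnull=True).count(),
        }
        running, errors, scheduled = list(), list(), list()
        for task in page_tasks:
            if task.locked_by and pid_is_running(task):
                running.append(task)
            elif task.has_error():
                errors.append(task)
            else:
                scheduled.append(task)
        return totals, running, errors, scheduled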

From b535f1324ec67b0cb004c60b3cf6f097b3c2d8ea Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 2 Mar 2025 19:45:10 -0500 Subject: [PATCH 102/417] Refactor `TasksView.get_context_data` Pulled out a function for adding attributes to mapped tasks. --- tubesync/sync/views.py | 72 ++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 48ce2919..d15a8df2 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -790,17 +790,33 @@ class TasksView(ListView): data = super().get_context_data(*args, **kwargs) now = timezone.now() qs = Task.objects.all() + errors_qs = qs.filter(attempts__gt=0, locked_by__isnull=True) + running_qs = qs.filter(locked_by__isnull=False) + scheduled_qs = qs.filter(locked_by__isnull=True) # Add to context data from ListView data['message'] = self.message data['source'] = self.filter_source - data['running'] = [] - data['errors'] = [] - data['total_errors'] = qs.filter(attempts__gt=0, locked_by__isnull=True).count() - data['scheduled'] = [] - data['total_scheduled'] = qs.filter(locked_by__isnull=True).count() + data['running'] = list() + data['errors'] = list() + data['total_errors'] = errors_qs.count() + data['scheduled'] = list() + data['total_scheduled'] = scheduled_qs.count() - for task in qs.filter(locked_by__isnull=False): + def add_to_task(task): + obj, url = map_task_to_instance(task) + if not obj: + return False + setattr(task, 'instance', obj) + setattr(task, 'url', url) + setattr(task, 'run_now', task.run_at < now) + if task.has_error(): + error_message = get_error_message(task) + setattr(task, 'error_message', error_message) + return 'error' + return True + + for task in running_qs: # There was broken logic in `Task.objects.locked()`, work around it. # With that broken logic, the tasks never resume properly. # This check unlocks the tasks without a running process. 
@@ -814,32 +830,32 @@ class TasksView(ListView): # do not wait for the task to expire task.locked_at = None task.save() - continue - elif not locked_by_pid_running: - continue - obj, url = map_task_to_instance(task) - if not obj: - # Orphaned task, ignore it (it will be deleted when it fires) - continue - setattr(task, 'instance', obj) - setattr(task, 'url', url) - setattr(task, 'run_now', task.run_at < now) - data['running'].append(task) + if locked_by_pid_running and add_to_task(task): + data['running'].append(task) + + # show all the errors when they fit on one page + if (data['total_errors'] + len(data['running'])) < self.paginate_by: + for task in errors_qs: + if task in data['running']: + continue + mapped = add_to_task(task) + if 'error' == mapped: + data['errors'].append(task) + elif mapped: + data['scheduled'].append(task) for task in data['tasks']: - if task in data['running']: + already_added = ( + task in data['running'] or + task in data['errors'] or + task in data['scheduled'] + ) + if already_added: continue - obj, url = map_task_to_instance(task) - if not obj: - continue - setattr(task, 'instance', obj) - setattr(task, 'url', url) - setattr(task, 'run_now', task.run_at < now) - if task.has_error(): - error_message = get_error_message(task) - setattr(task, 'error_message', error_message) + mapped = add_to_task(task) + if 'error' == mapped: data['errors'].append(task) - else: + elif mapped: data['scheduled'].append(task) return data From c0904539310e1b02b35a0ff3efbe454a26a79c4f Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 2 Mar 2025 20:10:38 -0500 Subject: [PATCH 103/417] Upgrade `multi_key_sort` Use the Python 3.8 argument list. Accept `key_func` to allow customization. --- tubesync/sync/utils.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 9f599672..917a9531 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -2,7 +2,7 @@ import os import re import math from copy import deepcopy -from operator import itemgetter +from operator import attrgetter, itemgetter from pathlib import Path from tempfile import NamedTemporaryFile import requests @@ -179,10 +179,16 @@ def seconds_to_timestr(seconds): return '{:02d}:{:02d}:{:02d}'.format(hour, minutes, seconds) -def multi_key_sort(sort_dict, specs, use_reversed=False): - result = list(sort_dict) +def multi_key_sort(iterable, specs, /, use_reversed=False, *, item=False, attr=False, key_func=None): + result = list(iterable) + if key_func is None: + # itemgetter is the default + if item or not (item or attr): + key_func = itemgetter + elif attr: + key_func = attrgetter for key, reverse in reversed(specs): - result = sorted(result, key=itemgetter(key), reverse=reverse) + result.sort(key=key_func(key), reverse=reverse) if use_reversed: return list(reversed(result)) return result From f905f4cf771f75ad7df52aa6eb30d9e2115dd1ce Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 2 Mar 2025 20:46:05 -0500 Subject: [PATCH 104/417] Sort for the user in Python --- tubesync/sync/views.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index d15a8df2..e911bff7 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -27,7 +27,7 @@ from .models import Source, Media, MediaServer from .forms import (ValidateSourceForm, ConfirmDeleteSourceForm, RedownloadMediaForm, SkipMediaForm, EnableMediaForm, ResetTasksForm, ConfirmDeleteMediaServerForm) -from .utils 
import validate_url, delete_file +from .utils import validate_url, delete_file, multi_key_sort from .tasks import (map_task_to_instance, get_error_message, get_source_completed_tasks, get_media_download_task, delete_task_by_media, index_source_task) @@ -782,8 +782,8 @@ class TasksView(ListView): prefix = '-' if 'ASC' != order else '' _priority = f'{prefix}priority' return qs.order_by( - 'run_at', _priority, + 'run_at', ) def get_context_data(self, *args, **kwargs): @@ -858,6 +858,19 @@ class TasksView(ListView): elif mapped: data['scheduled'].append(task) + order = getattr(settings, + 'BACKGROUND_TASK_PRIORITY_ORDERING', + 'DESC' + ) + sort_keys = ( + # key, reverse + ('run_now', False), + ('priority', 'ASC' != order), + ('run_at', False), + ) + data['errors'] = multi_key_sort(data['errors'], sort_keys, attr=True) + data['scheduled'] = multi_key_sort(data['scheduled'], sort_keys, attr=True) + return data From d277619a73717bec079fc50b63c68576819dfbc0 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 3 Mar 2025 08:58:33 -0500 Subject: [PATCH 105/417] Sort immediate tasks at the start --- tubesync/sync/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index e911bff7..cc721ffb 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -864,7 +864,7 @@ class TasksView(ListView): ) sort_keys = ( # key, reverse - ('run_now', False), + ('run_now', True), ('priority', 'ASC' != order), ('run_at', False), ) From f94474093ab2134465416d0aed19ec74e20bfe59 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 3 Mar 2025 09:03:29 -0500 Subject: [PATCH 106/417] Let long running web workers finish --- tubesync/tubesync/gunicorn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/gunicorn.py b/tubesync/tubesync/gunicorn.py index 0058fa65..97e092da 100644 --- a/tubesync/tubesync/gunicorn.py +++ b/tubesync/tubesync/gunicorn.py @@ -23,7 +23,7 @@ def get_bind(): workers = get_num_workers() -timeout = 30 +timeout = 90 chdir = '/app' daemon = False pidfile = '/run/app/gunicorn.pid' From 663dbd48e12f8683bdc98f7cfc5cb91446d14d7c Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 3 Mar 2025 09:10:53 -0500 Subject: [PATCH 107/417] Adjust `nginx` timeout --- config/root/etc/nginx/nginx.conf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/config/root/etc/nginx/nginx.conf b/config/root/etc/nginx/nginx.conf index 8a132d34..f9d12ebd 100644 --- a/config/root/etc/nginx/nginx.conf +++ b/config/root/etc/nginx/nginx.conf @@ -103,15 +103,16 @@ http { # Authentication and proxying location / { proxy_pass http://127.0.0.1:8080; - proxy_set_header Host localhost:8080; + proxy_set_header Host localhost:$proxy_port; proxy_set_header X-Forwarded-Host $x_forwarded_host; proxy_set_header X-Forwarded-Port $x_forwarded_port; proxy_set_header X-Forwarded-Proto $x_forwarded_proto; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Real-IP $remote_addr; proxy_redirect off; - proxy_read_timeout 59; - proxy_connect_timeout 10; + # this read timeout should be lower than gunicorn's timeout + proxy_read_timeout 89s; + proxy_connect_timeout 10s; } # File dwnload and streaming From c5e96d6a292d2ed713328643b400570976c1132e Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 3 Mar 2025 09:16:56 -0500 Subject: [PATCH 108/417] Correct inconsistent spacing in nginx.conf --- config/root/etc/nginx/nginx.conf | 70 ++++++++++++++++---------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff 
--git a/config/root/etc/nginx/nginx.conf b/config/root/etc/nginx/nginx.conf index f9d12ebd..f8df9cf5 100644 --- a/config/root/etc/nginx/nginx.conf +++ b/config/root/etc/nginx/nginx.conf @@ -6,49 +6,49 @@ worker_cpu_affinity auto; pid /run/nginx.pid; events { - worker_connections 1024; + worker_connections 1024; } http { - # Basic settings - sendfile on; - tcp_nopush on; - tcp_nodelay on; - keepalive_timeout 300; - types_hash_max_size 2048; - server_tokens off; - server_names_hash_bucket_size 64; - server_name_in_redirect off; - client_body_in_file_only clean; - client_body_buffer_size 32K; - client_max_body_size 100M; - send_timeout 300s; - large_client_header_buffers 4 8k; + # Basic settings + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 300; + types_hash_max_size 2048; + server_tokens off; + server_names_hash_bucket_size 64; + server_name_in_redirect off; + client_body_in_file_only clean; + client_body_buffer_size 32K; + client_max_body_size 100M; + send_timeout 300s; + large_client_header_buffers 4 8k; - # Mime type handling - include /etc/nginx/mime.types; - default_type application/octet-stream; + # Mime type handling + include /etc/nginx/mime.types; + default_type application/octet-stream; - # Default security headers - add_header X-Frame-Options SAMEORIGIN; - add_header X-Content-Type-Options nosniff; - add_header X-XSS-Protection "1; mode=block"; + # Default security headers + add_header X-Frame-Options SAMEORIGIN; + add_header X-Content-Type-Options nosniff; + add_header X-XSS-Protection "1; mode=block"; - # Logging - log_format host '$remote_addr - $remote_user [$time_local] "[$host] $request" $status $bytes_sent "$http_referer" "$http_user_agent" "$gzip_ratio"'; - access_log /dev/stdout; - error_log stderr; + # Logging + log_format host '$remote_addr - $remote_user [$time_local] "[$host] $request" $status $bytes_sent "$http_referer" "$http_user_agent" "$gzip_ratio"'; + access_log /dev/stdout; + error_log stderr; - # GZIP - gzip on; - gzip_disable "msie6"; - gzip_vary on; - gzip_proxied any; - gzip_comp_level 6; - gzip_buffers 16 8k; - gzip_http_version 1.1; - gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript; + # GZIP + gzip on; + gzip_disable "msie6"; + gzip_vary on; + gzip_proxied any; + gzip_comp_level 6; + gzip_buffers 16 8k; + gzip_http_version 1.1; + gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript; # X-Forwarded-Host (pass-through, or set) map $http_x_forwarded_host $x_forwarded_host { From efbbc7a1789069df589921480b3019ccc5dc9423 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 3 Mar 2025 09:32:00 -0500 Subject: [PATCH 109/417] Keep a copy of long running web responses --- config/root/etc/nginx/nginx.conf | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/config/root/etc/nginx/nginx.conf b/config/root/etc/nginx/nginx.conf index f8df9cf5..d278da3e 100644 --- a/config/root/etc/nginx/nginx.conf +++ b/config/root/etc/nginx/nginx.conf @@ -50,6 +50,14 @@ http { gzip_http_version 1.1; gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript; + # Caching proxy requests + proxy_cache_lock on; + proxy_cache_use_stale updating; + # temporary files in shared memory + proxy_temp_path /dev/shm/nginx/tmp 1; + # change this to /config/cache/nginx for a persistent cache + proxy_cache_path /dev/shm/nginx/cache 
levels=1:2:2 keys_zone=gunicorn:4m inactive=48h max_size=256m min_free=16m; + # X-Forwarded-Host (pass-through, or set) map $http_x_forwarded_host $x_forwarded_host { default $http_x_forwarded_host; @@ -110,9 +118,14 @@ http { proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Real-IP $remote_addr; proxy_redirect off; + # this read timeout should be lower than gunicorn's timeout proxy_read_timeout 89s; proxy_connect_timeout 10s; + + # cache long running web requests + proxy_cache gunicorn; + proxy_cache_lock_timeout 88s; } # File dwnload and streaming From 72d3ce37b91b96ed0c6b98916687671e17bcaf39 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 3 Mar 2025 09:56:39 -0500 Subject: [PATCH 110/417] fixup: remove unneeded spaces --- config/root/etc/nginx/nginx.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/root/etc/nginx/nginx.conf b/config/root/etc/nginx/nginx.conf index d278da3e..dbf8628a 100644 --- a/config/root/etc/nginx/nginx.conf +++ b/config/root/etc/nginx/nginx.conf @@ -57,7 +57,7 @@ http { proxy_temp_path /dev/shm/nginx/tmp 1; # change this to /config/cache/nginx for a persistent cache proxy_cache_path /dev/shm/nginx/cache levels=1:2:2 keys_zone=gunicorn:4m inactive=48h max_size=256m min_free=16m; - + # X-Forwarded-Host (pass-through, or set) map $http_x_forwarded_host $x_forwarded_host { default $http_x_forwarded_host; From 623affef4ab13f67276e4e4cfbf29e3c7e08230a Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 3 Mar 2025 10:06:47 -0500 Subject: [PATCH 111/417] Verify `nginx` configuration for the image --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 5ee70af1..851165ea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -350,6 +350,8 @@ COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings. 
RUN set -x && \ # Make absolutely sure we didn't accidentally bundle a SQLite dev database rm -rf /app/db.sqlite3 && \ + # Check nginx configuration + nginx -t && \ # Run any required app commands /usr/bin/python3 -B /app/manage.py compilescss && \ /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \ From 09d386040d79f3edb399ba724352c58c467dd868 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 3 Mar 2025 14:09:56 -0500 Subject: [PATCH 112/417] `nginx` only creates one directory level --- config/root/etc/nginx/nginx.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/root/etc/nginx/nginx.conf b/config/root/etc/nginx/nginx.conf index dbf8628a..e6b99b68 100644 --- a/config/root/etc/nginx/nginx.conf +++ b/config/root/etc/nginx/nginx.conf @@ -54,9 +54,9 @@ http { proxy_cache_lock on; proxy_cache_use_stale updating; # temporary files in shared memory - proxy_temp_path /dev/shm/nginx/tmp 1; + proxy_temp_path /dev/shm/nginx-tmp 1; # change this to /config/cache/nginx for a persistent cache - proxy_cache_path /dev/shm/nginx/cache levels=1:2:2 keys_zone=gunicorn:4m inactive=48h max_size=256m min_free=16m; + proxy_cache_path /dev/shm/nginx-cache levels=1:2:2 keys_zone=gunicorn:4m inactive=48h max_size=256m min_free=16m; # X-Forwarded-Host (pass-through, or set) map $http_x_forwarded_host $x_forwarded_host { From 5a0d0345a8e54386482d9a6da1eccb625a781d4f Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 4 Mar 2025 11:11:40 -0500 Subject: [PATCH 113/417] Bump `ffmpeg` --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 851165ea..95b909ce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ # syntax=docker/dockerfile:1 # check=error=true -ARG FFMPEG_DATE="2025-02-18-14-16" -ARG FFMPEG_VERSION="N-118500-g08e37fa082" +ARG FFMPEG_DATE="2025-03-04-15-43" +ARG FFMPEG_VERSION="N-118645-gf76195ff65" ARG S6_VERSION="3.2.0.2" From 2a0555376e0a2b7320163544357cace0e1bea619 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 4 Mar 2025 15:05:45 -0500 Subject: [PATCH 114/417] No more errors for `None` values set in metadata --- tubesync/sync/models.py | 89 ++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 36417b1f..0b1518b1 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -810,6 +810,30 @@ class Media(models.Model): fields = self.METADATA_FIELDS.get(field, {}) return fields.get(self.source.source_type, field) + def get_metadata_first_value(self, iterable, default=None, /): + ''' + fetch the first key with a value from metadata + ''' + + # str is an iterable of characters + # we do not want to look for each character! 
+ if isinstance(iterable, str): + iterable = (iterable,) + for key in tuple(iterable): + # reminder: unmapped fields return the key itself + field = self.get_metadata_field(key) + value = self.loaded_metadata.get(field) + # value can be None because: + # - None was stored at the key + # - the key was not in the dictionary + # either way, we don't want those values + if value is None: + continue + if isinstance(value, str): + return value.strip() + return value + return default + def iter_formats(self): for fmt in self.formats: yield parse_media_format(fmt) @@ -1026,6 +1050,7 @@ class Media(models.Model): 'uploader': self.uploader, } + @property def has_metadata(self): return self.metadata is not None @@ -1087,6 +1112,7 @@ class Media(models.Model): except Exception as e: return {} + @property def refresh_formats(self): data = self.loaded_metadata @@ -1121,6 +1147,7 @@ class Media(models.Model): self.metadata = compact_json return True + @property def url(self): url = self.URLS.get(self.source.source_type, '') @@ -1128,32 +1155,24 @@ class Media(models.Model): @property def description(self): - field = self.get_metadata_field('description') - return self.loaded_metadata.get(field, '').strip() + return self.get_metadata_first_value('description', '') @property def metadata_title(self): - result = '' - for key in ('fulltitle', 'title'): - field = self.get_metadata_field(key) - value = self.loaded_metadata.get(field, '').strip() - if value: - result = value - break - return result + return self.get_metadata_first_value(('fulltitle', 'title',), '') def metadata_published(self, timestamp=None): - published_dt = None if timestamp is None: - field = self.get_metadata_field('timestamp') - timestamp = self.loaded_metadata.get(field, None) + timestamp = self.get_metadata_first_value('timestamp') if timestamp is not None: try: timestamp_float = float(timestamp) - published_dt = self.posix_epoch + timedelta(seconds=timestamp_float) except Exception as e: log.warn(f'Could not compute published from timestamp for: {self.source} / {self} with "{e}"') - return published_dt + pass + else: + return self.posix_epoch + timedelta(seconds=timestamp_float) + return None @property def slugtitle(self): @@ -1162,8 +1181,7 @@ class Media(models.Model): @property def thumbnail(self): - field = self.get_metadata_field('thumbnail') - return self.loaded_metadata.get(field, '').strip() + return self.get_metadata_first_value('thumbnail', '') @property def name(self): @@ -1172,20 +1190,17 @@ class Media(models.Model): @property def upload_date(self): - field = self.get_metadata_field('upload_date') - try: - upload_date_str = self.loaded_metadata.get(field, '').strip() - except (AttributeError, ValueError) as e: - return None + upload_date_str = self.get_metadata_first_value('upload_date', '') try: return datetime.strptime(upload_date_str, '%Y%m%d') except (AttributeError, ValueError) as e: - return None + log.debug(f'Media.upload_date: {self.source} / {self}: strptime: {upload_date_str=}: {e}') + pass + return None @property def metadata_duration(self): - field = self.get_metadata_field('duration') - duration = self.loaded_metadata.get(field, 0) + duration = self.get_metadata_first_value('duration', 0) try: duration = int(duration) except (TypeError, ValueError): @@ -1201,45 +1216,37 @@ class Media(models.Model): @property def categories(self): - field = self.get_metadata_field('categories') - return self.loaded_metadata.get(field, []) + return self.get_metadata_first_value('categories', list()) @property def rating(self): 
- field = self.get_metadata_field('rating') - return self.loaded_metadata.get(field, 0) + return self.get_metadata_first_value('rating', 0) @property def votes(self): - field = self.get_metadata_field('upvotes') - upvotes = self.loaded_metadata.get(field, 0) + upvotes = self.get_metadata_first_value('upvotes', 0) if not isinstance(upvotes, int): upvotes = 0 - field = self.get_metadata_field('downvotes') - downvotes = self.loaded_metadata.get(field, 0) + downvotes = self.get_metadata_first_value('downvotes', 0) if not isinstance(downvotes, int): downvotes = 0 return upvotes + downvotes @property def age_limit(self): - field = self.get_metadata_field('age_limit') - return self.loaded_metadata.get(field, 0) + return self.get_metadata_first_value('age_limit', 0) @property def uploader(self): - field = self.get_metadata_field('uploader') - return self.loaded_metadata.get(field, '') + return self.get_metadata_first_value('uploader', '') @property def formats(self): - field = self.get_metadata_field('formats') - return self.loaded_metadata.get(field, []) + return self.get_metadata_first_value('formats', list()) @property def playlist_title(self): - field = self.get_metadata_field('playlist_title') - return self.loaded_metadata.get(field, '') + return self.get_metadata_first_value('playlist_title', '') @property def filename(self): From acb74dcc41dccf845d433235835f40e5594924f4 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 4 Mar 2025 15:51:12 -0500 Subject: [PATCH 115/417] Handle the `None` or `''` cases without logs This was happening way more often than I expected. --- tubesync/sync/models.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 0b1518b1..2e802599 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1190,11 +1190,13 @@ class Media(models.Model): @property def upload_date(self): - upload_date_str = self.get_metadata_first_value('upload_date', '') + upload_date_str = self.get_metadata_first_value('upload_date') + if not upload_date_str: + return None try: return datetime.strptime(upload_date_str, '%Y%m%d') except (AttributeError, ValueError) as e: - log.debug(f'Media.upload_date: {self.source} / {self}: strptime: {upload_date_str=}: {e}') + log.debug(f'Media.upload_date: {self.source} / {self}: strptime: {e}') pass return None From a00f3e8d6579ff6f48960a45117502ff0581e558 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 5 Mar 2025 08:51:47 -0500 Subject: [PATCH 116/417] Speed up `arm64` builds This is the dependency that takes so long to compile that it dominated every other part of the build time. It's a ~12 MiB wheel when we compile from the latest source. --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 851165ea..25a9fec6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -275,6 +275,7 @@ RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ pipenv \ pkgconf \ python3 \ + python3-libsass \ python3-wheel \ curl \ less \ From ad3ada370db6b554a10ea0d50c20d736f57eb7b6 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 5 Mar 2025 12:35:01 -0500 Subject: [PATCH 117/417] Name the buildx step This allows easy access to the step output variables. 
--- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c1450eae..c1dd9205 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -37,6 +37,7 @@ jobs: - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx + id: buildx uses: docker/setup-buildx-action@v3 - name: Log into GitHub Container Registry run: echo "${{ secrets.REGISTRY_ACCESS_TOKEN }}" | docker login https://ghcr.io -u ${{ github.actor }} --password-stdin From b8503fd9e963fd0d25d1f4a6b8202a6c6c1e24d0 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 5 Mar 2025 16:10:47 -0500 Subject: [PATCH 118/417] =?UTF-8?q?Use`Media.metadata=5Fpublished=E2=80=8E?= =?UTF-8?q?`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It does the same operation, but has logging too. --- tubesync/sync/management/commands/import-existing-media.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/management/commands/import-existing-media.py b/tubesync/sync/management/commands/import-existing-media.py index 66f6648c..00fd5ef1 100644 --- a/tubesync/sync/management/commands/import-existing-media.py +++ b/tubesync/sync/management/commands/import-existing-media.py @@ -1,5 +1,4 @@ import os -from datetime import timedelta from pathlib import Path from django.core.management.base import BaseCommand, CommandError from common.logger import log @@ -56,11 +55,13 @@ class Command(BaseCommand): item.downloaded = True item.downloaded_filesize = Path(filepath).stat().st_size # set a reasonable download date - date = item.posix_epoch + timedelta(seconds=Path(filepath).stat().st_mtime) + date = item.metadata_published‎(Path(filepath).stat().st_mtime) if item.published and item.published > date: date = item.published if item.has_metadata: - metadata_date = item.posix_epoch + timedelta(seconds=item.loaded_metadata.get('epoch', 0)) + # TODO: switch to the newer function when it is merged from PR 807 + # item.get_metadata_first_value('epoch', 0) + metadata_date = item.metadata_published‎(item.loaded_metadata.get('epoch', 0)) if metadata_date and metadata_date > date: date = metadata_date if item.download_date and item.download_date > date: From 461ccf6017afab3d788fd90d1aaafc160dbadb52 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 6 Mar 2025 10:12:44 -0500 Subject: [PATCH 119/417] Removed extra characters Also, use the new function. 
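The extra character was a zero-width U+200E LEFT-TO-RIGHT MARK; a standalone illustration of why it breaks the call:

    # Illustration only: U+200E renders as nothing, yet it still
    # changes the name.
    plain = 'metadata_published'
    marked = 'metadata_published\u200e'
    assert plain != marked
    assert len(marked) == len(plain) + 1
    # getattr(item, marked) raises AttributeError even though
    # item.metadata_published exists, and the parser rejects the
    # marked name outright when it appears literally in source code.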
--- tubesync/sync/management/commands/import-existing-media.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/management/commands/import-existing-media.py b/tubesync/sync/management/commands/import-existing-media.py index 00fd5ef1..7dddc8c4 100644 --- a/tubesync/sync/management/commands/import-existing-media.py +++ b/tubesync/sync/management/commands/import-existing-media.py @@ -55,13 +55,11 @@ class Command(BaseCommand): item.downloaded = True item.downloaded_filesize = Path(filepath).stat().st_size # set a reasonable download date - date = item.metadata_published‎(Path(filepath).stat().st_mtime) + date = item.metadata_published(Path(filepath).stat().st_mtime) if item.published and item.published > date: date = item.published if item.has_metadata: - # TODO: switch to the newer function when it is merged from PR 807 - # item.get_metadata_first_value('epoch', 0) - metadata_date = item.metadata_published‎(item.loaded_metadata.get('epoch', 0)) + metadata_date = item.metadata_published(item.get_metadata_first_value('epoch', 0)) if metadata_date and metadata_date > date: date = metadata_date if item.download_date and item.download_date > date: From dc0e1d7552e76016a5529750a3db78962e8074ef Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 6 Mar 2025 11:06:51 -0500 Subject: [PATCH 120/417] Fix 2160 resolution label --- tubesync/sync/choices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/choices.py b/tubesync/sync/choices.py index f0c6e45a..c67de54b 100644 --- a/tubesync/sync/choices.py +++ b/tubesync/sync/choices.py @@ -132,7 +132,7 @@ class SourceResolution(models.TextChoices): VIDEO_720P = '720p', _('720p (HD)') VIDEO_1080P = '1080p', _('1080p (Full HD)') VIDEO_1440P = '1440p', _('1440p (2K)') - VIDEO_2160P = '2160p', _('4320p (8K)') + VIDEO_2160P = '2160p', _('2160p (4K)') VIDEO_4320P = '4320p', _('4320p (8K)') @classmethod From baec5ad0616789237d1a92c3965808caf4f7e870 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Mar 2025 13:26:22 -0500 Subject: [PATCH 121/417] Filter latest downloads with `download_date IS NOT NULL` --- tubesync/sync/views.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index cc721ffb..99844a39 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -75,7 +75,9 @@ class DashboardView(TemplateView): data['average_bytes_per_media'] = 0 # Latest downloads data['latest_downloads'] = Media.objects.filter( - downloaded=True, downloaded_filesize__isnull=False + downloaded=True, + download_date__isnull=False, + downloaded_filesize__isnull=False, ).defer('metadata').order_by('-download_date')[:10] # Largest downloads data['largest_downloads'] = Media.objects.filter( From 7b9819c18d363f38c8a1c0e81911180081e7c87a Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Mar 2025 21:27:09 -0500 Subject: [PATCH 122/417] Remove useless duplicated assignment --- config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run index aa234f46..ff0d4d55 100755 --- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run @@ -1,10 +1,8 @@ #!/command/with-contenv bash # Change runtime user UID and GID -PUID="${PUID:-911}" -PUID="${PUID:-911}" -groupmod -o -g "$PGID" app -usermod -o -u "$PUID" app +groupmod -o -g 
"${PGID:=911}" app +usermod -o -u "${PUID:=911}" app # Reset permissions chown -R app:app /run/app From 6f349b0f91b756ee49057c695cc8ebe9580364fa Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 7 Mar 2025 23:22:04 -0500 Subject: [PATCH 123/417] Disable proxies for health checking --- tubesync/healthcheck.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/healthcheck.py b/tubesync/healthcheck.py index 0826b7bb..5cdc63ed 100755 --- a/tubesync/healthcheck.py +++ b/tubesync/healthcheck.py @@ -18,6 +18,8 @@ import requests TIMEOUT = 5 # Seconds HTTP_USER = os.getenv('HTTP_USER') HTTP_PASS = os.getenv('HTTP_PASS') +# never use proxy for healthcheck requests +os.environ['no_proxy'] = '*' def do_heatlhcheck(url): From d20e9956665e84393e72ce58582eaedd0867c76f Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 07:25:18 -0500 Subject: [PATCH 124/417] Create check_thumbnails.py Patch to use `check_thumbnails` instead of `check_formats` to mean test downloading every possible thumbnail URL. --- patches/yt_dlp/patches/check_thumbnails.py | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 patches/yt_dlp/patches/check_thumbnails.py diff --git a/patches/yt_dlp/patches/check_thumbnails.py b/patches/yt_dlp/patches/check_thumbnails.py new file mode 100644 index 00000000..5c4f8095 --- /dev/null +++ b/patches/yt_dlp/patches/check_thumbnails.py @@ -0,0 +1,42 @@ +from yt_dlp import YoutubeDL + +class PatchedYoutubeDL(YoutubeDL): + + def _sanitize_thumbnails(self, info_dict): + thumbnails = info_dict.get('thumbnails') + if thumbnails is None: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] + if not thumbnails: + return + + + def check_thumbnails(thumbnails): + for t in thumbnails: + self.to_screen(f'[info] Testing thumbnail {t["id"]}') + try: + self.urlopen(HEADRequest(t['url'])) + except network_exceptions as err: + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. 
Skipping...') + continue + yield t + + + self._sort_thumbnails(thumbnails) + for i, t in enumerate(thumbnails): + if t.get('id') is None: + t['id'] = str(i) + if t.get('width') and t.get('height'): + t['resolution'] = '%dx%d' % (t['width'], t['height']) + t['url'] = sanitize_url(t['url']) + + + if self.params.get('check_thumbnails') is True: + info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True) + else: + info_dict['thumbnails'] = thumbnails + + +YoutubeDL.__unpatched___sanitize_thumbnails = YoutubeDL._sanitize_thumbnails +YoutubeDL._sanitize_thumbnails = PatchedYoutubeDL._sanitize_thumbnails From 5f6852049692cfbea437d38b4b05773f28a2695b Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 07:29:00 -0500 Subject: [PATCH 125/417] Use the new patch --- tubesync/sync/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 95eebb8a..b3d6cbbf 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -17,6 +17,7 @@ from django.conf import settings from .hooks import postprocessor_hook, progress_hook from .utils import mkdir_p import yt_dlp +import yt_dlp.patches.check_thumbnails from yt_dlp.utils import remove_end From b072b314d28bf676b08abc2b1555ceeedee39072 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 07:40:07 -0500 Subject: [PATCH 126/417] Create __init__.py --- patches/yt_dlp/patches/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 patches/yt_dlp/patches/__init__.py diff --git a/patches/yt_dlp/patches/__init__.py b/patches/yt_dlp/patches/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/patches/yt_dlp/patches/__init__.py @@ -0,0 +1 @@ + From 2e12737583e43f1881d36a1f78c51cca201c56f1 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 07:54:27 -0500 Subject: [PATCH 127/417] Copy `patches/yt_dlp` for tests --- .github/workflows/ci.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c1dd9205..ce7acbb8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,7 +28,9 @@ jobs: pip install pipenv pipenv install --system --skip-lock - name: Set up Django environment - run: cp tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py + run: | + cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py + cp -v -a -t /usr/local/lib/python3.*/dist-packages/yt_dlp/ tubesync/patches/yt_dlp/* - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 containerise: From 1d1cb6dc1495a6f01e03b33c8145eee9512bba95 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 08:00:22 -0500 Subject: [PATCH 128/417] Use `Python3_ROOT_DIR` environment variable --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ce7acbb8..6f562686 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,7 +30,7 @@ jobs: - name: Set up Django environment run: | cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py - cp -v -a -t /usr/local/lib/python3.*/dist-packages/yt_dlp/ tubesync/patches/yt_dlp/* + cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/dist-packages/yt_dlp/ tubesync/patches/yt_dlp/* - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 containerise: From 
ca904f37d3e8e9095984b2498654d42d75405763 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 08:10:34 -0500 Subject: [PATCH 129/417] Show me where `yt_dlp` is loaded from --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6f562686..a92016da 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,6 +30,7 @@ jobs: - name: Set up Django environment run: | cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py + python -v -m yt_dlp 2>&1| grep ^Adding cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/dist-packages/yt_dlp/ tubesync/patches/yt_dlp/* - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 From af7d6292af196e3af19440ef7f04c384be812cc5 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 08:16:55 -0500 Subject: [PATCH 130/417] Find should work for old versions too --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a92016da..72be3e4e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,8 +30,8 @@ jobs: - name: Set up Django environment run: | cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py - python -v -m yt_dlp 2>&1| grep ^Adding - cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/dist-packages/yt_dlp/ tubesync/patches/yt_dlp/* + find /usr /opt -name yt_dlp -type d -print + cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ tubesync/patches/yt_dlp/* - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 containerise: From 629ff5cfc81317a20a073a847ddb3cc08a185b18 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 08:23:38 -0500 Subject: [PATCH 131/417] Use the correct source path --- .github/workflows/ci.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 72be3e4e..6068cab1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,8 +30,7 @@ jobs: - name: Set up Django environment run: | cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py - find /usr /opt -name yt_dlp -type d -print - cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ tubesync/patches/yt_dlp/* + cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ patches/yt_dlp/* - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 containerise: From cffe8348c3d20523cec186ccbb343dca3d91a5c4 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 09:26:40 -0500 Subject: [PATCH 132/417] Passthrough module for `patch` --- patches/yt_dlp/patches/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/patches/yt_dlp/patches/__init__.py b/patches/yt_dlp/patches/__init__.py index 8b137891..f2d40a97 100644 --- a/patches/yt_dlp/patches/__init__.py +++ b/patches/yt_dlp/patches/__init__.py @@ -1 +1,5 @@ +from yt_dlp.compat.compat_utils import passthrough_module + +passthrough_module(__name__, '.patch') +del passthrough_module From a8fd6ee00beebf79573f57cedde8eaa892ad4d47 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 09:28:24 -0500 Subject: [PATCH 133/417] Adjust import --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 
b3d6cbbf..edcb3c0e 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -17,7 +17,7 @@ from django.conf import settings from .hooks import postprocessor_hook, progress_hook from .utils import mkdir_p import yt_dlp -import yt_dlp.patches.check_thumbnails +import yt_dlp.patch.check_thumbnails from yt_dlp.utils import remove_end From 19c301ad76db2bc3f18e1bb05c2d7ee01b1d762c Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 10:08:32 -0500 Subject: [PATCH 134/417] Rename patches to patch --- patches/yt_dlp/{patches => patch}/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename patches/yt_dlp/{patches => patch}/__init__.py (100%) diff --git a/patches/yt_dlp/patches/__init__.py b/patches/yt_dlp/patch/__init__.py similarity index 100% rename from patches/yt_dlp/patches/__init__.py rename to patches/yt_dlp/patch/__init__.py From 4f9e0bf949b3f52db72d614b7eacf83535eb37d0 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 10:09:26 -0500 Subject: [PATCH 135/417] Rename patches to patch --- patches/yt_dlp/{patches => patch}/check_thumbnails.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename patches/yt_dlp/{patches => patch}/check_thumbnails.py (100%) diff --git a/patches/yt_dlp/patches/check_thumbnails.py b/patches/yt_dlp/patch/check_thumbnails.py similarity index 100% rename from patches/yt_dlp/patches/check_thumbnails.py rename to patches/yt_dlp/patch/check_thumbnails.py From b553443255e7788ad8e635835333b8fff0ec09cf Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 10:31:31 -0500 Subject: [PATCH 136/417] Copy patches before trying to use them --- Dockerfile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 95b909ce..99003e01 100644 --- a/Dockerfile +++ b/Dockerfile @@ -346,6 +346,14 @@ RUN --mount=type=tmpfs,target=/cache \ COPY tubesync /app COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings.py +# patch background_task +COPY patches/background_task/ \ + /usr/local/lib/python3/dist-packages/background_task/ + +# patch yt_dlp +COPY patches/yt_dlp/ \ + /usr/local/lib/python3/dist-packages/yt_dlp/ + # Build app RUN set -x && \ # Make absolutely sure we didn't accidentally bundle a SQLite dev database @@ -371,14 +379,6 @@ RUN set -x && \ # Copy root COPY config/root / -# patch background_task -COPY patches/background_task/ \ - /usr/local/lib/python3/dist-packages/background_task/ - -# patch yt_dlp -COPY patches/yt_dlp/ \ - /usr/local/lib/python3/dist-packages/yt_dlp/ - # Create a healthcheck HEALTHCHECK --interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck.py", "http://127.0.0.1:8080/healthcheck"] From 7c0891c70370ba3b58b8cb3a8d5286308144a6e0 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 10:45:28 -0500 Subject: [PATCH 137/417] Link to the `python3` version immediately after installing it --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 99003e01..2343d962 100644 --- a/Dockerfile +++ b/Dockerfile @@ -279,6 +279,8 @@ RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ curl \ less \ && \ + # Link to the current python3 version + ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ # Clean up apt-get -y autopurge && \ apt-get -y autoclean && \ @@ -369,8 +371,6 @@ RUN set -x && \ mkdir -v -p /config/cache/pycache && \ mkdir -v -p /downloads/audio 
&& \ mkdir -v -p /downloads/video && \ - # Link to the current python3 version - ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ # Append software versions ffmpeg_version=$(/usr/local/bin/ffmpeg -version | awk -v 'ev=31' '1 == NR && "ffmpeg" == $1 { print $3; ev=0; } END { exit ev; }') && \ test -n "${ffmpeg_version}" && \ From c2aa9a4b9f358feadda73418c66720175d58c282 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 11:07:13 -0500 Subject: [PATCH 138/417] Explicitly turn off checking of thumbnails --- tubesync/sync/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index edcb3c0e..483142f3 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -155,6 +155,7 @@ def get_media_info(url, days=None): 'logger': log, 'extract_flat': True, 'check_formats': True, + 'check_thumbnails': False, 'daterange': yt_dlp.utils.DateRange(start=start), 'extractor_args': { 'youtube': {'formats': ['missing_pot']}, From 01a9f07d5896e40b7856969cd882d23fd0c5cac7 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 11:59:44 -0500 Subject: [PATCH 139/417] Import missing functions from `yt_dlp.utils` --- patches/yt_dlp/patch/check_thumbnails.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/patches/yt_dlp/patch/check_thumbnails.py b/patches/yt_dlp/patch/check_thumbnails.py index 5c4f8095..25723bb6 100644 --- a/patches/yt_dlp/patch/check_thumbnails.py +++ b/patches/yt_dlp/patch/check_thumbnails.py @@ -1,4 +1,5 @@ from yt_dlp import YoutubeDL +from yt_dlp.utils import sanitize_url, LazyList class PatchedYoutubeDL(YoutubeDL): @@ -14,7 +15,7 @@ class PatchedYoutubeDL(YoutubeDL): def check_thumbnails(thumbnails): for t in thumbnails: - self.to_screen(f'[info] Testing thumbnail {t["id"]}') + self.to_screen(f'[info] Testing thumbnail {t["id"]}: {t["url"]!r}') try: self.urlopen(HEADRequest(t['url'])) except network_exceptions as err: From 5cd372c92d5965f28dd41ddc423193d6fd6462fc Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 8 Mar 2025 17:04:11 -0500 Subject: [PATCH 140/417] Do not try to refresh formats when no metadata exists --- tubesync/sync/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index a6bd6d84..fdc954a3 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -597,6 +597,7 @@ def save_all_media_for_source(source_id): skip=False, manual_skip=False, downloaded=False, + metadata__isnull=False, ) for media in refresh_qs: try: From 3c94e5a0b33c113c02a5bf69a8c4f7427af8f92d Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 06:56:39 -0400 Subject: [PATCH 141/417] Add and use `getenv` `os.getenv` makes no guarantees about the return type for default values. 
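Shown standalone; the variable name is reused purely for illustration:

    import os

    os.environ.pop('TUBESYNC_WORKERS', None)
    os.getenv('TUBESYNC_WORKERS', 1)     # -> 1, an int: the default
                                         #    passes through unchanged
    os.environ['TUBESYNC_WORKERS'] = '4'
    os.getenv('TUBESYNC_WORKERS', 1)     # -> '4', a str

    # Callers receive str or int depending on the environment, which is
    # what the getenv() wrapper added below normalizes.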
--- tubesync/tubesync/local_settings.py.container | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index 4b73b7d7..d1021cd9 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -5,24 +5,49 @@ from urllib.parse import urljoin from common.utils import parse_database_connection_string +def getenv(key, default=None, /, *, string=True, integer=False): + ''' + Calls `os.getenv` and guarantees that a string is returned + ''' + + unsupported_type_msg = 'Unsupported type for positional argument, "{}": {}' + assert isinstance(key, (str,)), unsupported_type_msg.format('key', type(key)) + assert isinstance(default, (str, bool, float, int, None.__class__,)), unsupported_type_msg.format('default', type(default)) + + d = default + k = key + if default is not None: + d = str(default) + import os # just in case it wasn't already imported + + r = os.getenv(k, d) + if r is None: + if string: r = str() + if integer: r = int() + elif integer: + r = int(float(r)) + return r + + BASE_DIR = Path(__file__).resolve().parent.parent ROOT_DIR = Path('/') CONFIG_BASE_DIR = ROOT_DIR / 'config' DOWNLOADS_BASE_DIR = ROOT_DIR / 'downloads' -DJANGO_URL_PREFIX = os.getenv('DJANGO_URL_PREFIX', None) -STATIC_URL = str(os.getenv('DJANGO_STATIC_URL', '/static/')) +DJANGO_URL_PREFIX = getenv('DJANGO_URL_PREFIX', str()).strip() +STATIC_URL = getenv('DJANGO_STATIC_URL', '/static/').strip() if DJANGO_URL_PREFIX and STATIC_URL: STATIC_URL = urljoin(DJANGO_URL_PREFIX, STATIC_URL[1:]) # This is not ever meant to be a public web interface so this isn't too critical -SECRET_KEY = str(os.getenv('DJANGO_SECRET_KEY', 'tubesync-django-secret')) +SECRET_KEY = getenv('DJANGO_SECRET_KEY', 'tubesync-django-secret') -ALLOWED_HOSTS_STR = str(os.getenv('TUBESYNC_HOSTS', '*')) +ALLOWED_HOSTS_STR = getenv('TUBESYNC_HOSTS', '*') ALLOWED_HOSTS = ALLOWED_HOSTS_STR.split(',') -DEBUG = True if os.getenv('TUBESYNC_DEBUG', False) else False -FORCE_SCRIPT_NAME = os.getenv('DJANGO_FORCE_SCRIPT_NAME', DJANGO_URL_PREFIX) +DEBUG_STR = getenv('TUBESYNC_DEBUG', False) +DEBUG = True if 'true' == DEBUG_STR.strip().lower() else False +FORCE_SCRIPT_NAME = getenv('DJANGO_FORCE_SCRIPT_NAME', DJANGO_URL_PREFIX) database_dict = {} @@ -34,7 +59,8 @@ if database_connection_env: if database_dict: print(f'Using database connection: {database_dict["ENGINE"]}://' f'{database_dict["USER"]}:[hidden]@{database_dict["HOST"]}:' - f'{database_dict["PORT"]}/{database_dict["NAME"]}', file=sys.stdout) + f'{database_dict["PORT"]}/{database_dict["NAME"]}', + file=sys.stdout, flush=True) DATABASES = { 'default': database_dict, } @@ -60,7 +86,7 @@ else: DEFAULT_THREADS = 1 -BACKGROUND_TASK_ASYNC_THREADS = int(os.getenv('TUBESYNC_WORKERS', DEFAULT_THREADS)) +BACKGROUND_TASK_ASYNC_THREADS = getenv('TUBESYNC_WORKERS', DEFAULT_THREADS, integer=True) MEDIA_ROOT = CONFIG_BASE_DIR / 'media' From c2653b76a92081698ff23b8073a9d8e21f07ba06 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 09:34:36 -0400 Subject: [PATCH 142/417] Add `getenv` to `common.utils` --- tubesync/common/utils.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index 95efd9f3..007f3f0d 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -1,3 +1,4 @@ +import os import string from datetime import datetime from 
urllib.parse import urlunsplit, urlencode, urlparse @@ -6,6 +7,44 @@ from yt_dlp.utils import LazyList from .errors import DatabaseConnectionError +def getenv(key, default=None, /, *, integer=False, string=True): + ''' + Guarantees a returned type from calling `os.getenv` + The caller can request the integer type, + or use the default string type. + ''' + + args = dict(key=key, default=default, integer=integer, string=string) + supported_types = dict(zip(args.keys(), ( + (str,), # key + ( + bool, + float, + int, + str, + None.__class__, + ), # default + (bool,) * (len(args.keys()) - 2), + ))) + unsupported_type_msg = 'Unsupported type for positional argument, "{}": {}' + for k, t in supported_types.items(): + v = args[k] + assert isinstance(v, t), unsupported_type_msg.format(k, type(v)) + + d = str(default) if default is not None else None + + # just in case `os` wasn't already imported + import os + + r = os.getenv(key, d) + if r is None: + if string: r = str() + if integer: r = int() + elif integer: + r = int(float(r)) + return r + + def parse_database_connection_string(database_connection_string): ''' Parses a connection string in a URL style format, such as: From 7315cb985398dc2c4a375d44649ef698148b440f Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 09:37:11 -0400 Subject: [PATCH 143/417] Remove `getenv` from local_settings.py.container --- tubesync/tubesync/local_settings.py.container | 26 +------------------ 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index d1021cd9..c2986ac2 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -2,31 +2,7 @@ import os import sys from pathlib import Path from urllib.parse import urljoin -from common.utils import parse_database_connection_string - - -def getenv(key, default=None, /, *, string=True, integer=False): - ''' - Calls `os.getenv` and guarantees that a string is returned - ''' - - unsupported_type_msg = 'Unsupported type for positional argument, "{}": {}' - assert isinstance(key, (str,)), unsupported_type_msg.format('key', type(key)) - assert isinstance(default, (str, bool, float, int, None.__class__,)), unsupported_type_msg.format('default', type(default)) - - d = default - k = key - if default is not None: - d = str(default) - import os # just in case it wasn't already imported - - r = os.getenv(k, d) - if r is None: - if string: r = str() - if integer: r = int() - elif integer: - r = int(float(r)) - return r +from common.utils import getenv, parse_database_connection_string BASE_DIR = Path(__file__).resolve().parent.parent From eabcb36aaa58e2f56efb83023bba352ba04b73c6 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 09:43:01 -0400 Subject: [PATCH 144/417] Switch to `common.utils.getenv` in settings.py --- tubesync/tubesync/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index a9f4061c..ff88a669 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -1,5 +1,5 @@ -import os from pathlib import Path +from common.utils import getenv BASE_DIR = Path(__file__).resolve().parent.parent @@ -97,7 +97,7 @@ AUTH_PASSWORD_VALIDATORS = [ LANGUAGE_CODE = 'en-us' -TIME_ZONE = os.getenv('TZ', 'UTC') +TIME_ZONE = getenv('TZ', 'UTC') USE_I18N = True USE_L10N = True USE_TZ = True From e5c0abbdca62a7dc4449d0daf93ec6ec177ca97d Mon Sep 17 00:00:00 2001 
From: tcely Date: Sun, 9 Mar 2025 09:45:46 -0400 Subject: [PATCH 145/417] `os` was imported --- tubesync/common/utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index 007f3f0d..acb55561 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -33,9 +33,6 @@ def getenv(key, default=None, /, *, integer=False, string=True): d = str(default) if default is not None else None - # just in case `os` wasn't already imported - import os - r = os.getenv(key, d) if r is None: if string: r = str() From c0115c0431ab22f05f475fc8d1d495804dbc6606 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 10:38:04 -0400 Subject: [PATCH 146/417] Update local_settings.py.container --- tubesync/tubesync/local_settings.py.container | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index c2986ac2..629bb5ff 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -1,4 +1,3 @@ -import os import sys from pathlib import Path from urllib.parse import urljoin @@ -9,7 +8,7 @@ BASE_DIR = Path(__file__).resolve().parent.parent ROOT_DIR = Path('/') CONFIG_BASE_DIR = ROOT_DIR / 'config' DOWNLOADS_BASE_DIR = ROOT_DIR / 'downloads' -DJANGO_URL_PREFIX = getenv('DJANGO_URL_PREFIX', str()).strip() +DJANGO_URL_PREFIX = getenv('DJANGO_URL_PREFIX').strip() STATIC_URL = getenv('DJANGO_STATIC_URL', '/static/').strip() if DJANGO_URL_PREFIX and STATIC_URL: STATIC_URL = urljoin(DJANGO_URL_PREFIX, STATIC_URL[1:]) @@ -27,7 +26,7 @@ FORCE_SCRIPT_NAME = getenv('DJANGO_FORCE_SCRIPT_NAME', DJANGO_URL_PREFIX) database_dict = {} -database_connection_env = os.getenv('DATABASE_CONNECTION', '') +database_connection_env = getenv('DATABASE_CONNECTION') if database_connection_env: database_dict = parse_database_connection_string(database_connection_env) @@ -72,14 +71,14 @@ YOUTUBE_DL_TEMPDIR = DOWNLOAD_ROOT / 'cache' COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt' -HEALTHCHECK_FIREWALL_STR = str(os.getenv('TUBESYNC_HEALTHCHECK_FIREWALL', 'True')).strip().lower() -HEALTHCHECK_FIREWALL = True if HEALTHCHECK_FIREWALL_STR == 'true' else False -HEALTHCHECK_ALLOWED_IPS_STR = str(os.getenv('TUBESYNC_HEALTHCHECK_ALLOWED_IPS', '127.0.0.1')) +HEALTHCHECK_FIREWALL_STR = getenv('TUBESYNC_HEALTHCHECK_FIREWALL', True) +HEALTHCHECK_FIREWALL = ( 'true' == HEALTHCHECK_FIREWALL_STR.strip().lower() ) +HEALTHCHECK_ALLOWED_IPS_STR = getenv('TUBESYNC_HEALTHCHECK_ALLOWED_IPS', '127.0.0.1') HEALTHCHECK_ALLOWED_IPS = HEALTHCHECK_ALLOWED_IPS_STR.split(',') -BASICAUTH_USERNAME = os.getenv('HTTP_USER', '').strip() -BASICAUTH_PASSWORD = os.getenv('HTTP_PASS', '').strip() +BASICAUTH_USERNAME = getenv('HTTP_USER').strip() +BASICAUTH_PASSWORD = getenv('HTTP_PASS').strip() if BASICAUTH_USERNAME and BASICAUTH_PASSWORD: BASICAUTH_DISABLE = False BASICAUTH_USERS = { @@ -90,25 +89,25 @@ else: BASICAUTH_USERS = {} -SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR = os.getenv('TUBESYNC_DIRECTORY_PREFIX', 'True').strip().lower() -SOURCE_DOWNLOAD_DIRECTORY_PREFIX = True if SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR == 'true' else False +SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR = getenv('TUBESYNC_DIRECTORY_PREFIX', True) +SOURCE_DOWNLOAD_DIRECTORY_PREFIX = ( 'true' == SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR.strip().lower() ) -SHRINK_NEW_MEDIA_METADATA_STR = os.getenv('TUBESYNC_SHRINK_NEW', 'false').strip().lower() -SHRINK_NEW_MEDIA_METADATA = ( 'true' 
== SHRINK_NEW_MEDIA_METADATA_STR ) -SHRINK_OLD_MEDIA_METADATA_STR = os.getenv('TUBESYNC_SHRINK_OLD', 'false').strip().lower() -SHRINK_OLD_MEDIA_METADATA = ( 'true' == SHRINK_OLD_MEDIA_METADATA_STR ) +SHRINK_NEW_MEDIA_METADATA_STR = getenv('TUBESYNC_SHRINK_NEW', False) +SHRINK_NEW_MEDIA_METADATA = ( 'true' == SHRINK_NEW_MEDIA_METADATA_STR.strip().lower() ) +SHRINK_OLD_MEDIA_METADATA_STR = getenv('TUBESYNC_SHRINK_OLD', False) +SHRINK_OLD_MEDIA_METADATA = ( 'true' == SHRINK_OLD_MEDIA_METADATA_STR.strip().lower() ) # TUBESYNC_RENAME_ALL_SOURCES: True or False -RENAME_ALL_SOURCES_STR = os.getenv('TUBESYNC_RENAME_ALL_SOURCES', 'False').strip().lower() -RENAME_ALL_SOURCES = ( 'true' == RENAME_ALL_SOURCES_STR ) +RENAME_ALL_SOURCES_STR = getenv('TUBESYNC_RENAME_ALL_SOURCES', False) +RENAME_ALL_SOURCES = ( 'true' == RENAME_ALL_SOURCES_STR.strip().lower() ) # TUBESYNC_RENAME_SOURCES: A comma-separated list of Source directories -RENAME_SOURCES_STR = os.getenv('TUBESYNC_RENAME_SOURCES', '') +RENAME_SOURCES_STR = getenv('TUBESYNC_RENAME_SOURCES') RENAME_SOURCES = RENAME_SOURCES_STR.split(',') if RENAME_SOURCES_STR else None -VIDEO_HEIGHT_CUTOFF = int(os.getenv("TUBESYNC_VIDEO_HEIGHT_CUTOFF", "240")) +VIDEO_HEIGHT_CUTOFF = getenv("TUBESYNC_VIDEO_HEIGHT_CUTOFF", 240, integer=True) # ensure that the current directory exists @@ -119,4 +118,11 @@ old_youtube_cache_dirs = list(YOUTUBE_DL_CACHEDIR.parent.glob('youtube-*')) old_youtube_cache_dirs.extend(list(YOUTUBE_DL_CACHEDIR.parent.glob('youtube/youtube-*'))) for cache_dir in old_youtube_cache_dirs: cache_dir.rename(YOUTUBE_DL_CACHEDIR / cache_dir.name) +# try to remove the old, hopefully empty, directory +empty_old_youtube_dir = YOUTUBE_DL_CACHEDIR.parent / 'youtube' +if empty_old_youtube_dir.is_dir(): + try: + empty_old_youtube_dir.rmdir() + except: + pass From 3a6c3170745c6ac0f83e449199ed49f2d1971cc3 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 14:25:30 -0400 Subject: [PATCH 147/417] Update 0028_alter_source_source_resolution.py Match this to the label change in dc0e1d7552e76016a5529750a3db78962e8074ef. 
--- tubesync/sync/migrations/0028_alter_source_source_resolution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/migrations/0028_alter_source_source_resolution.py b/tubesync/sync/migrations/0028_alter_source_source_resolution.py index d3535892..e72f7307 100644 --- a/tubesync/sync/migrations/0028_alter_source_source_resolution.py +++ b/tubesync/sync/migrations/0028_alter_source_source_resolution.py @@ -11,7 +11,7 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='source', name='source_resolution', - field=models.CharField(choices=[('audio', 'Audio only'), ('360p', '360p (SD)'), ('480p', '480p (SD)'), ('720p', '720p (HD)'), ('1080p', '1080p (Full HD)'), ('1440p', '1440p (2K)'), ('2160p', '4320p (8K)'), ('4320p', '4320p (8K)')], db_index=True, default='1080p', help_text='Source resolution, desired video resolution to download', max_length=8, verbose_name='source resolution'), + field=models.CharField(choices=[('audio', 'Audio only'), ('360p', '360p (SD)'), ('480p', '480p (SD)'), ('720p', '720p (HD)'), ('1080p', '1080p (Full HD)'), ('1440p', '1440p (2K)'), ('2160p', '2160p (4K)'), ('4320p', '4320p (8K)')], db_index=True, default='1080p', help_text='Source resolution, desired video resolution to download', max_length=8, verbose_name='source resolution'), ), ] From be71f8cc10d4cc54f973590c50080ea1e58ce338 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 15:42:18 -0400 Subject: [PATCH 148/417] Display the shorter engine instead of driver --- tubesync/tubesync/local_settings.py.container | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index 629bb5ff..cc20f73b 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -32,7 +32,7 @@ if database_connection_env: if database_dict: - print(f'Using database connection: {database_dict["ENGINE"]}://' + print(f'Using database connection: {database_dict["DRIVER"]}://' f'{database_dict["USER"]}:[hidden]@{database_dict["HOST"]}:' f'{database_dict["PORT"]}/{database_dict["NAME"]}', file=sys.stdout, flush=True) From 9893383e476261568b1c67f8bfbbbf20a0e4c084 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 17:55:15 -0400 Subject: [PATCH 149/417] Tweak tasks ordering --- tubesync/sync/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 99844a39..3e1470b2 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -866,9 +866,9 @@ class TasksView(ListView): ) sort_keys = ( # key, reverse - ('run_now', True), - ('priority', 'ASC' != order), ('run_at', False), + ('priority', 'ASC' != order), + ('run_now', True), ) data['errors'] = multi_key_sort(data['errors'], sort_keys, attr=True) data['scheduled'] = multi_key_sort(data['scheduled'], sort_keys, attr=True) From 6f63714118bd7925e65c530ee882777d0bb772f7 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 17:59:18 -0400 Subject: [PATCH 150/417] Remove errors from scheduled Errors are technically scheduled too, but the numbers don't add up for the user and can be confusing when they are included. 
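The arithmetic behind this: a failed task keeps its row in the task table, so a plain count of scheduled tasks still includes every errored task. A minimal sketch of the adjusted figure, assuming the `failed_at` field that django-background-task records for errored tasks:

    from background_task.models import Task

    total_scheduled = Task.objects.count()  # errored tasks still have rows here
    total_errors = Task.objects.filter(failed_at__isnull=False).count()
    # the count the page should report as "Scheduled"
    display_scheduled = total_scheduled - total_errors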
--- tubesync/sync/templates/sync/tasks.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html index 9aa61d04..b0fcb49e 100644 --- a/tubesync/sync/templates/sync/tasks.html +++ b/tubesync/sync/templates/sync/tasks.html @@ -56,7 +56,7 @@
-            {{ total_scheduled|intcomma }} Scheduled ({{ scheduled|length|intcomma }} on this page)
+            {{ (total_scheduled -total_errors)|intcomma }} Scheduled ({{ scheduled|length|intcomma }} on this page)
Tasks which are scheduled to run in the future or are waiting in a queue to be processed. They can be waiting for an available worker to run immediately, or From 11e815e633123b2c465f08ca33428366e1cfb88d Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 18:17:02 -0400 Subject: [PATCH 151/417] Use with tag --- tubesync/sync/templates/sync/tasks.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html index b0fcb49e..34a77229 100644 --- a/tubesync/sync/templates/sync/tasks.html +++ b/tubesync/sync/templates/sync/tasks.html @@ -56,7 +56,9 @@

-            {{ (total_scheduled -total_errors)|intcomma }} Scheduled ({{ scheduled|length|intcomma }} on this page)
+            {% with adjusted=(total_scheduled - total_errors) %}
+            {{ adjusted|intcomma }} Scheduled ({{ scheduled|length|intcomma }} on this page)
+            {% endwith %}
Tasks which are scheduled to run in the future or are waiting in a queue to be processed. They can be waiting for an available worker to run immediately, or From 29c2d4470476052e60869c8dc0296e0687b8323f Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 19:27:34 -0400 Subject: [PATCH 152/417] Use a filter --- tubesync/sync/templates/sync/tasks.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html index 34a77229..1202bf70 100644 --- a/tubesync/sync/templates/sync/tasks.html +++ b/tubesync/sync/templates/sync/tasks.html @@ -1,4 +1,4 @@ -{% extends 'base.html' %}{% load humanize %} +{% extends 'base.html' %}{% load humanize %}{% load filters %} {% block headtitle %}Tasks{% endblock %} @@ -56,7 +56,7 @@

-            {% with adjusted=(total_scheduled - total_errors) %}
+            {% with adjusted=total_scheduled|sub:total_errors ) %}
             {{ adjusted|intcomma }} Scheduled ({{ scheduled|length|intcomma }} on this page)
             {% endwith %}
From e284f760afbd208ae2ff076df8cc23de2820e58a Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 19:33:45 -0400 Subject: [PATCH 153/417] Add `sub` filter This is the opposite of the default `add` filter. --- tubesync/sync/templatetags/filters.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tubesync/sync/templatetags/filters.py b/tubesync/sync/templatetags/filters.py index 6762f4c4..444969e9 100644 --- a/tubesync/sync/templatetags/filters.py +++ b/tubesync/sync/templatetags/filters.py @@ -12,3 +12,14 @@ def bytesformat(input): return output return output[: -1 ] + 'iB' +@register.filter(is_safe=False) +def sub(value, arg): + """Subtract the arg from the value.""" + try: + return int(value) - int(arg) + except (ValueError, TypeError): + try: + return value - arg + except Exception: + return "" + From 674a1d1d94ae3f9286f6c0749ba219b836e60e3e Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 19:37:47 -0400 Subject: [PATCH 154/417] fixup: remove extra ')' --- tubesync/sync/templates/sync/tasks.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html index 1202bf70..2b7a1250 100644 --- a/tubesync/sync/templates/sync/tasks.html +++ b/tubesync/sync/templates/sync/tasks.html @@ -56,7 +56,7 @@

-            {% with adjusted=total_scheduled|sub:total_errors ) %}
+            {% with adjusted=total_scheduled|sub:total_errors %}
             {{ adjusted|intcomma }} Scheduled ({{ scheduled|length|intcomma }} on this page)
             {% endwith %}
From 58472f77858417dd2f408d9d2f8f2694fae83b3f Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 21:47:48 -0400 Subject: [PATCH 155/417] Add explicit transactions for certain tasks --- tubesync/sync/tasks.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index fdc954a3..d85c6be7 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -17,6 +17,7 @@ from django.conf import settings from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile from django.utils import timezone +from django.db.tansaction import atomic from django.db.utils import IntegrityError from django.utils.translation import gettext_lazy as _ from background_task import background @@ -179,6 +180,7 @@ def cleanup_removed_media(source, videos): @background(schedule=300, remove_existing_tasks=True) +@atomic(durable=True) def index_source_task(source_id): ''' Indexes media available from a Source object. @@ -221,7 +223,8 @@ def index_source_task(source_id): if published_dt is not None: media.published = published_dt try: - media.save() + with atomic(): + media.save() log.debug(f'Indexed media: {source} / {media}') # log the new media instances new_media_instance = ( @@ -611,9 +614,10 @@ def save_all_media_for_source(source_id): # Trigger the post_save signal for each media item linked to this source as various # flags may need to be recalculated - for media in mqs: - if media.uuid not in already_saved: - media.save() + with atomic(): + for media in mqs: + if media.uuid not in already_saved: + media.save() @background(schedule=60, remove_existing_tasks=True) @@ -626,6 +630,7 @@ def rename_media(media_id): @background(schedule=300, remove_existing_tasks=True) +@atomic(durable=True) def rename_all_media_for_source(source_id): try: source = Source.objects.get(pk=source_id) @@ -653,7 +658,8 @@ def rename_all_media_for_source(source_id): downloaded=True, ) for media in mqs: - media.rename_files() + with atomic(): + media.rename_files() @background(schedule=60, remove_existing_tasks=True) From 84d42fb2ab1f94a17f99f4ce19b71e3251120515 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 22:02:44 -0400 Subject: [PATCH 156/417] Add more logging to `get_media_info` --- tubesync/sync/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 95eebb8a..a739a4f7 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -156,10 +156,13 @@ def get_media_info(url, days=None): 'check_formats': True, 'daterange': yt_dlp.utils.DateRange(start=start), 'extractor_args': { - 'youtube': {'formats': ['missing_pot']}, 'youtubetab': {'approximate_date': ['true']}, }, + 'sleep_interval_requests': 2, + 'verbose': True if settings.DEBUG else False, }) + if start: + log.debug(f'get_media_info: used date range: {opts["daterange"]} for URL: {url}') response = {} with yt_dlp.YoutubeDL(opts) as y: try: From 4b3605f65ee46c4647a66ce7acab19258a9b6373 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 22:31:56 -0400 Subject: [PATCH 157/417] Use a temporary directory for testing formats --- tubesync/sync/youtube.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index a739a4f7..48cff0c9 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -146,6 +146,14 @@ def get_media_info(url, days=None): f'yesterday-{days!s}days' if days 
else None ) opts = get_yt_opts() + paths = opts.get('paths', dict()) + if 'temp' in paths: + temp_dir_obj = TemporaryDirectory(prefix='.yt_dlp-', dir=paths['temp']) + temp_dir_path = Path(temp_dir_obj.name) + (temp_dir_path / '.ignore').touch(exist_ok=True) + paths.update({ + 'temp': str(temp_dir_path), + }) opts.update({ 'ignoreerrors': False, # explicitly set this to catch exceptions 'ignore_no_formats_error': False, # we must fail first to try again with this enabled @@ -158,6 +166,7 @@ def get_media_info(url, days=None): 'extractor_args': { 'youtubetab': {'approximate_date': ['true']}, }, + 'paths': paths, 'sleep_interval_requests': 2, 'verbose': True if settings.DEBUG else False, }) From 79ed138aa103de6969b4220df3b6f36c1db97adf Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Mar 2025 22:45:49 -0400 Subject: [PATCH 158/417] fixup: typo --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d85c6be7..4a1884d8 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -17,7 +17,7 @@ from django.conf import settings from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile from django.utils import timezone -from django.db.tansaction import atomic +from django.db.transaction import atomic from django.db.utils import IntegrityError from django.utils.translation import gettext_lazy as _ from background_task import background From 27955d7b7b51533a77a6640c2d8c1c698b04ee7a Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Mar 2025 16:42:25 -0400 Subject: [PATCH 159/417] Improve checking media efficiency --- tubesync/sync/signals.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index c03a4f72..404974c7 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -167,6 +167,7 @@ def task_task_failed(sender, task_id, completed_task, **kwargs): @receiver(post_save, sender=Media) def media_post_save(sender, instance, created, **kwargs): + media = instance # If the media is skipped manually, bail. 
if instance.manual_skip: return @@ -176,12 +177,27 @@ def media_post_save(sender, instance, created, **kwargs): # Reset the skip flag if the download cap has changed if the media has not # already been downloaded downloaded = instance.downloaded + existing_media_metadata_task = get_media_metadata_task(str(instance.pk)) + existing_media_download_task = get_media_download_task(str(instance.pk)) if not downloaded: - skip_changed = filter_media(instance) + # the decision to download was already made if a download task exists + if not existing_media_download_task: + # Recalculate the "can_download" flag, this may + # need to change if the source specifications have been changed + if instance.metadata: + if instance.get_format_str(): + if not instance.can_download: + instance.can_download = True + can_download_changed = True + else: + if instance.can_download: + instance.can_download = False + can_download_changed = True + # Recalculate the "skip_changed" flag + skip_changed = filter_media(instance) else: # Downloaded media might need to be renamed # Check settings before any rename tasks are scheduled - media = instance rename_sources_setting = settings.RENAME_SOURCES or list() create_rename_task = ( ( @@ -200,18 +216,6 @@ def media_post_save(sender, instance, created, **kwargs): remove_existing_tasks=True ) - # Recalculate the "can_download" flag, this may - # need to change if the source specifications have been changed - if instance.metadata: - if instance.get_format_str(): - if not instance.can_download: - instance.can_download = True - can_download_changed = True - else: - if instance.can_download: - instance.can_download = False - can_download_changed = True - existing_media_metadata_task = get_media_metadata_task(str(instance.pk)) # If the media is missing metadata schedule it to be downloaded if not (instance.skip or instance.metadata or existing_media_metadata_task): log.info(f'Scheduling task to download metadata for: {instance.url}') @@ -239,7 +243,6 @@ def media_post_save(sender, instance, created, **kwargs): verbose_name=verbose_name.format(instance.name), remove_existing_tasks=True ) - existing_media_download_task = get_media_download_task(str(instance.pk)) # If the media has not yet been downloaded schedule it to be downloaded if not (instance.media_file_exists or instance.filepath.exists() or existing_media_download_task): # The file was deleted after it was downloaded, skip this media. 
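In outline, the reordering above places a cheap guard in front of the flag recalculation: once a download task exists, the decision to download has already been made, so the format and filter work can be skipped. A condensed sketch with illustrative names (the real handler operates on a Media instance and its queued tasks):

    def needs_flag_recalculation(downloaded, download_task_exists):
        # skip re-running get_format_str() and filter_media() once a
        # download has been queued or completed
        return not downloaded and not download_task_exists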
From 3f10e45e6a1fa74380832d28f22a781616655118 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Mar 2025 20:26:47 -0400 Subject: [PATCH 160/417] Add timing and profiling decorators --- tubesync/common/utils.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index 95efd9f3..a292ddc5 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -1,7 +1,11 @@ +import cProfile +import emoji +import io +import pstats import string +import time from datetime import datetime from urllib.parse import urlunsplit, urlencode, urlparse -import emoji from yt_dlp.utils import LazyList from .errors import DatabaseConnectionError @@ -136,3 +140,29 @@ def json_serial(obj): if isinstance(obj, LazyList): return list(obj) raise TypeError(f'Type {type(obj)} is not json_serial()-able') + + +def time_func(func): + def wrapper(*args, **kwargs): + start = time.perf_counter() + result = func(*args, **kwargs) + end = time.perf_counter() + elapsed = end - start + return (result, (start - end, start, end,),) + return wrapper + + +def profile_func(func): + def wrapper(*args, **kwargs): + s = io.StringIO() + with cProfile.Profile() as pr: + pr.enable() + result = func(*args, **kwargs) + pr.disable() + ps = pstats.Stats(pr, stream=s) + ps.sort_stats( + pstats.SortKey.CUMULATIVE + ).print_stats() + return (result, (s.getvalue(), ps),) + return wrapper + From 5f11779dc2c55ee7e6f673cef8feb0f264679912 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 05:17:46 -0400 Subject: [PATCH 161/417] Time some index functions --- tubesync/sync/tasks.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index fdc954a3..58768b04 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -183,16 +183,38 @@ def index_source_task(source_id): ''' Indexes media available from a Source object. ''' + + from common.utils import time_func, profile_func + def get_source(source_id): + @time_func + def f(sid): + return Source.objects.get(pk=sid) + rt = f(source_id) + elapsed = rt[1][0] + log.debug(f'get_source: took {elapsed:.6f} seconds') + return rt[0] + def time_model_function(instance, func): + @time_func + def f(o, c): + return o.c() + rt = f(instance, func) + elapsed = rt[1][0] + log.debug(f'time_model_function: {instance}: {func}: took {elapsed:.6f} seconds') + return rt[0] + try: - source = Source.objects.get(pk=source_id) + #source = Source.objects.get(pk=source_id) + source = get_source(source_id) except Source.DoesNotExist: # Task triggered but the Source has been deleted, delete the task return # Reset any errors source.has_failed = False - source.save() + #source.save() + time_model_function(source, source.save) # Index the source - videos = source.index_media() + #videos = source.index_media() + videos = time_model_function(source, source.index_media) if not videos: raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no ' f'media to index, is the source key valid? 
Check the ' @@ -200,7 +222,8 @@ def index_source_task(source_id): f'is reachable') # Got some media, update the last crawl timestamp source.last_crawl = timezone.now() - source.save() + #source.save() + time_model_function(source, source.save) log.info(f'Found {len(videos)} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) for video in videos: @@ -221,7 +244,8 @@ def index_source_task(source_id): if published_dt is not None: media.published = published_dt try: - media.save() + #media.save() + time_model_function(media, media.save) log.debug(f'Indexed media: {source} / {media}') # log the new media instances new_media_instance = ( From d69866ded8c391715ea5a315a44aa7109f21d67b Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 05:45:00 -0400 Subject: [PATCH 162/417] fixup: call function with instance --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 58768b04..bfec2d2e 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -196,7 +196,7 @@ def index_source_task(source_id): def time_model_function(instance, func): @time_func def f(o, c): - return o.c() + return c(o) rt = f(instance, func) elapsed = rt[1][0] log.debug(f'time_model_function: {instance}: {func}: took {elapsed:.6f} seconds') From 76c60830eb093822344611635c4c4736b811bceb Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 06:54:33 -0400 Subject: [PATCH 163/417] Positive timing values --- tubesync/common/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index a292ddc5..7bf89041 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -147,8 +147,7 @@ def time_func(func): start = time.perf_counter() result = func(*args, **kwargs) end = time.perf_counter() - elapsed = end - start - return (result, (start - end, start, end,),) + return (result, (end - start, start, end,),) return wrapper @@ -163,6 +162,6 @@ def profile_func(func): ps.sort_stats( pstats.SortKey.CUMULATIVE ).print_stats() - return (result, (s.getvalue(), ps),) + return (result, (s.getvalue(), ps, s),) return wrapper From 08fb55eb64c0db5de459fe03f6b7d39517f64905 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 06:59:40 -0400 Subject: [PATCH 164/417] fixup: we don't need to assign self --- tubesync/sync/tasks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index bfec2d2e..10c91ca3 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -195,9 +195,9 @@ def index_source_task(source_id): return rt[0] def time_model_function(instance, func): @time_func - def f(o, c): - return c(o) - rt = f(instance, func) + def f(c): + return c() + rt = f(func) elapsed = rt[1][0] log.debug(f'time_model_function: {instance}: {func}: took {elapsed:.6f} seconds') return rt[0] From 8d65b5785216c80efd6881c5a9415fe5e91d259f Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 07:26:35 -0400 Subject: [PATCH 165/417] Remove explicit `libsass` We need this to use the package added in #808. 
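One way to confirm the switch inside a built image is to check which module Python resolves; a rough check, assuming Debian's usual dist-packages layout:

    import sass

    # expect a path like /usr/lib/python3/dist-packages/sass.py,
    # not a pip-installed copy under site-packages
    print(sass.__file__)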
--- Pipfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Pipfile b/Pipfile index b0aad1e4..9f8adf33 100644 --- a/Pipfile +++ b/Pipfile @@ -9,7 +9,6 @@ autopep8 = "*" [packages] django = "*" django-sass-processor = "*" -libsass = "*" pillow = "*" whitenoise = "*" gunicorn = "*" From e174c42bf50bb2273298ae834d1a935bc0c15155 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 08:14:23 -0400 Subject: [PATCH 166/417] Remove explicit `django-compressor` Also, add some of the useful optional dependencies. --- Pipfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Pipfile b/Pipfile index 9f8adf33..2b051bf8 100644 --- a/Pipfile +++ b/Pipfile @@ -8,11 +8,10 @@ autopep8 = "*" [packages] django = "*" -django-sass-processor = "*" +django-sass-processor = {extras = ["management-command"], version = "*"} pillow = "*" whitenoise = "*" gunicorn = "*" -django-compressor = "*" httptools = "*" django-background-tasks = ">=1.2.8" django-basicauth = "*" @@ -21,3 +20,5 @@ mysqlclient = "*" yt-dlp = "*" requests = {extras = ["socks"], version = "*"} emoji = "*" +brotli = "*" +html5lib = "*" From 239cfca534994692b97665845b906e18d8372860 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 08:24:02 -0400 Subject: [PATCH 167/417] Use socks support from the operating system --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 52f20479..47892191 100644 --- a/Dockerfile +++ b/Dockerfile @@ -276,6 +276,7 @@ RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ pkgconf \ python3 \ python3-libsass \ + python3-python-socks \ python3-wheel \ curl \ less \ From 7db0048f80e8869d25764b031ff430420b91a0e3 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 08:25:57 -0400 Subject: [PATCH 168/417] Remove requests[socks] from Pipfile --- Pipfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Pipfile b/Pipfile index 9f8adf33..a6bc52d8 100644 --- a/Pipfile +++ b/Pipfile @@ -19,5 +19,4 @@ django-basicauth = "*" psycopg2-binary = "*" mysqlclient = "*" yt-dlp = "*" -requests = {extras = ["socks"], version = "*"} emoji = "*" From 092e5ef54cb78c28834ee39fe35dd1f99ca194da Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 08:38:25 -0400 Subject: [PATCH 169/417] Add `python3-socks` --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 47892191..9f579b58 100644 --- a/Dockerfile +++ b/Dockerfile @@ -277,6 +277,7 @@ RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ python3 \ python3-libsass \ python3-python-socks \ + python3-socks \ python3-wheel \ curl \ less \ From 2cd33672afa59939c59f4cebcef80a533fe40966 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 09:05:38 -0400 Subject: [PATCH 170/417] Remove `python3-python-socks` --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 9f579b58..43aebe28 100644 --- a/Dockerfile +++ b/Dockerfile @@ -276,7 +276,6 @@ RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ pkgconf \ python3 \ python3-libsass \ - python3-python-socks \ python3-socks \ python3-wheel \ curl \ From aa54a88cdb1ae9cfe35d779378e6ce2a9e9668f5 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 09:25:57 -0400 Subject: [PATCH 171/417] Add `urllib3[socks]` and `requests[socks]` These each have slightly different version requirements for `PySocks`. 
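The competing pins can be read back from the installed metadata; a quick sketch using only the standard library (the exact specifier strings vary by release):

    from importlib.metadata import requires

    print([r for r in requires('requests') if 'socks' in r.lower()])
    print([r for r in requires('urllib3') if 'socks' in r.lower()])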
--- Pipfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Pipfile b/Pipfile index a6bc52d8..0080822f 100644 --- a/Pipfile +++ b/Pipfile @@ -18,5 +18,7 @@ django-background-tasks = ">=1.2.8" django-basicauth = "*" psycopg2-binary = "*" mysqlclient = "*" +urllib3 = {extras = ["socks"], version = "*"} +requests = {extras = ["socks"], version = "*"} yt-dlp = "*" emoji = "*" From f0f7edfd479827e081960fe0426486b06fe9c688 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 09:28:26 -0400 Subject: [PATCH 172/417] Add `PySocks` Upgrade to the latest, if the OS version is too old. --- Pipfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Pipfile b/Pipfile index 0080822f..907755b7 100644 --- a/Pipfile +++ b/Pipfile @@ -18,6 +18,7 @@ django-background-tasks = ">=1.2.8" django-basicauth = "*" psycopg2-binary = "*" mysqlclient = "*" +PySocks = "*" urllib3 = {extras = ["socks"], version = "*"} requests = {extras = ["socks"], version = "*"} yt-dlp = "*" From c865890f32d1f8d5cb2b9dc0af0eb303615ee1a4 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 11:56:55 -0400 Subject: [PATCH 173/417] Delete pip.conf --- pip.conf | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 pip.conf diff --git a/pip.conf b/pip.conf deleted file mode 100644 index e92bae15..00000000 --- a/pip.conf +++ /dev/null @@ -1,2 +0,0 @@ -[global] -extra-index-url=https://www.piwheels.org/simple From 552e46faf4462dbcac5353838e1cac64ba73b74a Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 12:00:04 -0400 Subject: [PATCH 174/417] Remove `/etc/pip.conf` from Dockerfile --- Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 43aebe28..173994d1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -286,9 +286,6 @@ RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ apt-get -y autoclean && \ rm -rf /tmp/* -# Copy over pip.conf to use piwheels -COPY pip.conf /etc/pip.conf - # Switch workdir to the the app WORKDIR /app From af0e300ef1750b2d566252d731a47e638063ebdf Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 16:59:33 -0400 Subject: [PATCH 175/417] Download metadata before indexing another source --- tubesync/sync/tasks.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 4a1884d8..498d73fe 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -234,6 +234,13 @@ def index_source_task(source_id): ) if new_media_instance: log.info(f'Indexed new media: {source} / {media}') + log.info(f'Scheduling task to download metadata for: {media.url}') + verbose_name = _('Downloading metadata for "{}"') + download_media_metadata( + str(media.pk), + priority=9, + verbose_name=verbose_name.format(media.pk), + ) except IntegrityError as e: log.error(f'Index media failed: {source} / {media} with "{e}"') # Tack on a cleanup of old completed tasks From 3c714859dbcb9716ed0ed416b77668c1a1a7a568 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 17:14:44 -0400 Subject: [PATCH 176/417] Rename then check media --- tubesync/sync/signals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 404974c7..6f43e1bc 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -129,7 +129,7 @@ def source_post_save(sender, instance, created, **kwargs): verbose_name = _('Checking all media for source "{}"') save_all_media_for_source( str(instance.pk), - priority=9, + priority=25, 
verbose_name=verbose_name.format(instance.name), remove_existing_tasks=True ) @@ -211,7 +211,7 @@ def media_post_save(sender, instance, created, **kwargs): rename_media( str(media.pk), queue=str(media.pk), - priority=16, + priority=20, verbose_name=verbose_name.format(media.key, media.name), remove_existing_tasks=True ) From 89f2f2b0d3f57b7a343840c162051358e9d9ab0d Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 17:39:33 -0400 Subject: [PATCH 177/417] Show progress on tasks page --- tubesync/sync/tasks.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 4a1884d8..a75c269a 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -203,9 +203,12 @@ def index_source_task(source_id): # Got some media, update the last crawl timestamp source.last_crawl = timezone.now() source.save() - log.info(f'Found {len(videos)} media items for source: {source}') + num_videos = len(videos) + log.info(f'Found {num_videos} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) - for video in videos: + tvn_format = '[{}' + f'/{num_videos}] {task.verbose_name}' + for vn, video in enumerate(videos, start=1): + task.verbose_name = tvn_format.format(vn) # Create or update each video as a Media object key = video.get(source.key_field, None) if not key: From cfb117b555cf42b1f9f79f19f2a6d863bf849426 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 21:10:42 -0400 Subject: [PATCH 178/417] Tweaked logged message --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 10c91ca3..9760b6cf 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -199,7 +199,7 @@ def index_source_task(source_id): return c() rt = f(func) elapsed = rt[1][0] - log.debug(f'time_model_function: {instance}: {func}: took {elapsed:.6f} seconds') + log.debug(f'time_model_function: {func}: took {elapsed:.6f} seconds') return rt[0] try: From ef5b939caf2a3f8270e3f18823a5abdaebc02dd7 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 12 Mar 2025 13:02:55 -0400 Subject: [PATCH 179/417] Check the copied `nginx` configuration Checking before the copy doesn't help. Fixes #804 --- Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 43aebe28..81b3e31e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -352,8 +352,6 @@ COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings. 
RUN set -x && \ # Make absolutely sure we didn't accidentally bundle a SQLite dev database rm -rf /app/db.sqlite3 && \ - # Check nginx configuration - nginx -t && \ # Run any required app commands /usr/bin/python3 -B /app/manage.py compilescss && \ /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \ @@ -373,6 +371,9 @@ RUN set -x && \ # Copy root COPY config/root / +# Check nginx configuration copied from config/root/etc +RUN set -x && nginx -t + # patch background_task COPY patches/background_task/ \ /usr/local/lib/python3/dist-packages/background_task/ From 2640a4ae7cb4b7d4d7164ef4f25876955f8e8551 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Thu, 13 Mar 2025 02:55:09 +0900 Subject: [PATCH 180/417] docs: update README.md Rasperry Pi -> Raspberry Pi --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index af3cd910..17367a4a 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ currently just Plex, to complete the PVR experience. TubeSync is designed to be run in a container, such as via Docker or Podman. It also works in a Docker Compose stack. `amd64` (most desktop PCs and servers) and `arm64` -(modern ARM computers, such as the Rasperry Pi 3 or later) are supported. +(modern ARM computers, such as the Raspberry Pi 3 or later) are supported. Example (with Docker on *nix): @@ -356,7 +356,7 @@ etc.). Configuration of this is beyond the scope of this README. Only two are supported, for the moment: - `amd64` (most desktop PCs and servers) - `arm64` -(modern ARM computers, such as the Rasperry Pi 3 or later) +(modern ARM computers, such as the Raspberry Pi 3 or later) Others may be made available, if there is demand. From 3ad0fad72e27ee38171f0cf809b40fb5fd4fcc03 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 12 Mar 2025 15:38:52 -0400 Subject: [PATCH 181/417] Get task functions --- tubesync/sync/tasks.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index a75c269a..1f7ec3ab 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -114,27 +114,26 @@ def get_source_completed_tasks(source_id, only_errors=False): q['failed_at__isnull'] = False return CompletedTask.objects.filter(**q).order_by('-failed_at') +def get_tasks(task_name, id=None, /, instance=None): + assert not (id is None and instance is None) + arg = str(id or instance.pk) + return Task.objects.get_task(str(task_name), args=(arg,),) + +def get_first_task(task_name, id=None, /, *, instance=None): + tqs = get_tasks(task_name, id, instance).order_by('run_at') + return tqs[0] if tqs.count() else False def get_media_download_task(media_id): - try: - return Task.objects.get_task('sync.tasks.download_media', - args=(str(media_id),))[0] - except IndexError: - return False + return get_first_task('sync.tasks.download_media', media_id) def get_media_metadata_task(media_id): - try: - return Task.objects.get_task('sync.tasks.download_media_metadata', - args=(str(media_id),))[0] - except IndexError: - return False + return get_first_task('sync.tasks.download_media_metadata', media_id) def get_media_premiere_task(media_id): - try: - return Task.objects.get_task('sync.tasks.wait_for_media_premiere', - args=(str(media_id),))[0] - except IndexError: - return False + return get_first_task('sync.tasks.wait_for_media_premiere', media_id) + +def get_source_index_task(source_id): + return get_first_task('sync.tasks.index_source_task', source_id) def 
delete_task_by_source(task_name, source_id): now = timezone.now() @@ -206,6 +205,7 @@ def index_source_task(source_id): num_videos = len(videos) log.info(f'Found {num_videos} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) + task = get_source_index_task(source_id) tvn_format = '[{}' + f'/{num_videos}] {task.verbose_name}' for vn, video in enumerate(videos, start=1): task.verbose_name = tvn_format.format(vn) From c0355e8f696973b72f1710d353c52a395c0ece9d Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 12 Mar 2025 15:46:55 -0400 Subject: [PATCH 182/417] Reset task verbose name after the loop ends --- tubesync/sync/tasks.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 1f7ec3ab..c99d83aa 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -206,9 +206,12 @@ def index_source_task(source_id): log.info(f'Found {num_videos} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) task = get_source_index_task(source_id) - tvn_format = '[{}' + f'/{num_videos}] {task.verbose_name}' + if task: + verbose_name = task.verbose_name + tvn_format = '[{}' + f'/{num_videos}] {verbose_name}' for vn, video in enumerate(videos, start=1): - task.verbose_name = tvn_format.format(vn) + if task: + task.verbose_name = tvn_format.format(vn) # Create or update each video as a Media object key = video.get(source.key_field, None) if not key: @@ -239,6 +242,8 @@ def index_source_task(source_id): log.info(f'Indexed new media: {source} / {media}') except IntegrityError as e: log.error(f'Index media failed: {source} / {media} with "{e}"') + if task: + task.verbose_name = verbose_name # Tack on a cleanup of old completed tasks cleanup_completed_tasks() # Tack on a cleanup of old media From 408a3e1c952560324d086d719606365f922b555a Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 12 Mar 2025 17:32:19 -0400 Subject: [PATCH 183/417] Save the updated `verbose_name` of the task --- tubesync/sync/tasks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index c99d83aa..3b3dce45 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -212,6 +212,7 @@ def index_source_task(source_id): for vn, video in enumerate(videos, start=1): if task: task.verbose_name = tvn_format.format(vn) + task.save(update_fields=('verbose_name')) # Create or update each video as a Media object key = video.get(source.key_field, None) if not key: @@ -243,7 +244,8 @@ def index_source_task(source_id): except IntegrityError as e: log.error(f'Index media failed: {source} / {media} with "{e}"') if task: - task.verbose_name = verbose_name + task.verbose_name = verbose_name + task.save(update_fields=('verbose_name')) # Tack on a cleanup of old completed tasks cleanup_completed_tasks() # Tack on a cleanup of old media From d1cc05a8f41df3f542558ff60d085f2b2f649851 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 12 Mar 2025 20:05:58 -0400 Subject: [PATCH 184/417] Use '/tmp' instead of '/' for odd cases --- tubesync/sync/signals.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 6f43e1bc..338e912e 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -43,6 +43,8 @@ def source_pre_save(sender, instance, **kwargs): work_directory = existing_dirpath for _count in range(parents_count, 0, -1): work_directory = work_directory.parent + if 
Path(existing_dirpath.root).resolve(strict=True) == Path(work_directory).resolve(strict=True): + work_directory = Path('/tmp') with TemporaryDirectory(suffix=('.'+new_dirpath.name), prefix='.tmp.', dir=work_directory) as tmp_dir: tmp_dirpath = Path(tmp_dir) existed = None From f7dbd0cf8263e504f8dc64e1e8926d55be617698 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 12 Mar 2025 20:17:18 -0400 Subject: [PATCH 185/417] We can't rename across devices so don't leave `/downloads` --- tubesync/sync/signals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 338e912e..c348c714 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -44,7 +44,7 @@ def source_pre_save(sender, instance, **kwargs): for _count in range(parents_count, 0, -1): work_directory = work_directory.parent if Path(existing_dirpath.root).resolve(strict=True) == Path(work_directory).resolve(strict=True): - work_directory = Path('/tmp') + work_directory = Path('/downloads') with TemporaryDirectory(suffix=('.'+new_dirpath.name), prefix='.tmp.', dir=work_directory) as tmp_dir: tmp_dirpath = Path(tmp_dir) existed = None From 469858a33aea2cae2137850ef86ae0c4e9b63aac Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 12 Mar 2025 20:28:08 -0400 Subject: [PATCH 186/417] Use the `DOWNLOADS_BASE_DIR` setting --- tubesync/sync/signals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index c348c714..555ea9be 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -44,7 +44,7 @@ def source_pre_save(sender, instance, **kwargs): for _count in range(parents_count, 0, -1): work_directory = work_directory.parent if Path(existing_dirpath.root).resolve(strict=True) == Path(work_directory).resolve(strict=True): - work_directory = Path('/downloads') + work_directory = Path(settings.DOWNLOADS_BASE_DIR) with TemporaryDirectory(suffix=('.'+new_dirpath.name), prefix='.tmp.', dir=work_directory) as tmp_dir: tmp_dirpath = Path(tmp_dir) existed = None From 291631f76fe254b3eaf20707e7806cd12652db5b Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 12 Mar 2025 20:40:14 -0400 Subject: [PATCH 187/417] Use `DOWNLOAD_ROOT` setting instead --- tubesync/sync/signals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 555ea9be..8bea1ce2 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -43,8 +43,8 @@ def source_pre_save(sender, instance, **kwargs): work_directory = existing_dirpath for _count in range(parents_count, 0, -1): work_directory = work_directory.parent - if Path(existing_dirpath.root).resolve(strict=True) == Path(work_directory).resolve(strict=True): - work_directory = Path(settings.DOWNLOADS_BASE_DIR) + if not Path(work_directory).resolve(strict=True).is_relative_to(Path(settings.DOWNLOAD_ROOT)): + work_directory = Path(settings.DOWNLOAD_ROOT) with TemporaryDirectory(suffix=('.'+new_dirpath.name), prefix='.tmp.', dir=work_directory) as tmp_dir: tmp_dirpath = Path(tmp_dir) existed = None From 487e8011517580706c4eb155fb55cf1fe9402e0d Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 13 Mar 2025 03:23:27 -0400 Subject: [PATCH 188/417] Increase episode number calculation speed --- tubesync/sync/models.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 2e802599..9ab126db 100644 --- 
a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1507,17 +1507,35 @@ class Media(models.Model): def calculate_episode_number(self): if self.source.is_playlist: - sorted_media = Media.objects.filter(source=self.source) + sorted_media = Media.objects.filter( + source=self.source, + metadata__isnull=False, + ).order_by( + 'published', + 'created', + 'key', + ) else: - self_year = self.upload_date.year if self.upload_date else self.created.year - filtered_media = Media.objects.filter(source=self.source, published__year=self_year) - filtered_media = [m for m in filtered_media if m.upload_date is not None] - sorted_media = sorted(filtered_media, key=lambda x: (x.upload_date, x.key)) + self_year = self.created.year # unlikely to be accurate + if self.published: + self_year = self.published.year + elif self.has_metadata and self.upload_date: + self_year = self.upload_date.year + elif self.download_date: + # also, unlikely to be accurate + self_year = self.download_date.year + sorted_media = Media.objects.filter( + source=self.source, + metadata__isnull=False, + published__year=self_year, + ).order_by( + 'published', + 'created', + 'key', + ) - position_counter = 1 - for media in sorted_media: + for counter, media in enumerate(sorted_media, start=1): if media == self: - return position_counter - position_counter += 1 + return counter def get_episode_str(self, use_padding=False): episode_number = self.calculate_episode_number() From f99c8fc5963d3235f1346c6de164baf5ef806640 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 13 Mar 2025 06:16:01 -0400 Subject: [PATCH 189/417] Use set since tuple is dangerous for strings Even in explicit form, ('verbose_name') is still a parenthesized string rather than a one-element tuple, so update_fields treated it as a collection of characters instead of a single field name. I hate that Python has these little traps. --- tubesync/sync/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 3b3dce45..3bb6a329 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -212,7 +212,7 @@ def index_source_task(source_id): for vn, video in enumerate(videos, start=1): if task: task.verbose_name = tvn_format.format(vn) - task.save(update_fields=('verbose_name')) + task.save(update_fields={'verbose_name'}) # Create or update each video as a Media object key = video.get(source.key_field, None) if not key: @@ -245,7 +245,7 @@ def index_source_task(source_id): log.error(f'Index media failed: {source} / {media} with "{e}"') if task: task.verbose_name = verbose_name - task.save(update_fields=('verbose_name')) + task.save(update_fields={'verbose_name'}) # Tack on a cleanup of old completed tasks cleanup_completed_tasks() # Tack on a cleanup of old media From 68c6b78179e2545aaa4ae2760523c6fdaa98e24a Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 13 Mar 2025 16:46:58 -0400 Subject: [PATCH 190/417] Apply the suggested "Sync Now" changes Fixes #641 --- tubesync/sync/templates/sync/sources.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/templates/sync/sources.html b/tubesync/sync/templates/sync/sources.html index 0c9a2ee8..627f4397 100644 --- a/tubesync/sync/templates/sync/sources.html +++ b/tubesync/sync/templates/sync/sources.html @@ -24,7 +24,7 @@

 {% for source in sources %}
-
+
 {{ source.icon|safe }} {{ source.name }} ({{ source.get_source_type_display }} "{{ source.key }}")
 {{ source.format_summary }}
@@ -34,7 +34,7 @@
 {{ source.media_count }} media items, {{ source.downloaded_count }} downloaded{% if source.delete_old_media and source.days_to_keep > 0 %}, keeping {{ source.days_to_keep }} days of media{% endif %}
 {% endif %}
-
+Sync Now
{% empty %} You haven't added any sources. From 40b4032bbc0452cd4773963069e61afec9c08893 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 13 Mar 2025 16:50:35 -0400 Subject: [PATCH 191/417] Move style to CSS --- tubesync/common/static/styles/tubesync.scss | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tubesync/common/static/styles/tubesync.scss b/tubesync/common/static/styles/tubesync.scss index 30a41fd8..1012ecfc 100644 --- a/tubesync/common/static/styles/tubesync.scss +++ b/tubesync/common/static/styles/tubesync.scss @@ -29,4 +29,9 @@ html { .help-text > i { padding-right: 6px; -} \ No newline at end of file +} + +.issue-641 { + display: block !important; + overflow-wrap: anywhere; +} From 45c9b9ce811272517bc0aa896bfdbb36ad849518 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 13 Mar 2025 16:52:20 -0400 Subject: [PATCH 192/417] Use the new CSS class --- tubesync/sync/templates/sync/sources.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/templates/sync/sources.html b/tubesync/sync/templates/sync/sources.html index 627f4397..0e766e23 100644 --- a/tubesync/sync/templates/sync/sources.html +++ b/tubesync/sync/templates/sync/sources.html @@ -24,7 +24,7 @@
 {% for source in sources %}
-
+
 {{ source.icon|safe }} {{ source.name }} ({{ source.get_source_type_display }} "{{ source.key }}")
 {{ source.format_summary }}
From d991b9593d8acbcd60be1794485dc1cde1925aec Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 07:59:34 -0400 Subject: [PATCH 193/417] Add `Source.is_active` property --- tubesync/sync/models.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 9ab126db..0ae25bd5 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -333,6 +333,15 @@ class Source(models.Model): replaced = self.name.replace('_', '-').replace('&', 'and').replace('+', 'and') return slugify(replaced)[:80] + @property + def is_active(self): + active = ( + self.download_media or + self.index_streams or + self.index_videos + ) + return self.index_schedule and active + @property def is_audio(self): return self.source_resolution == SourceResolution.AUDIO.value From 016cb498abfb178ab4c4de8d2be20f7766071336 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 07:59:34 -0400 Subject: [PATCH 194/417] Add `Source.deactivate()` function --- tubesync/sync/models.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 0ae25bd5..7606d664 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -333,6 +333,18 @@ class Source(models.Model): replaced = self.name.replace('_', '-').replace('&', 'and').replace('+', 'and') return slugify(replaced)[:80] + def deactivate(self): + self.download_media = False + self.index_streams = False + self.index_videos = False + self.index_schedule = IndexSchedule.NEVER + self.save(update_fields={ + 'download_media', + 'index_streams', + 'index_videos', + 'index_schedule', + }) + @property def is_active(self): active = ( From 5a46dce2a13bb6ad1bd6bb6910a6b6c766170182 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 08:11:37 -0400 Subject: [PATCH 195/417] Do not update media servers about media from inactive sources --- tubesync/sync/signals.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 8bea1ce2..3738e3a9 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -344,6 +344,8 @@ def media_post_delete(sender, instance, **kwargs): log.info(f'Deleting file for: {instance} path: {file}') delete_file(file) + if not instance.source.is_active: + return # Schedule a task to update media servers for mediaserver in MediaServer.objects.all(): log.info(f'Scheduling media server updates') From 9d03c0d5d295112d6cff311fe2d798206c145e1e Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 08:14:29 -0400 Subject: [PATCH 196/417] Do not detach the signal --- tubesync/sync/management/commands/delete-source.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/management/commands/delete-source.py b/tubesync/sync/management/commands/delete-source.py index 104ec887..5cdace18 100644 --- a/tubesync/sync/management/commands/delete-source.py +++ b/tubesync/sync/management/commands/delete-source.py @@ -29,8 +29,8 @@ class Command(BaseCommand): except Source.DoesNotExist: raise CommandError(f'Source does not exist with ' f'UUID: {source_uuid}') - # Detach post-delete signal for Media so we don't spam media servers - signals.post_delete.disconnect(media_post_delete, sender=Media) + # Reconfigure the source to not update the disk or media servers + source.deactivate() # Delete the source, triggering pre-delete signals for each media item log.info(f'Found source with UUID "{source.uuid}" with name ' f'"{source.name}" and deleting
it, this may take some time!') @@ -45,7 +45,5 @@ class Command(BaseCommand): verbose_name=verbose_name.format(mediaserver), remove_existing_tasks=True ) - # Re-attach signals - signals.post_delete.connect(media_post_delete, sender=Media) # All done log.info('Done') From 247b9f2a72d9789f8fdb5adc1687f35a4700c214 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 08:15:41 -0400 Subject: [PATCH 197/417] Update the media server quicker than the default --- tubesync/sync/management/commands/delete-source.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/management/commands/delete-source.py b/tubesync/sync/management/commands/delete-source.py index 5cdace18..98ff59b5 100644 --- a/tubesync/sync/management/commands/delete-source.py +++ b/tubesync/sync/management/commands/delete-source.py @@ -42,6 +42,7 @@ class Command(BaseCommand): rescan_media_server( str(mediaserver.pk), priority=0, + schedule=30, verbose_name=verbose_name.format(mediaserver), remove_existing_tasks=True ) From bdf9a69f48bc137fba27256503ab838e7eae29bb Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 08:24:41 -0400 Subject: [PATCH 198/417] Clean up my `git status` --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 17e61eba..c5cd63bc 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,9 @@ __pycache__/ # C extensions *.so +# vim swap files +.*.swp + # Distribution / packaging .Python build/ From 453b9eaa79f205e99386d7e8b2de659450272e8c Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 11:32:34 -0400 Subject: [PATCH 199/417] Mark the directory for removal after the Source is deleted --- tubesync/sync/signals.py | 6 ++++++ tubesync/sync/views.py | 12 ++---------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 3738e3a9..f4f0d25f 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -1,4 +1,5 @@ from pathlib import Path +from shutil import rmtree from tempfile import TemporaryDirectory from django.conf import settings from django.db.models.signals import pre_save, post_save, pre_delete, post_delete @@ -151,6 +152,11 @@ def source_post_delete(sender, instance, **kwargs): # Triggered after a source is deleted log.info(f'Deleting tasks for source: {instance.name}') delete_task_by_source('sync.tasks.index_source_task', instance.pk) + source = instance + # Remove the directory, if the user requested that + directory_path = Path(source.directory_path) + if (directory_path / '.to_be_removed').is_file(): + rmtree(directory_path, True) @receiver(task_failed, sender=Task) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 99844a39..4c8e672b 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -3,7 +3,6 @@ import os import json from base64 import b64decode import pathlib -import shutil import sys from django.conf import settings from django.http import FileResponse, Http404, HttpResponseNotFound, HttpResponseRedirect @@ -415,15 +414,8 @@ class DeleteSourceView(DeleteView, FormMixin): delete_media = True if delete_media_val is not False else False if delete_media: source = self.get_object() - for media in Media.objects.filter(source=source): - if media.media_file: - file_path = media.media_file.path - matching_files = glob.glob(os.path.splitext(file_path)[0] + '.*') - for file in matching_files: - delete_file(file) - directory_path = source.directory_path - if os.path.exists(directory_path): - shutil.rmtree(directory_path, True) + 
directory_path = pathlib.Path(source.directory_path) + (directory_path / '.to_be_removed').touch(exist_ok=True) return super().post(request, *args, **kwargs) def get_success_url(self): From 9f171025a635cbb908387289fec3bf1ea8726da8 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 11:33:32 -0400 Subject: [PATCH 200/417] Remove the index task before trying to delete the Source --- tubesync/sync/signals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index f4f0d25f..66a30232 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -142,6 +142,8 @@ def source_post_save(sender, instance, created, **kwargs): def source_pre_delete(sender, instance, **kwargs): # Triggered before a source is deleted, delete all media objects to trigger # the Media models post_delete signal + log.info(f'Deleting tasks for source: {instance.name}') + delete_task_by_source('sync.tasks.index_source_task', instance.pk) for media in Media.objects.filter(source=instance): log.info(f'Deleting media for source: {instance.name} item: {media.name}') media.delete() @@ -150,8 +152,6 @@ def source_pre_delete(sender, instance, **kwargs): @receiver(post_delete, sender=Source) def source_post_delete(sender, instance, **kwargs): # Triggered after a source is deleted - log.info(f'Deleting tasks for source: {instance.name}') - delete_task_by_source('sync.tasks.index_source_task', instance.pk) source = instance # Remove the directory, if the user requested that directory_path = Path(source.directory_path) From cbed39b798c9ebb80ad0a2c8d21b9ab00ec68d9d Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 12:02:14 -0400 Subject: [PATCH 201/417] Log the Source directory path before deleting --- tubesync/sync/management/commands/delete-source.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/management/commands/delete-source.py b/tubesync/sync/management/commands/delete-source.py index 98ff59b5..206aee7f 100644 --- a/tubesync/sync/management/commands/delete-source.py +++ b/tubesync/sync/management/commands/delete-source.py @@ -34,6 +34,7 @@ class Command(BaseCommand): # Delete the source, triggering pre-delete signals for each media item log.info(f'Found source with UUID "{source.uuid}" with name ' f'"{source.name}" and deleting it, this may take some time!') + log.info(f'Source directory: {source.directory_path}') source.delete() # Update any media servers for mediaserver in MediaServer.objects.all(): From 640f51fe9567d6911cea65553f36faa2be751553 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 13:00:08 -0400 Subject: [PATCH 202/417] Deactivate the Source before deletion --- tubesync/sync/signals.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 66a30232..5f97c563 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -142,6 +142,8 @@ def source_post_save(sender, instance, created, **kwargs): def source_pre_delete(sender, instance, **kwargs): # Triggered before a source is deleted, delete all media objects to trigger # the Media models post_delete signal + log.info(f'Deactivating source: {instance.name}') + instance.deactivate() log.info(f'Deleting tasks for source: {instance.name}') delete_task_by_source('sync.tasks.index_source_task', instance.pk) for media in Media.objects.filter(source=instance): From 81ab9e3c92b0034ec157778f9f56fb43cf3775d2 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 13:32:49 -0400 
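The division of labour that patch 199 introduces is easy to miss: the view only drops a marker file, and the post_delete signal, which runs after the ORM cascade has deleted every Media row, performs the actual removal. In miniature, with an illustrative path:

    from pathlib import Path
    from shutil import rmtree

    directory_path = Path('/downloads/some-channel')   # illustrative only

    # View side: record the user's request without touching any files yet.
    (directory_path / '.to_be_removed').touch(exist_ok=True)

    # Signal side, once the Source row and its media are gone:
    if (directory_path / '.to_be_removed').is_file():
        rmtree(directory_path, True)   # second argument is ignore_errors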
Subject: [PATCH 203/417] Add and use `delete_all_media_for_source` task --- tubesync/sync/signals.py | 19 ++++++++++++++++--- tubesync/sync/tasks.py | 21 +++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 5f97c563..063aa545 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -146,9 +146,22 @@ def source_pre_delete(sender, instance, **kwargs): instance.deactivate() log.info(f'Deleting tasks for source: {instance.name}') delete_task_by_source('sync.tasks.index_source_task', instance.pk) - for media in Media.objects.filter(source=instance): - log.info(f'Deleting media for source: {instance.name} item: {media.name}') - media.delete() + # Schedule deletion of media + verbose_name = _('Deleting all media for source "{}"') + delete_all_media_for_source( + str(instance.pk), + str(instance.name), + priority=1, + verbose_name=verbose_name.format(instance.name), + ) + # Try to do it all immediately + # If this is killed, the scheduled task should do the work instead. + delete_all_media_for_source.now( + str(instance.pk), + str(instance.name), + priority=0, + verbose_name=verbose_name.format(instance.name), + ) @receiver(post_delete, sender=Source) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 498d73fe..dbbd804a 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -690,3 +690,24 @@ def wait_for_media_premiere(media_id): media.title = _(f'Premieres in {hours(media.published - now)} hours') media.save() +@background(schedule=300, remove_existing_tasks=False) +@atomic(durable=True) +def delete_all_media_for_source(source_id, source_name): + source = None + try: + source = Source.objects.get(pk=source_id) + except Source.DoesNotExist: + # Task triggered but the source no longer exists, do nothing + log.error(f'Task delete_all_media_for_source(pk={source_id}) called but no ' + f'source exists with ID: {source_id}') + pass + mqs = Media.objects.all().defer( + 'metadata', + ).filter( + source=source or source_id, + ) + for media in mqs: + log.info(f'Deleting media for source: {source_name} item: {media.name}') + with atomic(): + media.delete() + From fc058a82f7b732c8b0b6f5a085809feafde74991 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 13:52:04 -0400 Subject: [PATCH 204/417] Map the new task --- tubesync/sync/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index dbbd804a..782db84c 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -55,6 +55,7 @@ def map_task_to_instance(task): 'sync.tasks.rename_media': Media, 'sync.tasks.rename_all_media_for_source': Source, 'sync.tasks.wait_for_media_premiere': Media, + 'sync.tasks.delete_all_media_for_source': Source, } MODEL_URL_MAP = { Source: 'sync:source', From 8314019781f6af02d11fa48b5837c9be15e32d65 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 14:59:58 -0400 Subject: [PATCH 205/417] Remove timing measurement from tasks.py --- tubesync/sync/tasks.py | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 9760b6cf..10ab6905 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -184,37 +184,16 @@ def index_source_task(source_id): Indexes media available from a Source object. 
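Patch 203 leans on a django-background-tasks idiom worth spelling out: calling the decorated function only queues a task row, while the .now attribute the decorator adds invokes the original function synchronously. Roughly (the task body is elided; scheduler-only keyword arguments such as priority must not reach .now(), which is what patch 211 later corrects):

    from background_task import background

    @background(schedule=300, remove_existing_tasks=False)
    def delete_all_media_for_source(source_id, source_name):
        ...   # deletion loop as in the diff above

    # Queue a safety-net run for later...
    delete_all_media_for_source(str(source.pk), str(source.name), priority=1)
    # ...then attempt the same work immediately, bypassing the scheduler.
    delete_all_media_for_source.now(str(source.pk), str(source.name))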
''' - from common.utils import time_func, profile_func - def get_source(source_id): - @time_func - def f(sid): - return Source.objects.get(pk=sid) - rt = f(source_id) - elapsed = rt[1][0] - log.debug(f'get_source: took {elapsed:.6f} seconds') - return rt[0] - def time_model_function(instance, func): - @time_func - def f(c): - return c() - rt = f(func) - elapsed = rt[1][0] - log.debug(f'time_model_function: {func}: took {elapsed:.6f} seconds') - return rt[0] - try: - #source = Source.objects.get(pk=source_id) - source = get_source(source_id) + source = Source.objects.get(pk=source_id) except Source.DoesNotExist: # Task triggered but the Source has been deleted, delete the task return # Reset any errors source.has_failed = False - #source.save() - time_model_function(source, source.save) + source.save() # Index the source - #videos = source.index_media() - videos = time_model_function(source, source.index_media) + videos = source.index_media() if not videos: raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no ' f'media to index, is the source key valid? Check the ' @@ -222,8 +201,7 @@ def index_source_task(source_id): f'is reachable') # Got some media, update the last crawl timestamp source.last_crawl = timezone.now() - #source.save() - time_model_function(source, source.save) + source.save() log.info(f'Found {len(videos)} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) for video in videos: @@ -244,8 +222,7 @@ def index_source_task(source_id): if published_dt is not None: media.published = published_dt try: - #media.save() - time_model_function(media, media.save) + media.save() log.debug(f'Indexed media: {source} / {media}') # log the new media instances new_media_instance = ( From 393330a99fb6c351338f30d6e921212b143bfe68 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 15:03:22 -0400 Subject: [PATCH 206/417] Remove an extra blank line --- tubesync/sync/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 10ab6905..fdc954a3 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -183,7 +183,6 @@ def index_source_task(source_id): ''' Indexes media available from a Source object. ''' - try: source = Source.objects.get(pk=source_id) except Source.DoesNotExist: From 054943854326ffe612eef4beaa6669109c534f8b Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 15:16:31 -0400 Subject: [PATCH 207/417] fixup! Do not update media servers about media from inactive sources --- tubesync/sync/signals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 063aa545..f1577f72 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -365,7 +365,7 @@ def media_post_delete(sender, instance, **kwargs): log.info(f'Deleting file for: {instance} path: {file}') delete_file(file) - if not instance.source.is_active + if not instance.source.is_active: return # Schedule a task to update media servers for mediaserver in MediaServer.objects.all(): From 1d3e880c95802318437996272ed668c5397c8af6 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 15:24:48 -0400 Subject: [PATCH 208/417] fixup! 
Add `Source.is_active` property --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 7606d664..176b69ee 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -352,7 +352,7 @@ class Source(models.Model): self.index_streams or self.index_videos ) - return self.source.index_schedule and active + return self.index_schedule and active @property def is_audio(self): From b18760082f58bb468a3070b5781acc49134a8e62 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 15:32:52 -0400 Subject: [PATCH 209/417] fixup! Add and use `delete_all_media_for_source` task --- tubesync/sync/signals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index f1577f72..f6ad7826 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -13,8 +13,8 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta download_media_thumbnail, download_media_metadata, map_task_to_instance, check_source_directory_exists, download_media, rescan_media_server, download_source_images, - save_all_media_for_source, rename_media, - get_media_metadata_task, get_media_download_task) + delete_all_media_for_source, save_all_media_for_source, + rename_media, get_media_metadata_task, get_media_download_task) from .utils import delete_file, glob_quote, mkdir_p from .filtering import filter_media from .choices import Val, YouTube_SourceType From 51a6a08f9c8768004f4b40ec369683784bbde23f Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 15:42:03 -0400 Subject: [PATCH 210/417] This task is being called from a transaction already --- tubesync/sync/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 782db84c..d004362a 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -692,7 +692,6 @@ def wait_for_media_premiere(media_id): media.save() @background(schedule=300, remove_existing_tasks=False) -@atomic(durable=True) def delete_all_media_for_source(source_id, source_name): source = None try: From 7997b42ca656dc136c11844894c33573c813e2cf Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 15:45:22 -0400 Subject: [PATCH 211/417] Do not use keyword arguments with the underlying function --- tubesync/sync/signals.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index f6ad7826..812d4c85 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -159,8 +159,6 @@ def source_pre_delete(sender, instance, **kwargs): delete_all_media_for_source.now( str(instance.pk), str(instance.name), - priority=0, - verbose_name=verbose_name.format(instance.name), ) From 22a7cb76b00f2dad27d355aa148689d8cc38c1f5 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 11:09:43 -0400 Subject: [PATCH 212/417] Clean up more tasks for deleted sources --- tubesync/sync/signals.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 812d4c85..666155e2 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -146,7 +146,11 @@ def source_pre_delete(sender, instance, **kwargs): instance.deactivate() log.info(f'Deleting tasks for source: {instance.name}') delete_task_by_source('sync.tasks.index_source_task', instance.pk) + delete_task_by_source('sync.tasks.check_source_directory_exists', instance.pk) + 
delete_task_by_source('sync.tasks.rename_all_media_for_source', instance.pk) + delete_task_by_source('sync.tasks.save_all_media_for_source', instance.pk) # Schedule deletion of media + delete_task_by_source('sync.tasks.delete_all_media_for_source', instance.pk) verbose_name = _('Deleting all media for source "{}"') delete_all_media_for_source( str(instance.pk), @@ -166,9 +170,16 @@ def source_pre_delete(sender, instance, **kwargs): def source_post_delete(sender, instance, **kwargs): # Triggered after a source is deleted source = instance + log.info(f'Deleting tasks for removed source: {source.name}') + delete_task_by_source('sync.tasks.index_source_task', instance.pk) + delete_task_by_source('sync.tasks.check_source_directory_exists', instance.pk) + delete_task_by_source('sync.tasks.delete_all_media_for_source', instance.pk) + delete_task_by_source('sync.tasks.rename_all_media_for_source', instance.pk) + delete_task_by_source('sync.tasks.save_all_media_for_source', instance.pk) # Remove the directory, if the user requested that directory_path = Path(source.directory_path) if (directory_path / '.to_be_removed').is_file(): + log.info(f'Deleting directory for: {source.name}: {directory_path}') rmtree(directory_path, True) From 0ba16d08d7235cb1b92d50abbd4331fbf24971f1 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 11:19:08 -0400 Subject: [PATCH 213/417] Add `geo_verification_proxy` to default `yt-dlp` settings --- tubesync/tubesync/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index ff88a669..f00eceaa 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -172,6 +172,7 @@ YOUTUBE_DEFAULTS = { 'ignoreerrors': True, # Skip on errors (such as unavailable videos in playlists) 'cachedir': False, # Disable on-disk caching 'addmetadata': True, # Embed metadata during postprocessing where available + 'geo_verification_proxy': getenv('geo_verification_proxy'), } COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt' From ed46bf96f88b1a17967dd7b605ffe5264247c547 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 11:21:55 -0400 Subject: [PATCH 214/417] Set to `None` for empty strings --- tubesync/tubesync/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index f00eceaa..fc309b28 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -172,7 +172,7 @@ YOUTUBE_DEFAULTS = { 'ignoreerrors': True, # Skip on errors (such as unavailable videos in playlists) 'cachedir': False, # Disable on-disk caching 'addmetadata': True, # Embed metadata during postprocessing where available - 'geo_verification_proxy': getenv('geo_verification_proxy'), + 'geo_verification_proxy': getenv('geo_verification_proxy').strip() or None, } COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt' From 8dc2455bfaaa6ca8c3ab2bed8b9d1108988e2384 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 11:53:58 -0400 Subject: [PATCH 215/417] Use the `tubesync-base` stage --- Dockerfile | 44 ++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 96c10a9f..197c3948 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,36 @@ ARG FFMPEG_CHECKSUM_ALGORITHM="sha256" ARG S6_CHECKSUM_ALGORITHM="sha256" +FROM debian:${DEBIAN_VERSION} AS tubesync-base + +ARG TARGETARCH + +ENV DEBIAN_FRONTEND="noninteractive" \ + HOME="/root" \ +
LANGUAGE="en_US.UTF-8" \ + LANG="en_US.UTF-8" \ + LC_ALL="en_US.UTF-8" \ + TERM="xterm" \ + # Do not include compiled byte-code + PIP_NO_COMPILE=1 \ + PIP_ROOT_USER_ACTION='ignore' + +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ + # to be careful, ensure that these files aren't from a different architecture + rm -f /var/cache/apt/*cache.bin ; \ + # Update from the network and keep cache + rm -f /etc/apt/apt.conf.d/docker-clean ; \ + set -x && \ + apt-get update && \ + # Install locales + apt-get -y --no-install-recommends install locales && \ + printf -- "en_US.UTF-8 UTF-8\n" > /etc/locale.gen && \ + locale-gen en_US.UTF-8 && \ + # Clean up + apt-get -y autopurge && \ + apt-get -y autoclean + FROM alpine:${ALPINE_VERSION} AS ffmpeg-download ARG FFMPEG_DATE ARG FFMPEG_VERSION @@ -218,24 +248,13 @@ RUN set -eu ; \ FROM scratch AS s6-overlay COPY --from=s6-overlay-extracted /s6-overlay-rootfs / -FROM debian:${DEBIAN_VERSION} AS tubesync +FROM tubesync-base AS tubesync ARG S6_VERSION ARG FFMPEG_DATE ARG FFMPEG_VERSION -ENV DEBIAN_FRONTEND="noninteractive" \ - HOME="/root" \ - LANGUAGE="en_US.UTF-8" \ - LANG="en_US.UTF-8" \ - LC_ALL="en_US.UTF-8" \ - TERM="xterm" \ - # Do not include compiled byte-code - PIP_NO_COMPILE=1 \ - PIP_ROOT_USER_ACTION='ignore' \ - S6_CMD_WAIT_FOR_SERVICES_MAXTIME="0" - ENV S6_VERSION="${S6_VERSION}" \ FFMPEG_DATE="${FFMPEG_DATE}" \ FFMPEG_VERSION="${FFMPEG_VERSION}" @@ -388,6 +407,7 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck # ENVS and ports ENV PYTHONPATH="/app" \ PYTHONPYCACHEPREFIX="/config/cache/pycache" \ + S6_CMD_WAIT_FOR_SERVICES_MAXTIME="0" \ XDG_CACHE_HOME="/config/cache" EXPOSE 4848 From 9021411037b59e08b3cb731156521850f34bffe0 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 12:24:43 -0400 Subject: [PATCH 216/417] Reorganize layers for better caching --- Dockerfile | 60 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/Dockerfile b/Dockerfile index 197c3948..4d001162 100644 --- a/Dockerfile +++ b/Dockerfile @@ -255,34 +255,17 @@ ARG S6_VERSION ARG FFMPEG_DATE ARG FFMPEG_VERSION +ARG TARGETARCH + ENV S6_VERSION="${S6_VERSION}" \ FFMPEG_DATE="${FFMPEG_DATE}" \ FFMPEG_VERSION="${FFMPEG_VERSION}" -# Install third party software -COPY --from=s6-overlay / / -COPY --from=ffmpeg /usr/local/bin/ /usr/local/bin/ - # Reminder: the SHELL handles all variables -RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ - --mount=type=cache,id=apt-cache-cache,sharing=locked,target=/var/cache/apt \ +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ set -x && \ - # Update from the network and keep cache - rm -f /etc/apt/apt.conf.d/docker-clean && \ apt-get update && \ - # Install locales - apt-get -y --no-install-recommends install locales && \ - printf -- "en_US.UTF-8 UTF-8\n" > /etc/locale.gen && \ - locale-gen en_US.UTF-8 && \ - # Install file - apt-get -y --no-install-recommends install file && \ - # Installed s6 (using COPY earlier) - file -L /command/s6-overlay-suexec && \ - # Installed ffmpeg (using COPY earlier) - /usr/local/bin/ffmpeg -version && \ - file /usr/local/bin/ff* && \ - # Clean up file - apt-get -y autoremove --purge file && \ # Install dependencies we keep # Install 
required distro packages apt-get -y --no-install-recommends install \ @@ -302,10 +285,34 @@ RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ && \ # Link to the current python3 version ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ + # Create a 'app' user which the application will run as + groupadd app && \ + useradd -M -d /app -s /bin/false -g app app && \ # Clean up apt-get -y autopurge && \ apt-get -y autoclean && \ - rm -rf /tmp/* + rm -v -rf /tmp/* + +# Install third party software +COPY --from=s6-overlay / / +COPY --from=ffmpeg /usr/local/bin/ /usr/local/bin/ + +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ + set -x && \ + apt-get update && \ + # Install file + apt-get -y --no-install-recommends install file && \ + # Installed s6 (using COPY earlier) + file -L /command/s6-overlay-suexec && \ + # Installed ffmpeg (using COPY earlier) + /usr/local/bin/ffmpeg -version && \ + file /usr/local/bin/ff* && \ + # Clean up file + apt-get -y autoremove --purge file && \ + # Clean up + apt-get -y autopurge && \ + apt-get -y autoclean # Copy over pip.conf to use piwheels COPY pip.conf /etc/pip.conf @@ -316,12 +323,10 @@ WORKDIR /app # Set up the app RUN --mount=type=tmpfs,target=/cache \ --mount=type=cache,id=pipenv-cache,sharing=locked,target=/cache/pipenv \ - --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ - --mount=type=cache,id=apt-cache-cache,sharing=locked,target=/var/cache/apt \ + --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ --mount=type=bind,source=Pipfile,target=/app/Pipfile \ set -x && \ - # Update from the network and keep cache - rm -f /etc/apt/apt.conf.d/docker-clean && \ apt-get update && \ # Install required build packages apt-get -y --no-install-recommends install \ @@ -337,9 +342,6 @@ RUN --mount=type=tmpfs,target=/cache \ python3-pip \ zlib1g-dev \ && \ - # Create a 'app' user which the application will run as - groupadd app && \ - useradd -M -d /app -s /bin/false -g app app && \ # Install non-distro packages cp -at /tmp/ "${HOME}" && \ HOME="/tmp/${HOME#/}" \ From 5383d579461b0b026bce1c4fc954d89c402b4ce5 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 12:40:43 -0400 Subject: [PATCH 217/417] Remove an unnecessary `rm` --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4d001162..55d927ba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -290,8 +290,7 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va useradd -M -d /app -s /bin/false -g app app && \ # Clean up apt-get -y autopurge && \ - apt-get -y autoclean && \ - rm -v -rf /tmp/* + apt-get -y autoclean # Install third party software COPY --from=s6-overlay / / From 40e4d98ef4cced9f7529677dd386c02a838fe1c6 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 15:57:44 -0400 Subject: [PATCH 218/417] Use the `curl-cffi` extra to support `impersonate` option --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index 3c29cafb..adab344d 100644 --- a/Pipfile +++ b/Pipfile @@ -20,7 +20,7 @@ mysqlclient = "*" PySocks = "*" urllib3 = {extras = ["socks"], version = "*"} requests = {extras = ["socks"], 
version = "*"} -yt-dlp = "*" +yt-dlp = {extras = ["curl-cffi"], version = "*"} emoji = "*" brotli = "*" html5lib = "*" From 7ff1fa3e0c056199598418b87361d12bca66a164 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 16:40:18 -0400 Subject: [PATCH 219/417] Add the `default` extra too --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index adab344d..cedab5cf 100644 --- a/Pipfile +++ b/Pipfile @@ -20,7 +20,7 @@ mysqlclient = "*" PySocks = "*" urllib3 = {extras = ["socks"], version = "*"} requests = {extras = ["socks"], version = "*"} -yt-dlp = {extras = ["curl-cffi"], version = "*"} +yt-dlp = {extras = ["default", "curl-cffi"], version = "*"} emoji = "*" brotli = "*" html5lib = "*" From ec96a86c07f746755e581fafe49d286ca1f83fb7 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 20:31:09 -0400 Subject: [PATCH 220/417] Refresh formats only after metadata was saved --- tubesync/sync/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d004362a..0f2bffcd 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -565,7 +565,8 @@ def download_media(media_id): f'expected outfile does not exist: {filepath}') log.error(err) # Try refreshing formats - media.refresh_formats + if media.has_metadata: + media.refresh_formats # Raising an error here triggers the task to be re-attempted (or fail) raise DownloadFailedException(err) From ec45f29e1d30bd147954e966a6fafa5b46ddc909 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 21:05:39 -0400 Subject: [PATCH 221/417] Use smaller transactions --- tubesync/sync/tasks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 3bb6a329..702086fe 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -179,7 +179,6 @@ def cleanup_removed_media(source, videos): @background(schedule=300, remove_existing_tasks=True) -@atomic(durable=True) def index_source_task(source_id): ''' Indexes media available from a Source object. 
@@ -210,9 +209,6 @@ def index_source_task(source_id): verbose_name = task.verbose_name tvn_format = '[{}' + f'/{num_videos}] {verbose_name}' for vn, video in enumerate(videos, start=1): - if task: - task.verbose_name = tvn_format.format(vn) - task.save(update_fields={'verbose_name'}) # Create or update each video as a Media object key = video.get(source.key_field, None) if not key: @@ -229,8 +225,12 @@ def index_source_task(source_id): published_dt = media.metadata_published(timestamp) if published_dt is not None: media.published = published_dt + if task: + task.verbose_name = tvn_format.format(vn) try: with atomic(): + if task: + task.save(update_fields={'verbose_name'}) media.save() log.debug(f'Indexed media: {source} / {media}') # log the new media instances From f69eed6b44ceca1423ba2b1a106a5f8b785eb6e1 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 15 Mar 2025 21:55:33 -0400 Subject: [PATCH 222/417] Base request delay on the number of tasks running --- tubesync/sync/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index d6419da0..66ca3a08 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -169,7 +169,7 @@ def get_media_info(url, days=None): 'youtubetab': {'approximate_date': ['true']}, }, 'paths': paths, - 'sleep_interval_requests': 2, + 'sleep_interval_requests': 2 * settings.BACKGROUND_TASK_ASYNC_THREADS, 'verbose': True if settings.DEBUG else False, }) if start: @@ -281,7 +281,7 @@ def download_media( 'overwrites': None, 'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20), 'max_sleep_interval': settings.DOWNLOAD_MEDIA_DELAY, - 'sleep_interval_requests': 5, + 'sleep_interval_requests': 1 + (2 * settings.BACKGROUND_TASK_ASYNC_THREADS), 'paths': opts.get('paths', dict()), 'postprocessor_args': opts.get('postprocessor_args', dict()), 'postprocessor_hooks': opts.get('postprocessor_hooks', list()), From ddf0de3c7186910661f57d6f0757792960d17a9b Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 16 Mar 2025 08:57:04 -0400 Subject: [PATCH 223/417] Limit metadata refresh attempts --- tubesync/sync/models.py | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 176b69ee..a2da8d5b 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -800,6 +800,7 @@ class Media(models.Model): if self.created and self.downloaded and not self.media_file_exists: fp_list = list((self.filepath,)) if self.media_file: + # Try the new computed directory + the file base name from the database fp_list.append(self.filepath.parent / Path(self.media_file.path).name) for filepath in fp_list: if filepath.exists(): @@ -813,8 +814,9 @@ class Media(models.Model): update_fields = {'media_file', 'skip'}.union(update_fields) # Trigger an update of derived fields from metadata - if self.metadata: + if update_fields is None or 'metadata' in update_fields: setattr(self, '_cached_metadata_dict', None) + if self.metadata: self.title = self.metadata_title[:200] self.duration = self.metadata_duration if update_fields is not None and "metadata" in update_fields: @@ -1077,6 +1079,17 @@ class Media(models.Model): return self.metadata is not None + def save_to_metadata(self, key, value, /): + data = self.loaded_metadata + data[key] = value + from common.utils import json_serial + compact_json = json.dumps(data, separators=(',', ':'), default=json_serial) + self.metadata = compact_json + 
self.save(update_fields={'metadata'}) + from common.logger import log + log.debug(f'Saved to metadata: {self.key} / {self.uuid}: {key=}: {value}') + + @property def reduce_data(self): now = timezone.now() @@ -1136,18 +1149,32 @@ class Media(models.Model): @property def refresh_formats(self): + if not self.has_metadata: + return data = self.loaded_metadata metadata_seconds = data.get('epoch', None) if not metadata_seconds: self.metadata = None + self.save(update_fields={'metadata'}) return False now = timezone.now() - formats_seconds = data.get('formats_epoch', metadata_seconds) + attempted_key = '_refresh_formats_attempted' + attempted_seconds = data.get(attempted_key) + if attempted_seconds: + # skip for recent unsuccessful refresh attempts also + attempted_dt = self.metadata_published(attempted_seconds) + if (now - attempted_dt) < timedelta(seconds=self.source.index_schedule): + return False + # skip for recent successful formats refresh + refreshed_key = 'formats_epoch' + formats_seconds = data.get(refreshed_key, metadata_seconds) metadata_dt = self.metadata_published(formats_seconds) if (now - metadata_dt) < timedelta(seconds=self.source.index_schedule): return False + last_attempt = round((now - self.posix_epoch).total_seconds()) + self.save_to_metadata(attempted_key, last_attempt) self.skip = False metadata = self.index_metadata() if self.skip: @@ -1158,14 +1185,10 @@ class Media(models.Model): response = filter_response(metadata, True) field = self.get_metadata_field('formats') - data[field] = response.get(field, []) + self.save_to_metadata(field, response.get(field, [])) + self.save_to_metadata(refreshed_key, response.get('epoch', formats_seconds)) if data.get('availability', 'public') != response.get('availability', 'public'): - data['availability'] = response.get('availability', 'public') - data['formats_epoch'] = response.get('epoch', formats_seconds) - - from common.utils import json_serial - compact_json = json.dumps(data, separators=(',', ':'), default=json_serial) - self.metadata = compact_json + self.save_to_metadata('availability', response.get('availability', 'public')) return True From 3d4d965880d74f2725b559c2a92cf7042bcbd4e1 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 16 Mar 2025 11:39:57 -0400 Subject: [PATCH 224/417] Create fatal_http_errors.py This is a sketch of how a new patch is implemented. It doesn't do anything yet. 
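The mechanics behind this sketch are plain attribute rebinding: keep a reference to the original method on the class, assign the replacement, and every existing and future instance resolves the new one. Because the rebinding runs at module import time, applying the patch later is just an import for its side effects, which is how patch 227 below wires it into youtube.py. A generic, self-contained illustration:

    class Original:
        def fetch(self):
            return 'original'

    class Patched(Original):
        def fetch(self):
            return 'patched'

    # Preserve the old method so it can be restored or called explicitly...
    Original.__unpatched_fetch = Original.fetch
    # ...then rebind the name; instances pick up the change immediately.
    Original.fetch = Patched.fetch

    assert Original().fetch() == 'patched'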
--- patches/yt_dlp/patch/fatal_http_errors.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 patches/yt_dlp/patch/fatal_http_errors.py diff --git a/patches/yt_dlp/patch/fatal_http_errors.py b/patches/yt_dlp/patch/fatal_http_errors.py new file mode 100644 index 00000000..6eec650c --- /dev/null +++ b/patches/yt_dlp/patch/fatal_http_errors.py @@ -0,0 +1,11 @@ +from yt_dlp.extractor.youtube import YoutubeIE + + +class PatchedYoutubeIE(YoutubeIE): + + def FUNC(self): + pass + + +#YoutubeIE.__unpatched__FUNC = YoutubeIE.FUNC +#YoutubeIE.FUNC = PatchedYoutubeIE.FUNC From 217c0898daf85bd8942f3cd3ff64333cf50a19b0 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 16 Mar 2025 11:47:32 -0400 Subject: [PATCH 225/417] Fill in the original function we will patch --- patches/yt_dlp/patch/fatal_http_errors.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/patches/yt_dlp/patch/fatal_http_errors.py b/patches/yt_dlp/patch/fatal_http_errors.py index 6eec650c..45d09b2d 100644 --- a/patches/yt_dlp/patch/fatal_http_errors.py +++ b/patches/yt_dlp/patch/fatal_http_errors.py @@ -3,9 +3,23 @@ from yt_dlp.extractor.youtube import YoutubeIE class PatchedYoutubeIE(YoutubeIE): - def FUNC(self): - pass + def _download_player_responses(self, url, smuggled_data, video_id, webpage_url): + webpage = None + if 'webpage' not in self._configuration_arg('player_skip'): + query = {'bpctr': '9999999999', 'has_verified': '1'} + pp = self._configuration_arg('player_params', [None], casesense=True)[0] + if pp: + query['pp'] = pp + webpage = self._download_webpage_with_retries(webpage_url, video_id, query=query) + + master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() + + player_responses, player_url = self._extract_player_responses( + self._get_requested_clients(url, smuggled_data), + video_id, webpage, master_ytcfg, smuggled_data) + + return webpage, master_ytcfg, player_responses, player_url -#YoutubeIE.__unpatched__FUNC = YoutubeIE.FUNC -#YoutubeIE.FUNC = PatchedYoutubeIE.FUNC +YoutubeIE.__unpatched___download_player_responses = YoutubeIE._download_player_responses +YoutubeIE._download_player_responses = PatchedYoutubeIE._download_player_responses From b3b4007dcca70c4784c39874aa771a3faae0ce33 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 16 Mar 2025 11:50:17 -0400 Subject: [PATCH 226/417] Change the patched function --- patches/yt_dlp/patch/fatal_http_errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patches/yt_dlp/patch/fatal_http_errors.py b/patches/yt_dlp/patch/fatal_http_errors.py index 45d09b2d..442db436 100644 --- a/patches/yt_dlp/patch/fatal_http_errors.py +++ b/patches/yt_dlp/patch/fatal_http_errors.py @@ -10,7 +10,7 @@ class PatchedYoutubeIE(YoutubeIE): pp = self._configuration_arg('player_params', [None], casesense=True)[0] if pp: query['pp'] = pp - webpage = self._download_webpage_with_retries(webpage_url, video_id, query=query) + webpage = self._download_webpage_with_retries(webpage_url, video_id, retry_fatal=True, query=query) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() From 5401407e6dfa821be33580b31f76b49b9c047e4b Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 16 Mar 2025 11:54:01 -0400 Subject: [PATCH 227/417] Apply the patch in youtube.py --- tubesync/sync/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index d6419da0..a1a53683 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py 
@@ -18,6 +18,7 @@ from .hooks import postprocessor_hook, progress_hook from .utils import mkdir_p import yt_dlp import yt_dlp.patch.check_thumbnails +import yt_dlp.patch.fatal_http_errors from yt_dlp.utils import remove_end From c2b21c016e2d1815c9c33769482202337a0660f8 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 16 Mar 2025 15:17:11 -0400 Subject: [PATCH 228/417] Prevent `filename` from beginning with `/` --- tubesync/sync/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 176b69ee..5d36191e 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1276,7 +1276,8 @@ class Media(models.Model): # Create a suitable filename from the source media_format media_format = str(self.source.media_format) media_details = self.format_dict - return media_format.format(**media_details) + result = media_format.format(**media_details) + return '.' + result if '/' == result[0] else result @property def directory_path(self): From 5df8500a10a31cd3de3886818a77c3ff6e49753b Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 16 Mar 2025 17:57:07 -0400 Subject: [PATCH 229/417] Upgrade `psycopg` --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index 3c29cafb..14bb0ccb 100644 --- a/Pipfile +++ b/Pipfile @@ -15,7 +15,7 @@ gunicorn = "*" httptools = "*" django-background-tasks = ">=1.2.8" django-basicauth = "*" -psycopg2-binary = "*" +psycopg = {extras = ["binary", "pool"], version = "*"} mysqlclient = "*" PySocks = "*" urllib3 = {extras = ["socks"], version = "*"} From deed3a339383837ed85542460359073c5de29d7c Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Mar 2025 09:50:15 -0400 Subject: [PATCH 230/417] Reduce the amount of time the database is locked --- tubesync/sync/tasks.py | 88 +++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d004362a..7e7a0f0f 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -181,7 +181,6 @@ def cleanup_removed_media(source, videos): @background(schedule=300, remove_existing_tasks=True) -@atomic(durable=True) def index_source_task(source_id): ''' Indexes media available from a Source object. 
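Patch 228 above deserves a concrete example: if a user-supplied media_format template expands to an absolute path, prefixing a dot re-anchors the result under the source directory instead of the filesystem root. With a hypothetical template:

    media_format = '/{yyyy}/{key}.{ext}'   # hypothetical, misconfigured value
    result = media_format.format(yyyy='2025', key='abc123', ext='mkv')
    safe = '.' + result if '/' == result[0] else result
    # '/2025/abc123.mkv' becomes './2025/abc123.mkv', which later joins
    # onto directory_path rather than escaping it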
@@ -206,51 +205,54 @@ def index_source_task(source_id): source.save() log.info(f'Found {len(videos)} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) - for video in videos: - # Create or update each video as a Media object - key = video.get(source.key_field, None) - if not key: - # Video has no unique key (ID), it can't be indexed - continue - try: - media = Media.objects.get(key=key, source=source) - except Media.DoesNotExist: - media = Media(key=key) - media.source = source - media.duration = float(video.get(fields('duration', media), None) or 0) or None - media.title = str(video.get(fields('title', media), ''))[:200] - timestamp = video.get(fields('timestamp', media), None) - published_dt = media.metadata_published(timestamp) - if published_dt is not None: - media.published = published_dt - try: - with atomic(): - media.save() - log.debug(f'Indexed media: {source} / {media}') - # log the new media instances - new_media_instance = ( - media.created and - source.last_crawl and - media.created >= source.last_crawl - ) - if new_media_instance: - log.info(f'Indexed new media: {source} / {media}') - log.info(f'Scheduling task to download metadata for: {media.url}') - verbose_name = _('Downloading metadata for "{}"') - download_media_metadata( - str(media.pk), - priority=9, - verbose_name=verbose_name.format(media.pk), + with atomic(durable=True): + for video in videos: + # Create or update each video as a Media object + key = video.get(source.key_field, None) + if not key: + # Video has no unique key (ID), it can't be indexed + continue + try: + media = Media.objects.get(key=key, source=source) + except Media.DoesNotExist: + media = Media(key=key) + media.source = source + media.duration = float(video.get(fields('duration', media), None) or 0) or None + media.title = str(video.get(fields('title', media), ''))[:200] + timestamp = video.get(fields('timestamp', media), None) + published_dt = media.metadata_published(timestamp) + if published_dt is not None: + media.published = published_dt + try: + with atomic(): + media.save() + except IntegrityError as e: + log.error(f'Index media failed: {source} / {media} with "{e}"') + else: + log.debug(f'Indexed media: {source} / {media}') + # log the new media instances + new_media_instance = ( + media.created and + source.last_crawl and + media.created >= source.last_crawl ) - except IntegrityError as e: - log.error(f'Index media failed: {source} / {media} with "{e}"') + if new_media_instance: + log.info(f'Indexed new media: {source} / {media}') + log.info(f'Scheduling task to download metadata for: {media.url}') + verbose_name = _('Downloading metadata for "{}"') + download_media_metadata( + str(media.pk), + priority=20, + verbose_name=verbose_name.format(media.pk), + ) # Tack on a cleanup of old completed tasks cleanup_completed_tasks() - # Tack on a cleanup of old media - cleanup_old_media() - if source.delete_removed_media: - log.info(f'Cleaning up media no longer in source: {source}') - cleanup_removed_media(source, videos) + with atomic(durable=True): + # Tack on a cleanup of old media + cleanup_old_media() + if source.delete_removed_media: + log.info(f'Cleaning up media no longer in source: {source}') + cleanup_removed_media(source, videos) @background(schedule=0) From ac80c6ce678ce154c023dd6d94973dde0213ea9b Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Mar 2025 11:37:21 -0400 Subject: [PATCH 231/417] Metadata at priority 20 --- tubesync/sync/signals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 666155e2..77e5686e 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -254,7 +254,7 @@ def media_post_save(sender, instance, created, **kwargs): verbose_name = _('Downloading metadata for "{}"') download_media_metadata( str(instance.pk), - priority=10, + priority=20, verbose_name=verbose_name.format(instance.pk), remove_existing_tasks=True ) From 8eec7320db674569ac15967a782f4b802a8575a1 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Mar 2025 11:43:00 -0400 Subject: [PATCH 232/417] Reschedule download tasks when metadata is not ready --- tubesync/sync/tasks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 7e7a0f0f..b15e4c71 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -422,6 +422,8 @@ def download_media_thumbnail(media_id, url): except Media.DoesNotExist: # Task triggered but the media no longer exists, do nothing return + if not media.has_metadata: + raise DownloadFailedException('Metadata is not yet available.') if media.skip: # Media was toggled to be skipped after the task was scheduled log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' @@ -459,6 +461,8 @@ def download_media(media_id): except Media.DoesNotExist: # Task triggered but the media no longer exists, do nothing return + if not media.has_metadata: + raise DownloadFailedException('Metadata is not yet available.') if media.skip: # Media was toggled to be skipped after the task was scheduled log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' From 5f0323e4af7fbfb30a3b4db2a24eed3440ceecdc Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Mar 2025 12:21:40 -0400 Subject: [PATCH 233/417] Add `NoMetadataException` --- tubesync/common/errors.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tubesync/common/errors.py b/tubesync/common/errors.py index 130510a7..87d8aa4d 100644 --- a/tubesync/common/errors.py +++ b/tubesync/common/errors.py @@ -14,6 +14,14 @@ class NoFormatException(Exception): pass +class NoMetadataException(Exception): + ''' + Raised when a media item is attempted to be downloaded but it has no valid + metadata. 
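Raising works as a reschedule because of how django-background-tasks handles failures: an uncaught exception marks the run as failed and the runner re-attempts the task later, up to its configured maximum attempts. Condensed, the guard that patches 232 and 234 add looks like (decorator arguments illustrative):

    @background(schedule=60)
    def download_media(media_id):
        media = Media.objects.get(pk=media_id)
        if not media.has_metadata:
            # Let the scheduler re-attempt this task later rather than
            # downloading with incomplete information.
            raise NoMetadataException('Metadata is not yet available.')
        ...   # normal download path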
+ ''' + pass + + class DownloadFailedException(Exception): ''' Raised when a downloaded media file is expected to be present, but doesn't From b074f6bae9e8de4e78a08ccb6bbd63b714dfb614 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Mar 2025 12:24:16 -0400 Subject: [PATCH 234/417] Switch to `NoMetadataException` --- tubesync/sync/tasks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index b15e4c71..dfec330d 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -23,7 +23,7 @@ from django.utils.translation import gettext_lazy as _ from background_task import background from background_task.models import Task, CompletedTask from common.logger import log -from common.errors import NoMediaException, DownloadFailedException +from common.errors import NoMediaException, NoMetadataException, DownloadFailedException from common.utils import json_serial from .models import Source, Media, MediaServer from .utils import (get_remote_image, resize_image_to_height, delete_file, @@ -423,7 +423,7 @@ def download_media_thumbnail(media_id, url): # Task triggered but the media no longer exists, do nothing return if not media.has_metadata: - raise DownloadFailedException('Metadata is not yet available.') + raise NoMetadataException('Metadata is not yet available.') if media.skip: # Media was toggled to be skipped after the task was scheduled log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' @@ -462,7 +462,7 @@ def download_media(media_id): # Task triggered but the media no longer exists, do nothing return if not media.has_metadata: - raise DownloadFailedException('Metadata is not yet available.') + raise NoMetadataException('Metadata is not yet available.') if media.skip: # Media was toggled to be skipped after the task was scheduled log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' From 4bc510bdad1a694c960d1e0e39fb13c06db86af1 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 09:31:43 -0400 Subject: [PATCH 235/417] The workflow should work for forks also --- .github/workflows/ci.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6068cab1..d59fb9f5 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -34,7 +34,10 @@ jobs: - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 containerise: + if: ${{ always() }} + needs: test runs-on: ubuntu-latest + timeout-minutes: 120 steps: - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -42,13 +45,18 @@ jobs: id: buildx uses: docker/setup-buildx-action@v3 - name: Log into GitHub Container Registry - run: echo "${{ secrets.REGISTRY_ACCESS_TOKEN }}" | docker login https://ghcr.io -u ${{ github.actor }} --password-stdin + env: + DOCKER_REGISTRY: https://ghcr.io + DOCKER_USERNAME: ${{ github.actor }} + DOCKER_TOKEN: ${{ 'meeb' == github.repository_owner && secrets.REGISTRY_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} + run: echo '${{ env.DOCKER_TOKEN }}' | docker login --password-stdin --username '${{ env.DOCKER_USERNAME }}' '${{ env.DOCKER_REGISTRY }}' - name: Lowercase github username for ghcr id: string uses: ASzc/change-string-case-action@v6 with: string: ${{ github.actor }} - name: Build and push + timeout-minutes: 60 uses: docker/build-push-action@v6 with: platforms: linux/amd64,linux/arm64 From c1cc71691af9cc20879b5edb7312583e3c67ec5e Mon Sep 17 00:00:00 2001 From: tcely 
Date: Tue, 18 Mar 2025 11:23:53 -0400 Subject: [PATCH 236/417] Use shell variables --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d59fb9f5..eb31f519 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -49,7 +49,7 @@ jobs: DOCKER_REGISTRY: https://ghcr.io DOCKER_USERNAME: ${{ github.actor }} DOCKER_TOKEN: ${{ 'meeb' == github.repository_owner && secrets.REGISTRY_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - run: echo '${{ env.DOCKER_TOKEN }}' | docker login --password-stdin --username '${{ env.DOCKER_USERNAME }}' '${{ env.DOCKER_REGISTRY }}' + run: echo "${DOCKER_TOKEN}" | docker login --password-stdin --username "${DOCKER_USERNAME}" "${DOCKER_REGISTRY}" - name: Lowercase github username for ghcr id: string uses: ASzc/change-string-case-action@v6 From 85fb479c5ead27fcb256ea7a9e45658ee2bcf716 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 17:22:27 -0400 Subject: [PATCH 237/417] Better indexing of inactive sources --- tubesync/sync/tasks.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index dfec330d..4d9d4c7c 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -190,6 +190,23 @@ def index_source_task(source_id): except Source.DoesNotExist: # Task triggered but the Source has been deleted, delete the task return + # An inactive Source would return an empty list for videos anyway + if not source.is_active: + cleanup_completed_tasks() + # deleting expired media should still happen when an index task is requested + with atomic(durable=True): + cleanup_old_media() + # Schedule a task to update media servers + log.info(f'Scheduling media server updates') + verbose_name = _('Request media server rescan for "{}"') + for mediaserver in MediaServer.objects.all(): + rescan_media_server( + str(mediaserver.pk), + priority=30, + verbose_name=verbose_name.format(mediaserver), + remove_existing_tasks=True, + ) + return # Reset any errors source.has_failed = False source.save() @@ -254,6 +271,17 @@ def index_source_task(source_id): log.info(f'Cleaning up media no longer in source: {source}') cleanup_removed_media(source, videos) + # Schedule a task to update media servers + log.info(f'Scheduling media server updates') + verbose_name = _('Request media server rescan for "{}"') + for mediaserver in MediaServer.objects.all(): + rescan_media_server( + str(mediaserver.pk), + priority=30, + verbose_name=verbose_name.format(mediaserver), + remove_existing_tasks=True, + ) + @background(schedule=0) def check_source_directory_exists(source_id): From 34eea62c847daa4c5710910090b99a9114368009 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 17:33:28 -0400 Subject: [PATCH 238/417] Don't log inside the loop --- tubesync/sync/signals.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 77e5686e..9284ea97 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -377,14 +377,13 @@ def media_post_delete(sender, instance, **kwargs): if not instance.source.is_active: return # Schedule a task to update media servers + log.info(f'Scheduling media server updates') + verbose_name = _('Request media server rescan for "{}"') for mediaserver in MediaServer.objects.all(): - log.info(f'Scheduling media server updates') - verbose_name = _('Request media server rescan for "{}"') rescan_media_server( 
str(mediaserver.pk), - schedule=5, - priority=0, + priority=30, verbose_name=verbose_name.format(mediaserver), - remove_existing_tasks=True + remove_existing_tasks=True, ) From 7e721c98a7ffbed8d4be12f3103b72220b4ee31c Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 17:42:06 -0400 Subject: [PATCH 239/417] Don't update media servers for every Media item --- tubesync/sync/signals.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 9284ea97..be848a0a 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -374,16 +374,3 @@ def media_post_delete(sender, instance, **kwargs): log.info(f'Deleting file for: {instance} path: {file}') delete_file(file) - if not instance.source.is_active: - return - # Schedule a task to update media servers - log.info(f'Scheduling media server updates') - verbose_name = _('Request media server rescan for "{}"') - for mediaserver in MediaServer.objects.all(): - rescan_media_server( - str(mediaserver.pk), - priority=30, - verbose_name=verbose_name.format(mediaserver), - remove_existing_tasks=True, - ) - From 17b82d426472e7f621b5fff732a7ba333b94f2c1 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 18:13:24 -0400 Subject: [PATCH 240/417] Schedule update of media servers after deletion loops --- tubesync/sync/tasks.py | 88 ++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 4d9d4c7c..d502f904 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -160,24 +160,47 @@ def cleanup_completed_tasks(): CompletedTask.objects.filter(run_at__lt=delta).delete() +def schedule_media_servers_update(): + with atomic(): + # Schedule a task to update media servers + log.info(f'Scheduling media server updates') + verbose_name = _('Request media server rescan for "{}"') + for mediaserver in MediaServer.objects.all(): + rescan_media_server( + str(mediaserver.pk), + priority=30, + verbose_name=verbose_name.format(mediaserver), + remove_existing_tasks=True, + ) + + def cleanup_old_media(): - for source in Source.objects.filter(delete_old_media=True, days_to_keep__gt=0): - delta = timezone.now() - timedelta(days=source.days_to_keep) - for media in source.media_source.filter(downloaded=True, download_date__lt=delta): - log.info(f'Deleting expired media: {source} / {media} ' - f'(now older than {source.days_to_keep} days / ' - f'download_date before {delta})') - # .delete() also triggers a pre_delete signal that removes the files - media.delete() + with atomic(): + for source in Source.objects.filter(delete_old_media=True, days_to_keep__gt=0): + delta = timezone.now() - timedelta(days=source.days_to_keep) + for media in source.media_source.filter(downloaded=True, download_date__lt=delta): + log.info(f'Deleting expired media: {source} / {media} ' + f'(now older than {source.days_to_keep} days / ' + f'download_date before {delta})') + with atomic(): + # .delete() also triggers a pre_delete/post_delete signals that remove files + media.delete() + schedule_media_servers_update() def cleanup_removed_media(source, videos): - media_objects = Media.objects.filter(source=source) - for media in media_objects: - matching_source_item = [video['id'] for video in videos if video['id'] == media.key] - if not matching_source_item: - log.info(f'{media.name} is no longer in source, removing') - media.delete() + if not source.delete_removed_media: + return + log.info(f'Cleaning up media 
no longer in source: {source}') + with atomic(durable=True): + media_objects = Media.objects.filter(source=source) + for media in media_objects: + matching_source_item = [video['id'] for video in videos if video['id'] == media.key] + if not matching_source_item: + log.info(f'{media.name} is no longer in source, removing') + with atomic(): + media.delete() + schedule_media_servers_update() @background(schedule=300, remove_existing_tasks=True) @@ -185,6 +208,7 @@ def index_source_task(source_id): ''' Indexes media available from a Source object. ''' + cleanup_completed_tasks() try: source = Source.objects.get(pk=source_id) except Source.DoesNotExist: @@ -192,20 +216,8 @@ def index_source_task(source_id): return # An inactive Source would return an empty list for videos anyway if not source.is_active: - cleanup_completed_tasks() # deleting expired media should still happen when an index task is requested - with atomic(durable=True): - cleanup_old_media() - # Schedule a task to update media servers - log.info(f'Scheduling media server updates') - verbose_name = _('Request media server rescan for "{}"') - for mediaserver in MediaServer.objects.all(): - rescan_media_server( - str(mediaserver.pk), - priority=30, - verbose_name=verbose_name.format(mediaserver), - remove_existing_tasks=True, - ) + cleanup_old_media() return # Reset any errors source.has_failed = False @@ -262,25 +274,9 @@ def index_source_task(source_id): priority=20, verbose_name=verbose_name.format(media.pk), ) - # Tack on a cleanup of old completed tasks - cleanup_completed_tasks() - with atomic(durable=True): - # Tack on a cleanup of old media - cleanup_old_media() - if source.delete_removed_media: - log.info(f'Cleaning up media no longer in source: {source}') - cleanup_removed_media(source, videos) - - # Schedule a task to update media servers - log.info(f'Scheduling media server updates') - verbose_name = _('Request media server rescan for "{}"') - for mediaserver in MediaServer.objects.all(): - rescan_media_server( - str(mediaserver.pk), - priority=30, - verbose_name=verbose_name.format(mediaserver), - remove_existing_tasks=True, - ) + # Cleanup of old downloaded media and media no longer available from the source + cleanup_old_media() + cleanup_removed_media(source, videos) @background(schedule=0) From 8f9fbb9a4cb856c994f1ea63789ef9d2b366a82a Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 18:25:31 -0400 Subject: [PATCH 241/417] Call `cleanup_removed_media` from within the transaction --- tubesync/sync/tasks.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d502f904..b3850a32 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -192,14 +192,13 @@ def cleanup_removed_media(source, videos): if not source.delete_removed_media: return log.info(f'Cleaning up media no longer in source: {source}') - with atomic(durable=True): - media_objects = Media.objects.filter(source=source) - for media in media_objects: - matching_source_item = [video['id'] for video in videos if video['id'] == media.key] - if not matching_source_item: - log.info(f'{media.name} is no longer in source, removing') - with atomic(): - media.delete() + media_objects = Media.objects.filter(source=source) + for media in media_objects: + matching_source_item = [video['id'] for video in videos if video['id'] == media.key] + if not matching_source_item: + log.info(f'{media.name} is no longer in source, removing') + with atomic(): + 
media.delete() schedule_media_servers_update() @@ -209,6 +208,8 @@ def index_source_task(source_id): Indexes media available from a Source object. ''' cleanup_completed_tasks() + # deleting expired media should happen any time an index task is requested + cleanup_old_media() try: source = Source.objects.get(pk=source_id) except Source.DoesNotExist: @@ -216,8 +217,6 @@ def index_source_task(source_id): return # An inactive Source would return an empty list for videos anyway if not source.is_active: - # deleting expired media should still happen when an index task is requested - cleanup_old_media() return # Reset any errors source.has_failed = False @@ -274,9 +273,8 @@ def index_source_task(source_id): priority=20, verbose_name=verbose_name.format(media.pk), ) - # Cleanup of old downloaded media and media no longer available from the source - cleanup_old_media() - cleanup_removed_media(source, videos) + # Cleanup of media no longer available from the source + cleanup_removed_media(source, videos) @background(schedule=0) From 021f4b172ae0971712ca6661445813f32ab5254f Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 20:05:14 -0400 Subject: [PATCH 242/417] Display progress for checking task --- tubesync/sync/tasks.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 702086fe..d89d3f66 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -132,6 +132,9 @@ def get_media_metadata_task(media_id): def get_media_premiere_task(media_id): return get_first_task('sync.tasks.wait_for_media_premiere', media_id) +def get_source_check_task(source_id): + return get_first_task('sync.tasks.save_all_media_for_source', source_id) + def get_source_index_task(source_id): return get_first_task('sync.tasks.index_source_task', source_id) @@ -605,6 +608,7 @@ def save_all_media_for_source(source_id): already_saved = set() mqs = Media.objects.filter(source=source) + task = get_source_check_task(source_id) refresh_qs = mqs.filter( can_download=False, skip=False, @@ -612,22 +616,40 @@ def save_all_media_for_source(source_id): downloaded=False, metadata__isnull=False, ) - for media in refresh_qs: + if task: + verbose_name = task.verbose_name + tvn_format = '[{}' + f'/{refresh_qs.count()}] {verbose_name}' + for mn, media in enumerate(refresh_qs, start=1): + if task: + task.verbose_name = tvn_format.format(mn) + with atomic(): + task.save(update_fields={'verbose_name'}) try: media.refresh_formats except YouTubeError as e: log.debug(f'Failed to refresh formats for: {source} / {media.key}: {e!s}') pass else: - media.save() + with atomic(): + media.save() already_saved.add(media.uuid) # Trigger the post_save signal for each media item linked to this source as various # flags may need to be recalculated - with atomic(): - for media in mqs: + if task: + tvn_format = '[{}' + f'/{mqs.count()}] {verbose_name}' + for mn, media in enumerate(mqs, start=1): + if task: + task.verbose_name = tvn_format.format(mn) + with atomic(): + task.save(update_fields={'verbose_name'}) if media.uuid not in already_saved: - media.save() + with atomic(): + media.save() + if task: + task.verbose_name = verbose_name + with atomic(): + task.save(update_fields={'verbose_name'}) @background(schedule=60, remove_existing_tasks=True) From d2458a297965428729cd876db3803953bec0dbce Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 20:12:54 -0400 Subject: [PATCH 243/417] Keep transactions specific to task --- 
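A minimal sketch of the intent here (illustrative only, using the surrounding
variable names from `index_source_task`): each `task.save()` gets its own
small `atomic()` block, separate from `media.save()`, so a failure while
saving the media row can no longer roll back the task's progress display.

    from django.db.transaction import atomic

    if task:
        task.verbose_name = tvn_format.format(vn)
        with atomic():
            # commits independently of whatever happens below
            task.save(update_fields={'verbose_name'})
    try:
        # a failure here no longer undoes the progress update above
        media.save()
    except IntegrityError as e:
        log.error(f'Index media failed: {source} / {media} with "{e}"')
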
tubesync/sync/tasks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d89d3f66..fd5d1800 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -230,11 +230,10 @@ def index_source_task(source_id): media.published = published_dt if task: task.verbose_name = tvn_format.format(vn) - try: with atomic(): - if task: task.save(update_fields={'verbose_name'}) - media.save() + try: + media.save() log.debug(f'Indexed media: {source} / {media}') # log the new media instances new_media_instance = ( @@ -248,7 +247,8 @@ def index_source_task(source_id): log.error(f'Index media failed: {source} / {media} with "{e}"') if task: task.verbose_name = verbose_name - task.save(update_fields={'verbose_name'}) + with atomic(): + task.save(update_fields={'verbose_name'}) # Tack on a cleanup of old completed tasks cleanup_completed_tasks() # Tack on a cleanup of old media From 1f72718f317b6f7c66301b316f2f8db611589709 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 20:15:17 -0400 Subject: [PATCH 244/417] fixup: indentation --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index fd5d1800..cf0d99d4 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -231,7 +231,7 @@ def index_source_task(source_id): if task: task.verbose_name = tvn_format.format(vn) with atomic(): - task.save(update_fields={'verbose_name'}) + task.save(update_fields={'verbose_name'}) try: media.save() log.debug(f'Indexed media: {source} / {media}') From abae403a8fbed7fb5520a551f91174c2aba47916 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 18 Mar 2025 21:12:18 -0400 Subject: [PATCH 245/417] Remove extra blank lines --- tubesync/sync/tasks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index c510b8fd..4fcf8455 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -254,12 +254,10 @@ def index_source_task(source_id): priority=20, verbose_name=verbose_name.format(media.pk), ) - if task: task.verbose_name = verbose_name with atomic(): task.save(update_fields={'verbose_name'}) - # Tack on a cleanup of old completed tasks cleanup_completed_tasks() with atomic(durable=True): From 281268772aa62d79f5f6e9c8b277ece54435cd23 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 00:02:26 -0400 Subject: [PATCH 246/417] Add a default thumbnail URL before metadata is available --- tubesync/sync/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 5d36191e..b2656129 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1202,6 +1202,8 @@ class Media(models.Model): @property def thumbnail(self): + if not self.has_metadata: + return f'https://i.ytimg.com/vi/{self.key}/maxresdefault.jpg' return self.get_metadata_first_value('thumbnail', '') @property From 65f86b116151bb03afa2eed54986e268cce3f98f Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 00:13:23 -0400 Subject: [PATCH 247/417] Use the default argument --- tubesync/sync/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index b2656129..168f4d8e 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1202,9 +1202,8 @@ class Media(models.Model): @property def thumbnail(self): - if not self.has_metadata: - return 
f'https://i.ytimg.com/vi/{self.key}/maxresdefault.jpg' - return self.get_metadata_first_value('thumbnail', '') + default = f'https://i.ytimg.com/vi/{self.key}/maxresdefault.jpg' + return self.get_metadata_first_value('thumbnail', default) @property def name(self): From 3d6a217f20c0b3f7f9cce51da4284073fc229d92 Mon Sep 17 00:00:00 2001 From: meeb Date: Wed, 19 Mar 2025 15:36:36 +1100 Subject: [PATCH 248/417] bump ffmpeg and yt-dlp --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 99e2b102..cb8094f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ # syntax=docker/dockerfile:1 # check=error=true -ARG FFMPEG_DATE="2025-03-04-15-43" -ARG FFMPEG_VERSION="N-118645-gf76195ff65" +ARG FFMPEG_DATE="2025-03-18-14-20" +ARG FFMPEG_VERSION="N-118860-g81c50c33b6" ARG S6_VERSION="3.2.0.2" From 20959fb4c2a400f42c6a6ca366e3f649af5b00ae Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 00:46:49 -0400 Subject: [PATCH 249/417] Thumbnail download can proceed without metadata --- tubesync/sync/tasks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 12c2afcb..183ee351 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -457,8 +457,6 @@ def download_media_thumbnail(media_id, url): except Media.DoesNotExist: # Task triggered but the media no longer exists, do nothing return - if not media.has_metadata: - raise NoMetadataException('Metadata is not yet available.') if media.skip: # Media was toggled to be skipped after the task was scheduled log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' From e1f2cd0d85576346d90ac1e471ba64db5b4c9c01 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 01:00:06 -0400 Subject: [PATCH 250/417] fixup: indentation --- tubesync/sync/tasks.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 12c2afcb..9ec22447 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -276,12 +276,12 @@ def index_source_task(source_id): if new_media_instance: log.info(f'Indexed new media: {source} / {media}') log.info(f'Scheduling task to download metadata for: {media.url}') - verbose_name = _('Downloading metadata for "{}"') - download_media_metadata( - str(media.pk), - priority=20, - verbose_name=verbose_name.format(media.pk), - ) + verbose_name = _('Downloading metadata for "{}"') + download_media_metadata( + str(media.pk), + priority=20, + verbose_name=verbose_name.format(media.pk), + ) if task: task.verbose_name = verbose_name with atomic(): From f5a30c218b27cddc3f30b13c0c62af91dba9b9c9 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 01:23:02 -0400 Subject: [PATCH 251/417] Follow `yt-dlp/FFmpeg-Builds/releases` automatically --- .github/workflows/ci.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index eb31f519..53ac1d37 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -39,6 +39,20 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 120 steps: + - name: Set environment variables with GitHub CLI + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + { + # Fetch the latest+1 release from yt-dlp/FFmpeg-Builds + var='FFMPEG_DATE' ; + delim='"'"${var}"'_EOF"' ; + jq_arg='.[1].tag_name[10:]' ; + printf -- '%s<<%s\n' "${var}" "${delim}" ; + gh api repos/yt-dlp/FFmpeg-Builds/releases --cache 12h --jq "${jq_arg}" ; + 
printf -- '%s\n' "${delim}" ; + unset -v delim jq_arg var ; + } >> "${GITHUB_ENV}" - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -66,3 +80,5 @@ jobs: cache-to: type=inline build-args: | IMAGE_NAME=${{ env.IMAGE_NAME }} + FFMPEG_DATE=${{ env.FFMPEG_DATE }} + FFMPEG_VERSION=N From 35f0dcff555ad4f9887c6a1637acee28442a8285 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 01:35:19 -0400 Subject: [PATCH 252/417] Remove `FFMPEG_DATE` value from Dockerfile --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 99e2b102..4cfab87d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,7 @@ # syntax=docker/dockerfile:1 # check=error=true -ARG FFMPEG_DATE="2025-03-04-15-43" -ARG FFMPEG_VERSION="N-118645-gf76195ff65" - +ARG FFMPEG_VERSION="N" ARG S6_VERSION="3.2.0.2" ARG SHA256_S6_AMD64="59289456ab1761e277bd456a95e737c06b03ede99158beb24f12b165a904f478" From d6785660b8d8def5bffd73566886a142b3d091d6 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 01:36:49 -0400 Subject: [PATCH 253/417] Don't override `FFMPEG_VERSION` yet --- .github/workflows/ci.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 53ac1d37..f68ab9db 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -81,4 +81,3 @@ jobs: build-args: | IMAGE_NAME=${{ env.IMAGE_NAME }} FFMPEG_DATE=${{ env.FFMPEG_DATE }} - FFMPEG_VERSION=N From ba8ad209104adee1c7873dcbefd2c9054d30438a Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 01:59:07 -0400 Subject: [PATCH 254/417] Don't push for forks --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f68ab9db..325b902c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -74,7 +74,7 @@ jobs: uses: docker/build-push-action@v6 with: platforms: linux/amd64,linux/arm64 - push: true + push: ${{ 'success' == needs.test.result && 'meeb' == github.repository_owner && 'true' || 'false' }} tags: ghcr.io/${{ steps.string.outputs.lowercase }}/${{ env.IMAGE_NAME }}:latest cache-from: type=registry,ref=ghcr.io/${{ steps.string.outputs.lowercase }}/${{ env.IMAGE_NAME }}:latest cache-to: type=inline From 84e7a23fc95bdfb082148aa8e5a3facf34282d09 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 02:05:33 -0400 Subject: [PATCH 255/417] Avoid using `always()` --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 325b902c..6aa8e0e7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -34,7 +34,7 @@ jobs: - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 containerise: - if: ${{ always() }} + if: ${{ !cancelled() }} needs: test runs-on: ubuntu-latest timeout-minutes: 120 From 74439b2f36ec1c7d04d256a22c438bababb6e66a Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 08:16:18 -0400 Subject: [PATCH 256/417] Consistency --- tubesync/common/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index f99cdf23..58137abe 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -198,6 +198,6 @@ def profile_func(func): ps.sort_stats( pstats.SortKey.CUMULATIVE ).print_stats() - return (result, (s.getvalue(), ps, s),) + return (result, (s.getvalue(), ps, 
s,),) return wrapper From 13e7fc21a877eb0329a065ba0a8f6374644cf8e2 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Mar 2025 09:18:51 -0400 Subject: [PATCH 257/417] Use `Media.metadata_published` --- tubesync/sync/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 168f4d8e..b752a370 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1084,7 +1084,8 @@ class Media(models.Model): data = json.loads(self.metadata or "{}") if '_reduce_data_ran_at' in data.keys(): total_seconds = data['_reduce_data_ran_at'] - ran_at = self.posix_epoch + timedelta(seconds=total_seconds) + assert isinstance(total_seconds, int), type(total_seconds) + ran_at = self.metadata_published(total_seconds) if (now - ran_at) < timedelta(hours=1): return data From 612c6299c338b57fbbe65d9067549d26e96716a9 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 20 Mar 2025 02:04:02 -0400 Subject: [PATCH 258/417] More stable `ffmpeg` releases This code picks the same build date, as long as: 1. The release still exists 2. The commit is the latest --- .github/workflows/ci.yaml | 48 +++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6aa8e0e7..de052134 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -39,17 +39,56 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 120 steps: - - name: Set environment variables with GitHub CLI + - name: Retrieve yt-dlp/FFmpeg-Builds releases with GitHub CLI + id: ffmpeg env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_API_GQL_ASSETS: 25 + GH_API_GQL_RELEASES: 35 + GH_API_GQL_OWNER: yt-dlp + GH_API_GQL_REPO: FFmpeg-Builds run: | + gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC}) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; + gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; + { + var='releases' ; + delim='"'"${var}"'_EOF"' ; + printf -- '%s<<%s\n' "${var}" "${delim}" ; + gh api graphql --cache 12h \ + -F assets="${GH_API_GQL_ASSETS}" \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" ; + printf -- '%s\n' "${delim}" ; + unset -v delim jq_arg var ; + } >> "${GITHUB_OUTPUT}" + gh api graphql --cache 12h \ + -F assets="${GH_API_GQL_ASSETS}" \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; + - name: Set environment variables with GitHub CLI + run: | + cat >| .ffmpeg.releases.json <<'EOF' + ${{ steps.ffmpeg.outputs.releases }} + EOF { - # Fetch the latest+1 release from yt-dlp/FFmpeg-Builds var='FFMPEG_DATE' ; delim='"'"${var}"'_EOF"' ; - jq_arg='.[1].tag_name[10:]' ; printf -- '%s<<%s\n' "${var}" "${delim}" ; - gh api repos/yt-dlp/FFmpeg-Builds/releases --cache 12h --jq "${jq_arg}" ; + jq_arg='[foreach .[] as $release ([{}, []]; [ .[0] + {($release.commit): ([ $release.date ] + (.[0][($release.commit)] // []) ) }, [ .[1][0] // $release.commit ] ] ; .[0][(.[1][0])] ) ][-1][0]' ; + jq -r "${jq_arg}" -- .ffmpeg.releases.json ; + printf -- '%s\n' "${delim}" ; + + ffmpeg_date="$( jq -r "${jq_arg}" -- .ffmpeg.releases.json )" + + var='FFMPEG_VERSION' ; + delim='"'"${var}"'_EOF"' ; + printf -- '%s<<%s\n' "${var}" "${delim}" ; + jq_arg='.[]|select(.date == $date)|.versions[]|select(startswith("N-"))' ; + jq -r --arg date "${ffmpeg_date}" "${jq_arg}" -- .ffmpeg.releases.json ; printf -- '%s\n' "${delim}" ; unset -v delim jq_arg var ; } >> "${GITHUB_ENV}" @@ -81,3 +120,4 @@ jobs: build-args: | IMAGE_NAME=${{ env.IMAGE_NAME }} FFMPEG_DATE=${{ env.FFMPEG_DATE }} + FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} From 10666b84444cd703ddaf490383731a919802875d Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 20 Mar 2025 11:21:55 -0400 Subject: [PATCH 259/417] Tweak name to be accurate again --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index de052134..ed7833ac 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -69,7 +69,7 @@ jobs: -F repo="${GH_API_GQL_REPO}" \ -F releases="${GH_API_GQL_RELEASES}" \ -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; - - name: Set environment variables with GitHub CLI + - name: Set environment variables with jq run: | cat >| .ffmpeg.releases.json <<'EOF' ${{ steps.ffmpeg.outputs.releases }} From e2f36d8e85358fe1f04e2b39a3cac0dc90804cbf Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 21 Mar 2025 14:30:12 -0400 Subject: [PATCH 260/417] Add a clean up function for `verbose_name` --- 
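A short sketch of the behaviour being added (example values are illustrative):
the clean-up strips one leading "[...]" prefix from a task's verbose name, but
only when everything between the brackets is drawn from the `valid` character
set, so names that merely happen to contain brackets are left alone.

    remove_enclosed('[3/25] Index media from source "Foo"', '[', ']', ' ',
                    valid='0123456789/')
    # -> 'Index media from source "Foo"'

    remove_enclosed('[beta] Index media from source "Foo"', '[', ']', ' ',
                    valid='0123456789/')
    # -> returned unchanged: 'b', 'e', 't', 'a' are not in the valid set
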
tubesync/sync/tasks.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 45189f16..a4e0f6f4 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -238,7 +238,28 @@ def index_source_task(source_id): fields = lambda f, m: m.get_metadata_field(f) task = get_source_index_task(source_id) if task: - verbose_name = task.verbose_name + # TODO: clean up a leftover prefix from a repeating task that did not complete + def remove_enclosed(haystack, /, open='[', close=']', sep=' ', *, valid=None, start=None, end=None): + if not haystack: + return haystack + assert open and close, 'open and close are required to be non-empty strings' + o = haystack.find(open, start, end) + sep = sep or '' + n = close + sep + c = haystack.find(n, len(open)+o, end) + if -1 in {o, c}: + return haystack + content = haystack[len(open)+o:c] + if valid is not None: + found = set(content) + valid = set(valid) + invalid = found - valid + # assert not invalid, f'Invalid characters {invalid} found in: {content}' + if invalid: + return haystack + return haystack[:o] + haystack[len(n)+c:] + + verbose_name = remove_enclosed(task.verbose_name, valid='0123456789/') tvn_format = '[{}' + f'/{num_videos}] {verbose_name}' for vn, video in enumerate(videos, start=1): # Create or update each video as a Media object @@ -653,6 +674,7 @@ def save_all_media_for_source(source_id): ) if task: verbose_name = task.verbose_name + # TODO: clean verbose_name if this task repeats tvn_format = '[{}' + f'/{refresh_qs.count()}] {verbose_name}' for mn, media in enumerate(refresh_qs, start=1): if task: From ebea5d836935207dee02e4a561216cd51c7da04c Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 21 Mar 2025 14:37:09 -0400 Subject: [PATCH 261/417] Add `remove_enclosed` function --- tubesync/common/utils.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index 58137abe..5894f0fc 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -201,3 +201,24 @@ def profile_func(func): return (result, (s.getvalue(), ps, s,),) return wrapper + +def remove_enclosed(haystack, /, open='[', close=']', sep=' ', *, valid=None, start=None, end=None): + if not haystack: + return haystack + assert open and close, 'open and close are required to be non-empty strings' + o = haystack.find(open, start, end) + sep = sep or '' + n = close + sep + c = haystack.find(n, len(open)+o, end) + if -1 in {o, c}: + return haystack + if valid is not None: + content = haystack[len(open)+o:c] + found = set(content) + valid = set(valid) + invalid = found - valid + # assert not invalid, f'Invalid characters {invalid} found in: {content}' + if invalid: + return haystack + return haystack[:o] + haystack[len(n)+c:] + From 0febb409bbc9217b02a9ef74cb33ca1950d11b11 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 21 Mar 2025 14:44:41 -0400 Subject: [PATCH 262/417] Use `remove_enclosed` function --- tubesync/sync/tasks.py | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index a4e0f6f4..65ee8893 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -24,7 +24,7 @@ from background_task import background from background_task.models import Task, CompletedTask from common.logger import log from common.errors import NoMediaException, NoMetadataException, DownloadFailedException -from common.utils import 
json_serial +from common.utils import json_serial, remove_enclosed from .models import Source, Media, MediaServer from .utils import (get_remote_image, resize_image_to_height, delete_file, write_text_file, filter_response) @@ -238,29 +238,12 @@ def index_source_task(source_id): fields = lambda f, m: m.get_metadata_field(f) task = get_source_index_task(source_id) if task: - # TODO: clean up a leftover prefix from a repeating task that did not complete - def remove_enclosed(haystack, /, open='[', close=']', sep=' ', *, valid=None, start=None, end=None): - if not haystack: - return haystack - assert open and close, 'open and close are required to be non-empty strings' - o = haystack.find(open, start, end) - sep = sep or '' - n = close + sep - c = haystack.find(n, len(open)+o, end) - if -1 in {o, c}: - return haystack - content = haystack[len(open)+o:c] - if valid is not None: - found = set(content) - valid = set(valid) - invalid = found - valid - # assert not invalid, f'Invalid characters {invalid} found in: {content}' - if invalid: - return haystack - return haystack[:o] + haystack[len(n)+c:] - - verbose_name = remove_enclosed(task.verbose_name, valid='0123456789/') - tvn_format = '[{}' + f'/{num_videos}] {verbose_name}' + verbose_name = remove_enclosed( + task.verbose_name, '[', ']', ' ', + valid='0123456789/,', + end=task.verbose_name.find('Index'), + ) + tvn_format = '[{}' + f'/{num_videos:,}] {verbose_name}' for vn, video in enumerate(videos, start=1): # Create or update each video as a Media object key = video.get(source.key_field, None) From a55e9ccfb60ca224e72eee55da52f84f56654853 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 21 Mar 2025 14:51:35 -0400 Subject: [PATCH 263/417] Clean up `verbose_name` for check task also --- tubesync/sync/tasks.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 65ee8893..6e839d1f 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -243,7 +243,7 @@ def index_source_task(source_id): valid='0123456789/,', end=task.verbose_name.find('Index'), ) - tvn_format = '[{}' + f'/{num_videos:,}] {verbose_name}' + tvn_format = '[{:,}' + f'/{num_videos:,}] {verbose_name}' for vn, video in enumerate(videos, start=1): # Create or update each video as a Media object key = video.get(source.key_field, None) @@ -656,9 +656,12 @@ def save_all_media_for_source(source_id): metadata__isnull=False, ) if task: - verbose_name = task.verbose_name - # TODO: clean verbose_name if this task repeats - tvn_format = '[{}' + f'/{refresh_qs.count()}] {verbose_name}' + verbose_name = remove_enclosed( + task.verbose_name, '[', ']', ' ', + valid='0123456789/,', + end=task.verbose_name.find('Check'), + ) + tvn_format = '[{:,}' + f'/{refresh_qs.count():,}] {verbose_name}' for mn, media in enumerate(refresh_qs, start=1): if task: task.verbose_name = tvn_format.format(mn) From 391fe738b5cda3b78a49dd227fd8f49a36ab7de0 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 21 Mar 2025 15:07:48 -0400 Subject: [PATCH 264/417] Use `remove_enclosed` function in hooks.py --- tubesync/sync/hooks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py index c644da59..3bb3ce0d 100644 --- a/tubesync/sync/hooks.py +++ b/tubesync/sync/hooks.py @@ -2,6 +2,7 @@ import os import yt_dlp from common.logger import log +from common.utils import remove_enclosed from django.conf import settings @@ -81,9 +82,9 @@ class BaseStatus: if self.task_verbose_name is 
None:
            # clean up any previously prepended task_status
            # this happened because of duplicated tasks on my test system
-            s = task.verbose_name
-            cleaned = s[1+s.find(' Downloading '):]
-            self.task_verbose_name = cleaned
+            self.task_verbose_name = remove_enclosed(
+                task.verbose_name, '[', ']', ' ',
+            )
         task.verbose_name = f'{self.task_status} {self.task_verbose_name}'
         task.save()

From 47df1260740c48a12d5dc4c0e0c8798fe201310c Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 22 Mar 2025 11:38:43 -0400
Subject: [PATCH 265/417] Distinguish the loops in the checking task

---
 tubesync/sync/tasks.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 6e839d1f..92e7dca9 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -661,7 +661,7 @@
             valid='0123456789/,',
             end=task.verbose_name.find('Check'),
         )
-        tvn_format = '[{:,}' + f'/{refresh_qs.count():,}] {verbose_name}'
+        tvn_format = '[1/{:,}' + f'/{refresh_qs.count():,}] {verbose_name}'
     for mn, media in enumerate(refresh_qs, start=1):
         if task:
             task.verbose_name = tvn_format.format(mn)
@@ -680,15 +680,15 @@
     # Trigger the post_save signal for each media item linked to this source as various
     # flags may need to be recalculated
     if task:
-        tvn_format = '[{}' + f'/{mqs.count()}] {verbose_name}'
+        tvn_format = '[2/{:,}' + f'/{mqs.count():,}] {verbose_name}'
     for mn, media in enumerate(mqs, start=1):
         if task:
             task.verbose_name = tvn_format.format(mn)
             with atomic():
                 task.save(update_fields={'verbose_name'})
-            if media.uuid not in already_saved:
-                with atomic():
-                    media.save()
+        if media.uuid not in already_saved:
+            with atomic():
+                media.save()
     if task:
         task.verbose_name = verbose_name
         with atomic():

From 2951cabd6bf1f979943d96734cb2bd4fc2d40380 Mon Sep 17 00:00:00 2001
From: tcely
Date: Sat, 22 Mar 2025 21:54:41 -0400
Subject: [PATCH 266/417] Add and use the `update_task_status` function

---
 tubesync/sync/tasks.py | 61 +++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 92e7dca9..0f8e4ecc 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -18,7 +18,7 @@
 from django.core.files.base import ContentFile
 from django.core.files.uploadedfile import SimpleUploadedFile
 from django.utils import timezone
 from django.db.transaction import atomic
-from django.db.utils import IntegrityError
+from django.db import DatabaseError, IntegrityError
 from django.utils.translation import gettext_lazy as _
 from background_task import background
 from background_task.models import Task, CompletedTask
@@ -106,6 +106,27 @@
     return error_message.split(':', 1)[1].strip()
 
 
+def update_task_status(task, status):
+    if not task:
+        return False
+    if not task._verbose_name:
+        task._verbose_name = remove_enclosed(
+            task.verbose_name, '[', ']', ' ',
+        )
+    if status is None:
+        task.verbose_name = task._verbose_name
+    else:
+        task.verbose_name = f'[{status}] {task._verbose_name}'
+    try:
+        with atomic():
+            task.save(update_fields={'verbose_name'})
+    except DatabaseError as e:
+        if 'Save with update_fields did not affect any rows.' == str(e):
+            pass
+        else:
+            raise
+    return True
+
+
 def get_source_completed_tasks(source_id, only_errors=False):
     '''
     Returns a queryset of CompletedTask objects for a source by source ID.
@@ -238,18 +259,19 @@ def index_source_task(source_id): fields = lambda f, m: m.get_metadata_field(f) task = get_source_index_task(source_id) if task: - verbose_name = remove_enclosed( + task._verbose_name = remove_enclosed( task.verbose_name, '[', ']', ' ', valid='0123456789/,', end=task.verbose_name.find('Index'), ) - tvn_format = '[{:,}' + f'/{num_videos:,}] {verbose_name}' + tvn_format = '{:,}' + f'/{num_videos:,}' for vn, video in enumerate(videos, start=1): # Create or update each video as a Media object key = video.get(source.key_field, None) if not key: # Video has no unique key (ID), it can't be indexed continue + update_task_status(task, tvn_format.format(vn)) try: media = Media.objects.get(key=key, source=source) except Media.DoesNotExist: @@ -261,10 +283,6 @@ def index_source_task(source_id): published_dt = media.metadata_published(timestamp) if published_dt is not None: media.published = published_dt - if task: - task.verbose_name = tvn_format.format(vn) - with atomic(): - task.save(update_fields={'verbose_name'}) try: media.save() except IntegrityError as e: @@ -286,10 +304,8 @@ def index_source_task(source_id): priority=20, verbose_name=verbose_name.format(media.pk), ) - if task: - task.verbose_name = verbose_name - with atomic(): - task.save(update_fields={'verbose_name'}) + # Reset task.verbose_name to the saved value + update_task_status(task, None) # Cleanup of media no longer available from the source cleanup_removed_media(source, videos) @@ -656,17 +672,14 @@ def save_all_media_for_source(source_id): metadata__isnull=False, ) if task: - verbose_name = remove_enclosed( + task._verbose_name = remove_enclosed( task.verbose_name, '[', ']', ' ', valid='0123456789/,', end=task.verbose_name.find('Check'), ) - tvn_format = '[1/{:,}' + f'/{refresh_qs.count():,}] {verbose_name}' + tvn_format = '1/{:,}' + f'/{refresh_qs.count():,}' for mn, media in enumerate(refresh_qs, start=1): - if task: - task.verbose_name = tvn_format.format(mn) - with atomic(): - task.save(update_fields={'verbose_name'}) + update_task_status(task, tvn_format.format(mn)) try: media.refresh_formats except YouTubeError as e: @@ -679,20 +692,14 @@ def save_all_media_for_source(source_id): # Trigger the post_save signal for each media item linked to this source as various # flags may need to be recalculated - if task: - tvn_format = '[2/{:,}' + f'/{mqs.count():,}] {verbose_name}' + tvn_format = '2/{:,}' + f'/{mqs.count():,}' for mn, media in enumerate(mqs, start=1): - if task: - task.verbose_name = tvn_format.format(mn) - with atomic(): - task.save(update_fields={'verbose_name'}) + update_task_status(task, tvn_format.format(mn)) if media.uuid not in already_saved: with atomic(): media.save() - if task: - task.verbose_name = verbose_name - with atomic(): - task.save(update_fields={'verbose_name'}) + # Reset task.verbose_name to the saved value + update_task_status(task, None) @background(schedule=60, remove_existing_tasks=True) From 7ad9842b8ecc555eb953118a23b51d59a3227433 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 23 Mar 2025 03:58:41 -0400 Subject: [PATCH 267/417] Reorder some imports --- tubesync/sync/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 0f8e4ecc..1fe48ece 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -16,9 +16,9 @@ from PIL import Image from django.conf import settings from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile -from 
django.utils import timezone -from django.db.transaction import atomic from django.db import DatabaseError, IntegrityError +from django.db.transaction import atomic +from django.utils import timezone from django.utils.translation import gettext_lazy as _ from background_task import background from background_task.models import Task, CompletedTask From 30c5f57619b88256c2848b449364202ca80f4c54 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Mar 2025 12:10:01 -0400 Subject: [PATCH 268/417] Add the video number to the debug log message --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 1fe48ece..6cf0fc2d 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -288,7 +288,7 @@ def index_source_task(source_id): except IntegrityError as e: log.error(f'Index media failed: {source} / {media} with "{e}"') else: - log.debug(f'Indexed media: {source} / {media}') + log.debug(f'Indexed media: {vn}: {source} / {media}') # log the new media instances new_media_instance = ( media.created and From b9f575fbc0f6fc8a71523e36d90675b40d48758c Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Mar 2025 12:24:08 -0400 Subject: [PATCH 269/417] Don't skip missing fragments during format testing --- tubesync/sync/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 65b72e06..85278af6 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -170,6 +170,7 @@ def get_media_info(url, days=None): 'youtubetab': {'approximate_date': ['true']}, }, 'paths': paths, + 'skip_unavailable_fragments': False, 'sleep_interval_requests': 2 * settings.BACKGROUND_TASK_ASYNC_THREADS, 'verbose': True if settings.DEBUG else False, }) From e48abb2c7295e29494957caf313e63dcd42d40e1 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Mar 2025 12:28:10 -0400 Subject: [PATCH 270/417] Use `locked_at` as it is closer to started `run_at` is when it was ready to be processed. `locked_at` is when it was taken out of the queue. --- tubesync/sync/templates/sync/tasks.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html index 2b7a1250..9cb9dfe1 100644 --- a/tubesync/sync/templates/sync/tasks.html +++ b/tubesync/sync/templates/sync/tasks.html @@ -25,7 +25,7 @@ {% for task in running %}
{{ task }}
-            Task started at {{ task.run_at|date:'Y-m-d H:i:s' }}
+            Task started at {{ task.locked_at|date:'Y-m-d H:i:s' }}
{% empty %}
          There are no running tasks.

From f9eb571dd868e1f31eab8fecb3b6525939ad3cf4 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Mar 2025 12:54:42 -0400
Subject: [PATCH 271/417] Resume partial downloads unless the `.clean` file is found

---
 tubesync/sync/youtube.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index 65b72e06..1c4fd96e 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -296,7 +296,10 @@ def download_media(
         temp_dir_parent = ytopts['paths']['temp']
         temp_dir_prefix = f'{temp_dir_prefix}{v_key}-'
         temp_dir_obj = TemporaryDirectory(prefix=temp_dir_prefix,dir=temp_dir_parent)
-        temp_dir_path = Path(temp_dir_obj.name)
+        if temp_dir_obj and (Path(temp_dir_parent) / '.clean').exists():
+            temp_dir_path = Path(temp_dir_obj.name)
+        else:
+            temp_dir_path = Path(temp_dir_parent)
         (temp_dir_path / '.ignore').touch(exist_ok=True)
         ytopts['paths'].update({
             'home': str(output_dir),

From e5e29885f198e6ba6a05d6769d83045c38e6eb39 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Mar 2025 13:17:31 -0400
Subject: [PATCH 272/417] Change the task expiration to 1 day

---
 tubesync/tubesync/settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py
index fc309b28..602583ab 100644
--- a/tubesync/tubesync/settings.py
+++ b/tubesync/tubesync/settings.py
@@ -135,7 +135,7 @@ HEALTHCHECK_ALLOWED_IPS = ('127.0.0.1',)
 
 MAX_ATTEMPTS = 15               # Number of times tasks will be retried
-MAX_RUN_TIME = 1800             # Maximum amount of time in seconds a task can run
+MAX_RUN_TIME = 1*(24*60*60)     # Maximum amount of time in seconds a task can run
 BACKGROUND_TASK_RUN_ASYNC = True            # Run tasks async in the background
 BACKGROUND_TASK_ASYNC_THREADS = 1           # Number of async tasks to run at once
 MAX_BACKGROUND_TASK_ASYNC_THREADS = 8       # For sanity reasons

From c10b1aa2554da2a550b4c8a3d07796cda0c5ada9 Mon Sep 17 00:00:00 2001
From: tcely
Date: Mon, 24 Mar 2025 13:46:59 -0400
Subject: [PATCH 273/417] Add `nodename` to `Task.locked_by`

---
 patches/background_task/models.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/patches/background_task/models.py b/patches/background_task/models.py
index 0e46c5b6..be88d0f2 100644
--- a/patches/background_task/models.py
+++ b/patches/background_task/models.py
@@ -195,9 +195,14 @@ class Task(models.Model):
         Check if the locked_by process is still running.
         """
         if self.locked_by:
+            pid, node = self.locked_by.split('/', 1)
+            # locked by a process on this node?
+            if os.uname().nodename[:(64-10)] != node:
+                return False
+            # is the process still running?
             try:
-                # won't kill the process. kill is a bad named system call
-                os.kill(int(self.locked_by), 0)
+                # Signal number zero won't kill the process.
+                os.kill(int(pid), 0)
                 return True
             except:
                 return False
@@ -220,8 +225,9 @@

     def lock(self, locked_by):
         now = timezone.now()
+        owner = f'{locked_by[:8]}/{os.uname().nodename[:(64-10)]}'
         unlocked = Task.objects.unlocked(now).filter(pk=self.pk)
-        updated = unlocked.update(locked_by=locked_by, locked_at=now)
+        updated = unlocked.update(locked_by=owner, locked_at=now)
         if updated:
             return Task.objects.get(pk=self.pk)
         return None
@@ -423,9 +429,14 @@ class CompletedTask(models.Model):
         """
         Check if the locked_by process is still running.
         """
         if self.locked_by:
+            pid, node = self.locked_by.split('/', 1)
+            # locked by a process on this node?
+ if os.uname().nodename[:(64-10)] != node: + return False + # is the process still running? try: # won't kill the process. kill is a bad named system call - os.kill(int(self.locked_by), 0) + os.kill(int(pid), 0) return True except: return False From 2afad767f1f8af66cb00581ad8068f7501a92c39 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Mar 2025 15:08:32 -0400 Subject: [PATCH 274/417] Tasks `locked_at` before the current boot time are not locked --- patches/background_task/models.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/patches/background_task/models.py b/patches/background_task/models.py index be88d0f2..ef44c9b1 100644 --- a/patches/background_task/models.py +++ b/patches/background_task/models.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- -from datetime import timedelta +from datetime import datetime, timedelta, timezone as tz from hashlib import sha1 +from pathlib import Path import json import logging import os @@ -73,10 +74,17 @@ class TaskManager(models.Manager): return qs.filter(unlocked) def locked(self, now): + stats = None + kcore_path = Path('/proc/kcore') + boot_time = posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) + if kcore_path.exists(): + stats = kcore_path.stats() + if stats: + boot_time += timedelta(seconds=stats.st_mtime) max_run_time = app_settings.BACKGROUND_TASK_MAX_RUN_TIME qs = self.get_queryset() expires_at = now - timedelta(seconds=max_run_time) - locked = Q(locked_by__isnull=False) & Q(locked_at__gt=expires_at) + locked = Q(locked_by__isnull=False) & Q(locked_at__gt=expires_at) & Q(locked_at__gt=boot_time) return qs.filter(locked) def failed(self): @@ -194,7 +202,7 @@ class Task(models.Model): """ Check if the locked_by process is still running. """ - if self.locked_by: + if self in objects.locked(timezone.now()) and self.locked_by: pid, node = self.locked_by.split('/', 1) # locked by a process on this node? 
if os.uname().nodename[:(64-10)] != node: From 5e506291410c14720c18a2719e92911ea6c431e6 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Mar 2025 15:12:13 -0400 Subject: [PATCH 275/417] Test the `background_task` changes --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ed7833ac..b7eef6ea 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,6 +30,7 @@ jobs: - name: Set up Django environment run: | cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py + cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/background_task/ patches/background_task/* cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ patches/yt_dlp/* - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 From 53855995422ad7bb04c4bb86cca98d61bbc93e0a Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Mar 2025 15:18:48 -0400 Subject: [PATCH 276/417] Reorder the copy layers --- Dockerfile | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index b3b38678..c0fcd9ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -362,9 +362,8 @@ RUN --mount=type=tmpfs,target=/cache \ apt-get -y autoclean && \ rm -v -rf /tmp/* -# Copy app -COPY tubesync /app -COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings.py +# Copy root +COPY config/root / # patch background_task COPY patches/background_task/ \ @@ -374,6 +373,10 @@ COPY patches/background_task/ \ COPY patches/yt_dlp/ \ /usr/local/lib/python3/dist-packages/yt_dlp/ +# Copy app +COPY tubesync /app +COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings.py + # Build app RUN set -x && \ # Make absolutely sure we didn't accidentally bundle a SQLite dev database @@ -387,17 +390,13 @@ RUN set -x && \ mkdir -v -p /config/cache/pycache && \ mkdir -v -p /downloads/audio && \ mkdir -v -p /downloads/video && \ + # Check nginx configuration copied from config/root/etc + nginx -t && \ # Append software versions ffmpeg_version=$(/usr/local/bin/ffmpeg -version | awk -v 'ev=31' '1 == NR && "ffmpeg" == $1 { print $3; ev=0; } END { exit ev; }') && \ test -n "${ffmpeg_version}" && \ printf -- "ffmpeg_version = '%s'\n" "${ffmpeg_version}" >> /app/common/third_party_versions.py -# Copy root -COPY config/root / - -# Check nginx configuration copied from config/root/etc -RUN set -x && nginx -t - # Create a healthcheck HEALTHCHECK --interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck.py", "http://127.0.0.1:8080/healthcheck"] From 1fd8214f3e7f5242604f74b4531a30311cc7a6f2 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Mar 2025 15:50:22 -0400 Subject: [PATCH 277/417] Add a `TaskManager.boot_time` property --- patches/background_task/models.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/patches/background_task/models.py b/patches/background_task/models.py index ef44c9b1..1abda5ab 100644 --- a/patches/background_task/models.py +++ b/patches/background_task/models.py @@ -39,6 +39,23 @@ class TaskQuerySet(models.QuerySet): class TaskManager(models.Manager): + _boot_time = posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) + + @property + def boot_time(self): + if self._boot_time > self.posix_epoch: + return self._boot_time + stats = None + boot_time = self.posix_epoch + kcore_path = Path('/proc/kcore') + if kcore_path.exists(): + stats = 
kcore_path.stats() + if stats: + boot_time += timedelta(seconds=stats.st_mtime) + if boot_time > self._boot_time: + self._boot_time = boot_time + return self._boot_time + def get_queryset(self): return TaskQuerySet(self.model, using=self._db) @@ -70,21 +87,14 @@ class TaskManager(models.Manager): max_run_time = app_settings.BACKGROUND_TASK_MAX_RUN_TIME qs = self.get_queryset() expires_at = now - timedelta(seconds=max_run_time) - unlocked = Q(locked_by=None) | Q(locked_at__lt=expires_at) + unlocked = Q(locked_by=None) | Q(locked_at__lt=expires_at) | Q(locked_at__lt=self.boot_time) return qs.filter(unlocked) def locked(self, now): - stats = None - kcore_path = Path('/proc/kcore') - boot_time = posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) - if kcore_path.exists(): - stats = kcore_path.stats() - if stats: - boot_time += timedelta(seconds=stats.st_mtime) max_run_time = app_settings.BACKGROUND_TASK_MAX_RUN_TIME qs = self.get_queryset() expires_at = now - timedelta(seconds=max_run_time) - locked = Q(locked_by__isnull=False) & Q(locked_at__gt=expires_at) & Q(locked_at__gt=boot_time) + locked = Q(locked_by__isnull=False) & Q(locked_at__gt=expires_at) & Q(locked_at__gt=self.boot_time) return qs.filter(locked) def failed(self): From 41e78b430dd968c9f590837428537e8868353560 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 24 Mar 2025 16:03:56 -0400 Subject: [PATCH 278/417] Add and use the `Task.nodename` property --- patches/background_task/models.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/patches/background_task/models.py b/patches/background_task/models.py index 1abda5ab..de246c57 100644 --- a/patches/background_task/models.py +++ b/patches/background_task/models.py @@ -208,14 +208,18 @@ class Task(models.Model): objects = TaskManager() + @property + def nodename(self): + return os.uname().nodename[:(64-10)] + def locked_by_pid_running(self): """ Check if the locked_by process is still running. """ if self in objects.locked(timezone.now()) and self.locked_by: - pid, node = self.locked_by.split('/', 1) + pid, nodename = self.locked_by.split('/', 1) # locked by a process on this node? - if os.uname().nodename[:(64-10)] != node: + if nodename != self.nodename: return False # is the process still running? 
            try:

@@ -243,7 +247,7 @@

     def lock(self, locked_by):
         now = timezone.now()
-        owner = f'{locked_by[:8]}/{os.uname().nodename[:(64-10)]}'
+        owner = f'{locked_by[:8]}/{self.nodename}'
         unlocked = Task.objects.unlocked(now).filter(pk=self.pk)
         updated = unlocked.update(locked_by=owner, locked_at=now)
         if updated:

From 7f7ba6b28623ca2d0d413d29dd04fb67da47bff5 Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 25 Mar 2025 12:44:14 -0400
Subject: [PATCH 279/417] Update patches/background_task/models.py

---
 patches/background_task/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/patches/background_task/models.py b/patches/background_task/models.py
index de246c57..f7121961 100644
--- a/patches/background_task/models.py
+++ b/patches/background_task/models.py
@@ -49,7 +49,7 @@ class TaskManager(models.Manager):
         boot_time = self.posix_epoch
         kcore_path = Path('/proc/kcore')
         if kcore_path.exists():
-            stats = kcore_path.stats()
+            stats = kcore_path.stat()
         if stats:
             boot_time += timedelta(seconds=stats.st_mtime)
         if boot_time > self._boot_time:

From 91b7a4538e8ec626790924fe98054c66b7288bce Mon Sep 17 00:00:00 2001
From: tcely
Date: Tue, 25 Mar 2025 23:00:17 -0400
Subject: [PATCH 280/417] Configure logging for `background_task`

---
 tubesync/common/logger.py | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/tubesync/common/logger.py b/tubesync/common/logger.py
index a1fcf89a..3334b1e5 100644
--- a/tubesync/common/logger.py
+++ b/tubesync/common/logger.py
@@ -1,14 +1,32 @@
 import logging
 from django.conf import settings
+from .utils import getenv
 
 
 logging_level = logging.DEBUG if settings.DEBUG else logging.INFO
+default_formatter = logging.Formatter(
+    '%(asctime)s [%(name)s/%(levelname)s] %(message)s'
+)
+default_sh = logging.StreamHandler()
+default_sh.setFormatter(default_formatter)
+default_sh.setLevel(logging_level)
 
 
-log = logging.getLogger('tubesync')
-log.setLevel(logging_level)
-ch = logging.StreamHandler()
-ch.setLevel(logging_level)
-formatter = logging.Formatter('%(asctime)s [%(name)s/%(levelname)s] %(message)s')
-ch.setFormatter(formatter)
-log.addHandler(ch)
+app_name = getenv('DJANGO_SETTINGS_MODULE')
+first_part = app_name.split('.', 1)[0]
+log = app_logger = logging.getLogger(first_part)
+app_logger.addHandler(default_sh)
+app_logger.setLevel(logging_level)
+
+
+background_task_name = 'background_task.management.commands.process_tasks'
+last_part = background_task_name.rsplit('.', 1)[-1]
+background_task_formatter = logging.Formatter(
+    f'%(asctime)s [{last_part}/%(levelname)s] %(message)s'
+)
+background_task_sh = logging.StreamHandler()
+background_task_sh.setFormatter(background_task_formatter)
+background_task_sh.setLevel(logging_level)
+background_task_logger = logging.getLogger(background_task_name)
+background_task_logger.addHandler(background_task_sh)
+background_task_logger.setLevel(logging_level)

From b50709aef1799a5aabcf3815dec4d642418c3644 Mon Sep 17 00:00:00 2001
From: tcely
Date: Wed, 26 Mar 2025 01:13:28 -0400
Subject: [PATCH 281/417] fixup: missed `self.`

---
 patches/background_task/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/patches/background_task/models.py b/patches/background_task/models.py
index f7121961..03544f26 100644
--- a/patches/background_task/models.py
+++ b/patches/background_task/models.py
@@ -216,7 +216,7 @@ class Task(models.Model):
         """
         Check if the locked_by process is still running.
""" - if self in objects.locked(timezone.now()) and self.locked_by: + if self in self.objects.locked(timezone.now()) and self.locked_by: pid, nodename = self.locked_by.split('/', 1) # locked by a process on this node? if nodename != self.nodename: From 51adb6575179dcefc9122d32507c6cb2601e67f1 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 02:12:47 -0400 Subject: [PATCH 282/417] fixup: use the class instead of an instance --- patches/background_task/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patches/background_task/models.py b/patches/background_task/models.py index 03544f26..e8a08b2e 100644 --- a/patches/background_task/models.py +++ b/patches/background_task/models.py @@ -216,7 +216,7 @@ class Task(models.Model): """ Check if the locked_by process is still running. """ - if self in self.objects.locked(timezone.now()) and self.locked_by: + if self in Task.objects.locked(timezone.now()) and self.locked_by: pid, nodename = self.locked_by.split('/', 1) # locked by a process on this node? if nodename != self.nodename: From bcb743217477331303b752fc33f0b9e2e0f684de Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 02:30:43 -0400 Subject: [PATCH 283/417] Remove the hard-coded class name --- patches/background_task/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patches/background_task/models.py b/patches/background_task/models.py index e8a08b2e..7214308d 100644 --- a/patches/background_task/models.py +++ b/patches/background_task/models.py @@ -216,7 +216,7 @@ class Task(models.Model): """ Check if the locked_by process is still running. """ - if self in Task.objects.locked(timezone.now()) and self.locked_by: + if self in self.__class__.objects.locked(timezone.now()) and self.locked_by: pid, nodename = self.locked_by.split('/', 1) # locked by a process on this node? 
if nodename != self.nodename: From f150ce21d0829653dc1cb4c20e2c2c4a8ef76d4a Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 02:44:58 -0400 Subject: [PATCH 284/417] Force new `yt-dlp` version --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index c0fcd9ad..82bc665e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -315,6 +315,8 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va # Switch workdir to the the app WORKDIR /app +ARG YTDLP_DATE + # Set up the app RUN --mount=type=tmpfs,target=/cache \ --mount=type=cache,id=pipenv-cache,sharing=locked,target=/cache/pipenv \ From 6e38fcdb7c184e79f8d106d97d4ba9336d3248a0 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 02:49:35 -0400 Subject: [PATCH 285/417] Create `tag-yt-dlp` --- tag-yt-dlp | 1 + 1 file changed, 1 insertion(+) create mode 100644 tag-yt-dlp diff --git a/tag-yt-dlp b/tag-yt-dlp new file mode 100644 index 00000000..0c159067 --- /dev/null +++ b/tag-yt-dlp @@ -0,0 +1 @@ + 2025.03.26 From 5e6c9f144a37d191518c65ed196b21d1cd6cda85 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 03:06:20 -0400 Subject: [PATCH 286/417] Add `YTDLP_DATE` build arg --- .github/workflows/ci.yaml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b7eef6ea..24d66353 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -10,6 +10,18 @@ on: - main jobs: + info: + runs-on: ubuntu-latest + outputs: + ytdlp_tag: ${{ steps.set.outputs.ytdlp_tag }} + steps: + - uses: actions/checkout@v4 + - name: Set outputs + id: set + run: | + printf -- '%s=%s\n' >> "${GITHUB_OUTPUT}" \ + 'ytdlp_tag' "$(< tag-yt-dlp)" + test: runs-on: ubuntu-22.04 strategy: @@ -34,9 +46,10 @@ jobs: cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ patches/yt_dlp/* - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 + containerise: if: ${{ !cancelled() }} - needs: test + needs: ['info', 'test'] runs-on: ubuntu-latest timeout-minutes: 120 steps: @@ -122,3 +135,5 @@ jobs: IMAGE_NAME=${{ env.IMAGE_NAME }} FFMPEG_DATE=${{ env.FFMPEG_DATE }} FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} + YTDLP_DATE=${{ needs.info.outputs.ytdlp_tag }} + From cacf306b0960b8cf3b66f15316d0e5ba3b0b4902 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 03:30:13 -0400 Subject: [PATCH 287/417] Move more steps into the `info` job --- .github/workflows/ci.yaml | 79 ++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 24d66353..30220cc7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,8 +13,45 @@ jobs: info: runs-on: ubuntu-latest outputs: + ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} + string-lowercase: ${{ steps.string.outputs.lowercase }} ytdlp_tag: ${{ steps.set.outputs.ytdlp_tag }} steps: + - name: Lowercase github username for ghcr + id: string + uses: ASzc/change-string-case-action@v6 + with: + string: ${{ github.actor }} + - name: Retrieve yt-dlp/FFmpeg-Builds releases with GitHub CLI + id: ffmpeg + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_API_GQL_ASSETS: 25 + GH_API_GQL_RELEASES: 35 + GH_API_GQL_OWNER: yt-dlp + GH_API_GQL_REPO: FFmpeg-Builds + run: | + gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC}) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; + gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; + { + var='releases' ; + delim='"'"${var}"'_EOF"' ; + printf -- '%s<<%s\n' "${var}" "${delim}" ; + gh api graphql --cache 12h \ + -F assets="${GH_API_GQL_ASSETS}" \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" ; + printf -- '%s\n' "${delim}" ; + unset -v delim jq_arg var ; + } >> "${GITHUB_OUTPUT}" + gh api graphql --cache 12h \ + -F assets="${GH_API_GQL_ASSETS}" \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; - uses: actions/checkout@v4 - name: Set outputs id: set @@ -53,40 +90,10 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 120 steps: - - name: Retrieve yt-dlp/FFmpeg-Builds releases with GitHub CLI - id: ffmpeg - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GH_API_GQL_ASSETS: 25 - GH_API_GQL_RELEASES: 35 - GH_API_GQL_OWNER: yt-dlp - GH_API_GQL_REPO: FFmpeg-Builds - run: | - gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC}) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; - gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; - { - var='releases' ; - delim='"'"${var}"'_EOF"' ; - printf -- '%s<<%s\n' "${var}" "${delim}" ; - gh api graphql --cache 12h \ - -F assets="${GH_API_GQL_ASSETS}" \ - -F owner="${GH_API_GQL_OWNER}" \ - -F repo="${GH_API_GQL_REPO}" \ - -F releases="${GH_API_GQL_RELEASES}" \ - -f query="${gql_query}" --jq "${gql_jq}" ; - printf -- '%s\n' "${delim}" ; - unset -v delim jq_arg var ; - } >> "${GITHUB_OUTPUT}" - gh api graphql --cache 12h \ - -F assets="${GH_API_GQL_ASSETS}" \ - -F owner="${GH_API_GQL_OWNER}" \ - -F repo="${GH_API_GQL_REPO}" \ - -F releases="${GH_API_GQL_RELEASES}" \ - -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; - name: Set environment variables with jq run: | cat >| .ffmpeg.releases.json <<'EOF' - ${{ steps.ffmpeg.outputs.releases }} + ${{ needs.info.outputs.ffmpeg-releases }} EOF { var='FFMPEG_DATE' ; @@ -117,23 +124,17 @@ jobs: DOCKER_USERNAME: ${{ github.actor }} DOCKER_TOKEN: ${{ 'meeb' == github.repository_owner && secrets.REGISTRY_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} run: echo "${DOCKER_TOKEN}" | docker login --password-stdin --username "${DOCKER_USERNAME}" "${DOCKER_REGISTRY}" - - name: Lowercase github username for ghcr - id: string - uses: ASzc/change-string-case-action@v6 - with: - string: ${{ github.actor }} - name: Build and push timeout-minutes: 60 uses: docker/build-push-action@v6 with: platforms: linux/amd64,linux/arm64 push: ${{ 'success' == needs.test.result && 'meeb' == github.repository_owner && 'true' || 'false' }} - tags: ghcr.io/${{ steps.string.outputs.lowercase }}/${{ env.IMAGE_NAME }}:latest - cache-from: type=registry,ref=ghcr.io/${{ steps.string.outputs.lowercase }}/${{ env.IMAGE_NAME }}:latest + tags: ghcr.io/${{ needs.info.outputs.string-lowercase }}/${{ env.IMAGE_NAME }}:latest + cache-from: type=registry,ref=ghcr.io/${{ needs.info.outputs.string-lowercase }}/${{ env.IMAGE_NAME }}:latest cache-to: type=inline build-args: | IMAGE_NAME=${{ env.IMAGE_NAME }} FFMPEG_DATE=${{ env.FFMPEG_DATE }} FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} YTDLP_DATE=${{ needs.info.outputs.ytdlp_tag }} - From b799bdfd65977513b9e9e8910ec7ce173d826396 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 04:05:21 -0400 Subject: [PATCH 288/417] Update tag-yt-dlp --- tag-yt-dlp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tag-yt-dlp b/tag-yt-dlp index 0c159067..c7302ddb 100644 --- a/tag-yt-dlp +++ b/tag-yt-dlp @@ -1 +1 @@ - 2025.03.26 +2025.03.26 From 0300be4728f7d893816be4b1d81d37acab634140 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 12:23:52 -0400 Subject: [PATCH 289/417] Retrieve `yt-dlp/yt-dlp` releases from the GitHub API --- .github/workflows/ci.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 
30220cc7..b445aec5 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -52,6 +52,33 @@ jobs: -F repo="${GH_API_GQL_REPO}" \ -F releases="${GH_API_GQL_RELEASES}" \ -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; + - name: Retrieve yt-dlp/yt-dlp releases with GitHub CLI + id: yt-dlp + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_API_GQL_RELEASES: 25 + GH_API_GQL_OWNER: yt-dlp + GH_API_GQL_REPO: yt-dlp + run: | + gql_query='query($repo: String!, $owner: String!, $releases: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC}) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; + gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) | { "tag": .tag, "date": .tag.name } ]' ; + { + var='releases' ; + delim='"'"${var}"'_EOF"' ; + printf -- '%s<<%s\n' "${var}" "${delim}" ; + gh api graphql --cache 12h \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" ; + printf -- '%s\n' "${delim}" ; + unset -v delim jq_arg var ; + } >> "${GITHUB_OUTPUT}" + gh api graphql --cache 12h \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; - uses: actions/checkout@v4 - name: Set outputs id: set From 18829d13628c33a0427b2d63a43c4894028ebe42 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 12:49:04 -0400 Subject: [PATCH 290/417] Show the raw API response --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b445aec5..2546ab0c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -60,8 +60,8 @@ jobs: GH_API_GQL_OWNER: yt-dlp GH_API_GQL_REPO: yt-dlp run: | - gql_query='query($repo: String!, $owner: String!, $releases: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC}) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; - gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) | { "tag": .tag, "date": .tag.name } ]' ; + gql_query='query($repo: String!, $owner: String!, $releases: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; + gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) ]' ; { var='releases' ; delim='"'"${var}"'_EOF"' ; From 72ba0474a38f0621b1fc52384b4248bb94ee3077 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 13:10:22 -0400 Subject: [PATCH 291/417] Remove static booleans --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2546ab0c..30ad207b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -60,8 +60,8 @@ jobs: GH_API_GQL_OWNER: yt-dlp GH_API_GQL_REPO: yt-dlp run: | - gql_query='query($repo: String!, $owner: String!, $releases: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; - gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) ]' ; + gql_query='query($repo: String!, $owner: String!, $releases: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, publishedAt, updatedAt, description, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; + gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) | del(.isDraft, .isPrerelease) ]' ; { var='releases' ; delim='"'"${var}"'_EOF"' ; From 085c64d95ddcb0e638d55778ee1ae35af039aea3 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 14:08:07 -0400 Subject: [PATCH 292/417] Show the `yt-dlp-latest-release` --- .github/workflows/ci.yaml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 30ad207b..2e241a7b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -15,6 +15,7 @@ jobs: outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} string-lowercase: ${{ steps.string.outputs.lowercase }} + ytdlp-latest-release: ${{ steps.yt-dlp.outputs.latest-release }} ytdlp_tag: ${{ steps.set.outputs.ytdlp_tag }} steps: - name: Lowercase github username for ghcr @@ -60,11 +61,12 @@ jobs: GH_API_GQL_OWNER: yt-dlp GH_API_GQL_REPO: yt-dlp run: | - gql_query='query($repo: String!, $owner: String!, $releases: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, publishedAt, updatedAt, description, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; + mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; + gql_query='query($repo: String!, $owner: String!, $releases: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, publishedAt, updatedAt, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) | del(.isDraft, .isPrerelease) ]' ; { var='releases' ; - delim='"'"${var}"'_EOF"' ; + delim="$(mk_delim "${var}")" ; printf -- '%s<<%s\n' "${var}" "${delim}" ; gh api graphql --cache 12h \ -F owner="${GH_API_GQL_OWNER}" \ @@ -72,6 +74,16 @@ jobs: -F releases="${GH_API_GQL_RELEASES}" \ -f query="${gql_query}" --jq "${gql_jq}" ; printf -- '%s\n' "${delim}" ; + jq_arg='map(select(.isLatest))' ; + var='latest-release' ; + delim="$(mk_delim "${var}")" ; + printf -- '%s<<%s\n' "${var}" "${delim}" ; + gh api graphql --cache 12h \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" | jq -c "${jq_arg}" -- ; + printf -- '%s\n' "${delim}" ; unset -v delim jq_arg var ; } >> "${GITHUB_OUTPUT}" gh api graphql --cache 12h \ @@ -83,6 +95,9 @@ jobs: - name: Set outputs id: set run: | + cat <<'EOF' + ${{ toJSON(fromJSON(steps.yt-dlp.outputs.latest-release)) }} + 'EOF' printf -- '%s=%s\n' >> "${GITHUB_OUTPUT}" \ 'ytdlp_tag' "$(< tag-yt-dlp)" From 77d00114f83c8ade466d10759ae5c9d98724aee2 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 14:16:25 -0400 Subject: [PATCH 293/417] Show the tag name from the latest `yt-dlp` release --- .github/workflows/ci.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2e241a7b..2aff0e9a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -74,7 +74,7 @@ jobs: -F releases="${GH_API_GQL_RELEASES}" \ -f query="${gql_query}" --jq "${gql_jq}" ; printf -- '%s\n' "${delim}" ; - jq_arg='map(select(.isLatest))' ; + jq_arg='map(select(.isLatest))[0]' ; var='latest-release' ; delim="$(mk_delim "${var}")" ; printf -- '%s<<%s\n' "${var}" "${delim}" ; @@ -97,7 +97,10 @@ jobs: run: | cat <<'EOF' ${{ toJSON(fromJSON(steps.yt-dlp.outputs.latest-release)) }} - 'EOF' + EOF + cat <<'EOF' + ${{ fromJSON(steps.yt-dlp.outputs.latest-release).tag.name }} + EOF printf -- '%s=%s\n' >> "${GITHUB_OUTPUT}" \ 'ytdlp_tag' "$(< tag-yt-dlp)" From ff502e2014161360d78c1495fb941d2a7a82caa0 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 14:40:20 -0400 Subject: [PATCH 294/417] Clean up the `set` step --- .github/workflows/ci.yaml | 53 ++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2aff0e9a..5b7d0562 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,7 +16,7 @@ jobs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} string-lowercase: ${{ steps.string.outputs.lowercase }} ytdlp-latest-release: ${{ steps.yt-dlp.outputs.latest-release }} - ytdlp_tag: ${{ steps.set.outputs.ytdlp_tag }} + ytdlp-releases: ${{ steps.yt-dlp.outputs.releases }} steps: - name: Lowercase github username for ghcr id: string @@ -32,11 +32,12 @@ jobs: GH_API_GQL_OWNER: yt-dlp GH_API_GQL_REPO: FFmpeg-Builds run: | - gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC}) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; + gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; + mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; { var='releases' ; - delim='"'"${var}"'_EOF"' ; + delim="$(mk_delim "${var}")" ; printf -- '%s<<%s\n' "${var}" "${delim}" ; gh api graphql --cache 12h \ -F assets="${GH_API_GQL_ASSETS}" \ @@ -46,13 +47,14 @@ jobs: -f query="${gql_query}" --jq "${gql_jq}" ; printf -- '%s\n' "${delim}" ; unset -v delim jq_arg var ; - } >> "${GITHUB_OUTPUT}" - gh api graphql --cache 12h \ - -F assets="${GH_API_GQL_ASSETS}" \ - -F owner="${GH_API_GQL_OWNER}" \ - -F repo="${GH_API_GQL_REPO}" \ - -F releases="${GH_API_GQL_RELEASES}" \ - -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; + } >> "${GITHUB_OUTPUT}" ; + # Log the human version + gh api graphql --cache 12h \ + -F assets="${GH_API_GQL_ASSETS}" \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; - name: Retrieve yt-dlp/yt-dlp releases with GitHub CLI id: yt-dlp env: @@ -61,9 +63,9 @@ jobs: GH_API_GQL_OWNER: yt-dlp GH_API_GQL_REPO: yt-dlp run: | - mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; - gql_query='query($repo: String!, $owner: String!, $releases: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, publishedAt, updatedAt, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; + gql_query='query($repo: String!, $owner: String!, $releases: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, publishedAt, updatedAt, tagName, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) | del(.isDraft, .isPrerelease) ]' ; + mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; { var='releases' ; delim="$(mk_delim "${var}")" ; @@ -85,24 +87,13 @@ jobs: -f query="${gql_query}" --jq "${gql_jq}" | jq -c "${jq_arg}" -- ; printf -- '%s\n' "${delim}" ; unset -v delim jq_arg var ; - } >> "${GITHUB_OUTPUT}" - gh api graphql --cache 12h \ - -F owner="${GH_API_GQL_OWNER}" \ - -F repo="${GH_API_GQL_REPO}" \ - -F releases="${GH_API_GQL_RELEASES}" \ - -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; - - uses: actions/checkout@v4 - - name: Set outputs - id: set - run: | - cat <<'EOF' - ${{ toJSON(fromJSON(steps.yt-dlp.outputs.latest-release)) }} - EOF - cat <<'EOF' - ${{ fromJSON(steps.yt-dlp.outputs.latest-release).tag.name }} - EOF - printf -- '%s=%s\n' >> "${GITHUB_OUTPUT}" \ - 'ytdlp_tag' "$(< tag-yt-dlp)" + } >> "${GITHUB_OUTPUT}" ; + # Log the human version + gh api graphql --cache 12h \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; test: runs-on: ubuntu-22.04 @@ -182,4 +173,4 @@ jobs: IMAGE_NAME=${{ env.IMAGE_NAME }} FFMPEG_DATE=${{ env.FFMPEG_DATE }} FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} - YTDLP_DATE=${{ needs.info.outputs.ytdlp_tag }} + YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} From 63a65949c7688ed977d3b1f0676a4c2c49936d11 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 14:43:54 -0400 Subject: [PATCH 295/417] Delete `tag-yt-dlp` as no longer used --- tag-yt-dlp | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tag-yt-dlp diff --git a/tag-yt-dlp b/tag-yt-dlp deleted file mode 100644 index c7302ddb..00000000 --- a/tag-yt-dlp +++ /dev/null @@ -1 +0,0 @@ -2025.03.26 From af4374c224733f4737f8adc9170e7e9919824cd4 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 15:26:38 -0400 Subject: [PATCH 296/417] Run tests for pull requests --- .github/workflows/ci.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5b7d0562..bcee7dc7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,9 +8,18 @@ on: push: branches: - main + pull_request: + branches: + - main + types: + - opened + - reopened + - synchronize + - ready_for_review jobs: info: + if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} @@ -96,6 +105,7 @@ jobs: -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; test: + if: ${{ !cancelled() && ( 'pull_request' != github.event_name || (! 
github.event.pull_request.draft) ) }} runs-on: ubuntu-22.04 strategy: fail-fast: false @@ -121,7 +131,7 @@ run: cd tubesync && python3 manage.py test --verbosity=2 containerise: - if: ${{ !cancelled() }} + if: ${{ !cancelled() && 'success' == needs.info.result }} needs: ['info', 'test'] runs-on: ubuntu-latest timeout-minutes: 120 From 952c2e530f09c58f0310a4b31f47c37471e2759d Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 15:36:17 -0400 Subject: [PATCH 297/417] Shorten the name for checks --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index bcee7dc7..914c2556 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,4 +1,4 @@ -name: Run Django tests for TubeSync +name: CI env: IMAGE_NAME: tubesync From 807d825904d3ce48ebc575c96c75c2f5cbb98d43 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 15:52:33 -0400 Subject: [PATCH 298/417] Disable push for pull requests --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 914c2556..52742835 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ on: jobs: info: - if: ${{ !cancelled() && 'pull_request' != github.event_name }} + #if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest @@ -175,7 +175,7 @@ uses: docker/build-push-action@v6 with: platforms: linux/amd64,linux/arm64 - push: ${{ 'success' == needs.test.result && 'meeb' == github.repository_owner && 'true' || 'false' }} + push: ${{ 'success' == needs.test.result && 'meeb' == github.repository_owner && 'pull_request' != github.event_name && 'true' || 'false' }} tags: ghcr.io/${{ needs.info.outputs.string-lowercase }}/${{ env.IMAGE_NAME }}:latest cache-from: type=registry,ref=ghcr.io/${{ needs.info.outputs.string-lowercase }}/${{ env.IMAGE_NAME }}:latest cache-to: type=inline From 317cc63a96f7931f8e4acecf3e778cbe506f1e55 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 16:45:15 -0400 Subject: [PATCH 299/417] Look for docker layers in multiple cache locations --- .github/workflows/ci.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 52742835..fb6934be 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -177,8 +177,13 @@ platforms: linux/amd64,linux/arm64 push: ${{ 'success' == needs.test.result && 'meeb' == github.repository_owner && 'pull_request' != github.event_name && 'true' || 'false' }} tags: ghcr.io/${{ needs.info.outputs.string-lowercase }}/${{ env.IMAGE_NAME }}:latest - cache-from: type=registry,ref=ghcr.io/${{ needs.info.outputs.string-lowercase }}/${{ env.IMAGE_NAME }}:latest - cache-to: type=inline + cache-from: | + type=registry,ref=ghcr.io/${{ needs.info.outputs.string-lowercase }}/${{ env.IMAGE_NAME }}:latest + type=registry,ref=ghcr.io/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:latest + type=gha + cache-to: | + type=gha,mode=max + ${{ 'meeb' == github.repository_owner && 'pull_request' != github.event_name && 'type=inline' || '' }} build-args: | IMAGE_NAME=${{ env.IMAGE_NAME }} FFMPEG_DATE=${{ env.FFMPEG_DATE }} FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }}
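The change above makes a build consult three caches instead of one: the actor's own image on ghcr.io, the repository owner's image, and the GitHub Actions cache, while inline cache metadata is only written back when the workflow is actually pushing. Roughly the same lookup expressed with the buildx CLI (the image names are illustrative, and `type=gha` only works where the runner exposes the Actions cache credentials):

    docker buildx build \
      --platform linux/amd64,linux/arm64 \
      --cache-from type=registry,ref=ghcr.io/example-actor/tubesync:latest \
      --cache-from type=registry,ref=ghcr.io/example-owner/tubesync:latest \
      --cache-from type=gha \
      --cache-to type=gha,mode=max \
      --tag ghcr.io/example-actor/tubesync:latest .

Because `type=inline` stores the cache metadata inside the pushed image itself, it is only appended to `cache-to` when the push will actually happen.

From ea6beae40ae351e89ec53fb2073579032b1b8c1f Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 16:58:44 -0400 Subject: [PATCH 300/417] Turn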
off the `info` step again for pull requests --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fb6934be..92b72d1e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ on: jobs: info: - #if: ${{ !cancelled() && 'pull_request' != github.event_name }} + if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} From 1885aecb4b23a86e33d7e0c0e60ebfc9f8162888 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 17:29:41 -0400 Subject: [PATCH 301/417] Lowercase some variables using bash --- .github/workflows/ci.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 92b72d1e..08f2ec63 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,6 +23,8 @@ jobs: runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} + lowercase-variables-actor: ${{ steps.lowercase-variables.outputs.actor }} + lowercase-variables-repository_owner: ${{ steps.lowercase-variables.outputs.repository_owner }} string-lowercase: ${{ steps.string.outputs.lowercase }} ytdlp-latest-release: ${{ steps.yt-dlp.outputs.latest-release }} ytdlp-releases: ${{ steps.yt-dlp.outputs.releases }} @@ -32,6 +34,20 @@ jobs: uses: ASzc/change-string-case-action@v6 with: string: ${{ github.actor }} + - name: Lowercase GitHub variables + id: lowercase-variables + shell: bash + run: | + for var in \ + actor='${{ github.actor }}' \ + repository_owner='${{ github.repository_owner }}' + do + k="$(cut -d '=' -f 1)" + v="${var#${k}=}" + printf -- '%s=%s\n' -- >> "${GITHUB_OUTPUT}" \ + "${k}" "${v,,}" + done + unset -v k v var - name: Retrieve yt-dlp/FFmpeg-Builds releases with GitHub CLI id: ffmpeg env: From d22f200b732b5c4a903625451779b966952884f5 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 20:06:50 -0400 Subject: [PATCH 302/417] Improve readability Also, work around a strange syntax highlighting behavior. --- .github/workflows/ci.yaml | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 08f2ec63..af3066c9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -60,17 +60,19 @@ jobs: gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; { var='releases' ; delim="$(mk_delim "${var}")" ; - printf -- '%s<<%s\n' "${var}" "${delim}" ; + open_ml_var "${delim}" "${var}" ; gh api graphql --cache 12h \ -F assets="${GH_API_GQL_ASSETS}" \ -F owner="${GH_API_GQL_OWNER}" \ -F repo="${GH_API_GQL_REPO}" \ -F releases="${GH_API_GQL_RELEASES}" \ -f query="${gql_query}" --jq "${gql_jq}" ; - printf -- '%s\n' "${delim}" ; + close_ml_var "${delim}" "${var}" ; unset -v delim jq_arg var ; } >> "${GITHUB_OUTPUT}" ; # Log the human version @@ -91,26 +93,28 @@ jobs: gql_query='query($repo: String!, $owner: String!, $releases: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, publishedAt, updatedAt, tagName, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) | del(.isDraft, .isPrerelease) ]' ; mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; { var='releases' ; delim="$(mk_delim "${var}")" ; - printf -- '%s<<%s\n' "${var}" "${delim}" ; + open_ml_var "${delim}" "${var}" ; gh api graphql --cache 12h \ -F owner="${GH_API_GQL_OWNER}" \ -F repo="${GH_API_GQL_REPO}" \ -F releases="${GH_API_GQL_RELEASES}" \ -f query="${gql_query}" --jq "${gql_jq}" ; - printf -- '%s\n' "${delim}" ; + close_ml_var "${delim}" "${var}" ; jq_arg='map(select(.isLatest))[0]' ; var='latest-release' ; delim="$(mk_delim "${var}")" ; - printf -- '%s<<%s\n' "${var}" "${delim}" ; + open_ml_var "${delim}" "${var}" ; gh api graphql --cache 12h \ -F owner="${GH_API_GQL_OWNER}" \ -F repo="${GH_API_GQL_REPO}" \ -F releases="${GH_API_GQL_RELEASES}" \ -f query="${gql_query}" --jq "${gql_jq}" | jq -c "${jq_arg}" -- ; - printf -- '%s\n' "${delim}" ; + close_ml_var "${delim}" "${var}" ; unset -v delim jq_arg var ; } >> "${GITHUB_OUTPUT}" ; # Log the human version @@ -157,22 +161,25 @@ jobs: cat >| .ffmpeg.releases.json <<'EOF' ${{ needs.info.outputs.ffmpeg-releases }} EOF + mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; { var='FFMPEG_DATE' ; - delim='"'"${var}"'_EOF"' ; - printf -- '%s<<%s\n' "${var}" "${delim}" ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; jq_arg='[foreach .[] as $release ([{}, []]; [ .[0] + 
{($release.commit): ([ $release.date ] + (.[0][($release.commit)] // []) ) }, [ .[1][0] // $release.commit ] ] ; .[0][(.[1][0])] ) ][-1][0]' ; jq -r "${jq_arg}" -- .ffmpeg.releases.json ; - printf -- '%s\n' "${delim}" ; + close_ml_var "${delim}" "${var}" ; ffmpeg_date="$( jq -r "${jq_arg}" -- .ffmpeg.releases.json )" var='FFMPEG_VERSION' ; - delim='"'"${var}"'_EOF"' ; - printf -- '%s<<%s\n' "${var}" "${delim}" ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; jq_arg='.[]|select(.date == $date)|.versions[]|select(startswith("N-"))' ; jq -r --arg date "${ffmpeg_date}" "${jq_arg}" -- .ffmpeg.releases.json ; - printf -- '%s\n' "${delim}" ; + close_ml_var "${delim}" "${var}" ; unset -v delim jq_arg var ; } >> "${GITHUB_ENV}" - name: Set up QEMU From dc49b6d87c2c2cc78f237613f8bdc61a0a1985c2 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 20:17:16 -0400 Subject: [PATCH 303/417] Add a function for single line variables also --- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index af3066c9..e395239c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -38,13 +38,14 @@ jobs: id: lowercase-variables shell: bash run: | + set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; for var in \ actor='${{ github.actor }}' \ repository_owner='${{ github.repository_owner }}' do k="$(cut -d '=' -f 1)" v="${var#${k}=}" - printf -- '%s=%s\n' -- >> "${GITHUB_OUTPUT}" \ + set_sl_var >> "${GITHUB_OUTPUT}" \ "${k}" "${v,,}" done unset -v k v var
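This gives the step two matching conventions: `set_sl_var` prints plain single-line `key=value` pairs, while the `mk_delim`/`open_ml_var`/`close_ml_var` trio from the previous patch wraps multi-line values in the `key<<DELIMITER` heredoc-style form that `GITHUB_OUTPUT` and `GITHUB_ENV` expect. A self-contained sketch of both patterns (the variable names and the JSON payload are illustrative only):

    set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; }
    # A random suffix keeps the delimiter from colliding with the value.
    mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; }

    releases_json='[{"tag": "example",
    "date": "2025.03.26"}]'                     # stand-in for the gh api output

    {
      set_sl_var 'ytdlp_tag' '2025.03.26'       # single line: ytdlp_tag=2025.03.26
      var='releases'
      delim="$(mk_delim "${var}")"
      printf -- '%s<<%s\n' "${var}" "${delim}"  # opens the multi-line block
      printf -- '%s\n' "${releases_json}"       # the multi-line payload
      printf -- '%s\n' "${delim}"               # closes it with the same delimiter
    } >> "${GITHUB_OUTPUT:-/dev/stdout}"        # falls back to stdout outside a runner

From 3a9ff40ddab5b80d79ed95e50e25a28894ad75cc Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 21:10:54 -0400 Subject: [PATCH 304/417] Create action.yml This is only a sketch of moving the API work into reusable actions so far. --- .github/actions/FFmpeg/action.yml | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/actions/FFmpeg/action.yml diff --git a/.github/actions/FFmpeg/action.yml b/.github/actions/FFmpeg/action.yml new file mode 100644 index 00000000..ed52cd12 --- /dev/null +++ b/.github/actions/FFmpeg/action.yml @@ -0,0 +1,27 @@ +name: 'FFmpeg Builds' +description: 'Use GitHub CLI & API to retrieve information about FFmpeg Build releases.'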
inputs: - who-to-greet: # id of input - description: 'Who to greet' - required: false - default: 'World' + token: + required: true + default: ${{ secrets.GITHUB_TOKEN }} + description: | + GH_TOKEN for GitHub CLI to use. + Default: $${{ secrets.GITHUB_TOKEN }} + num-assets: + required: true + default: '25' + description: | + The number of assets (attached files) to retrieve from each release. + Default: 25 + num-releases: + required: true + default: '35' + description: | + The number of releases to retrieve from the repository. + Default: 35 + repository_owner: + required: true + default: 'yt-dlp' + description: | + The name of the user or organization that owns the repository. + Default: 'yt-dlp' + repository_name: + required: true + default: 'FFmpeg-Builds' + description: | + Which repository from the owner to search for releases. + Default: 'FFmpeg-Builds' outputs: - random-number: - description: "Random number" - value: ${{ steps.first.outputs.random-number }} + releases: + value: ${{ steps.set.outputs.releases }} + description: 'Generated JSON describing the released builds.' runs: using: 'composite' steps: - - name: First step - id: 'first' + - name: Retrieve releases + id: 'set' env: - INPUT_WHO_TO_GREET: ${{ inputs.who-to-greet }} + GH_TOKEN: '${{ inputs.token }}' + GH_API_GQL_ASSETS: '${{ inputs.num-assets }}' + GH_API_GQL_RELEASES: '${{ inputs.num-releases }}' + GH_API_GQL_OWNER: '${{ inputs.repository_owner }}' + GH_API_GQL_REPO: '${{ inputs.repository_name }}' shell: 'bash' run: | - echo 'It worked!' - echo "random-number=${RANDOM}" >> "${GITHUB_OUTPUT}" - ls -al '${{ github.action_path }}' - echo "Hello ${INPUT_WHO_TO_GREET}." + command -v gh + command -v jq + gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; + gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; + mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; + { + var='releases' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + gh api graphql --cache 12h \ + -F assets="${GH_API_GQL_ASSETS}" \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" ; + close_ml_var "${delim}" "${var}" ; + unset -v delim jq_arg var ; + } >> "${GITHUB_OUTPUT}" ; + # Log the human version + gh api graphql --cache 12h \ + -F assets="${GH_API_GQL_ASSETS}" \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; From 41f37bcd656c5c075f8ffbb6b7d1b130c745f134 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 22:00:46 -0400 Subject: [PATCH 306/417] Use the new `FFmpeg` action --- .github/workflows/ci.yaml | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e395239c..2b465feb 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ on: jobs: info: - if: ${{ !cancelled() && 'pull_request' != github.event_name }} + #if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} @@ -49,40 +49,10 @@ jobs: "${k}" "${v,,}" done unset -v k v var + - uses: actions/checkout@v4 - name: Retrieve yt-dlp/FFmpeg-Builds releases with GitHub CLI id: ffmpeg - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GH_API_GQL_ASSETS: 25 - GH_API_GQL_RELEASES: 35 - GH_API_GQL_OWNER: yt-dlp - GH_API_GQL_REPO: FFmpeg-Builds - run: | - gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; - gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; - mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; - open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; - close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; - { - var='releases' ; - delim="$(mk_delim "${var}")" ; - open_ml_var "${delim}" "${var}" ; - gh api graphql --cache 12h \ - -F assets="${GH_API_GQL_ASSETS}" \ - -F owner="${GH_API_GQL_OWNER}" \ - -F repo="${GH_API_GQL_REPO}" \ - -F releases="${GH_API_GQL_RELEASES}" \ - -f query="${gql_query}" --jq "${gql_jq}" ; - close_ml_var "${delim}" "${var}" ; - unset -v delim jq_arg var ; - } >> "${GITHUB_OUTPUT}" ; - # Log the human version - gh api graphql --cache 12h \ - -F assets="${GH_API_GQL_ASSETS}" \ - -F owner="${GH_API_GQL_OWNER}" \ - -F repo="${GH_API_GQL_REPO}" \ - -F releases="${GH_API_GQL_RELEASES}" \ - -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; + uses: ./.github/actions/FFmpeg - name: Retrieve yt-dlp/yt-dlp releases with GitHub CLI id: yt-dlp From b30d0ce88f281a9fc41c182bc4e1d6b0eb10c275 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 22:07:17 -0400 Subject: [PATCH 307/417] Debug the shell code --- .github/workflows/ci.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2b465feb..063b50ee 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -38,17 +38,18 @@ jobs: id: lowercase-variables shell: bash run: | + set -x ; set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; for var in \ actor='${{ github.actor }}' \ repository_owner='${{ github.repository_owner }}' do - k="$(cut -d '=' -f 1)" - v="${var#${k}=}" + k="$(cut -d '=' -f 1)" ; + v="${var#${k}=}" ; set_sl_var >> "${GITHUB_OUTPUT}" \ - "${k}" "${v,,}" - done - unset -v k v var + "${k}" "${v,,}" ; + done ; + unset -v k v var ; - uses: actions/checkout@v4 - name: Retrieve yt-dlp/FFmpeg-Builds releases with GitHub CLI id: ffmpeg From d556f203012cd7b2528363aad9cefede91929a04 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 22:10:21 -0400 Subject: [PATCH 308/417] fixup: cut needed some input --- .github/workflows/ci.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 063b50ee..2b70c334 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -38,13 +38,12 @@ jobs: id: lowercase-variables shell: bash run: | - set -x ; set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; set -x ; for var in \ actor='${{ github.actor }}' \ repository_owner='${{ github.repository_owner }}' do - k="$(cut -d '=' -f 1)" ; + k="$( cut -d '=' -f 1 <<<"${var}" )" ; v="${var#${k}=}" ; set_sl_var >> "${GITHUB_OUTPUT}" \ "${k}" "${v,,}" ; done ; unset -v k v var ;
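The fixup above matters because `cut` reads standard input when it is given no file operand: with nothing connected to stdin the command substitution yields an empty key (interactively it would simply hang) instead of splitting `${var}`. A standalone sketch of the corrected pattern, with an illustrative value:

    var='actor=Example-User'
    # Broken: no input is connected, so cut never sees ${var}:
    #k="$(cut -d '=' -f 1)"
    # Fixed, as in the patch: feed the string in via a here-string.
    k="$( cut -d '=' -f 1 <<<"${var}" )"    # -> actor
    v="${var#${k}=}"                        # strip the 'key=' prefix -> Example-User
    printf -- '%s=%s\n' "${k}" "${v,,}"     # ${v,,} lowercases: actor=example-user

A fork-free alternative would be `k="${var%%=*}"`, which uses only parameter expansion.

From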
f4e9c08376f762e2089e273f4bda51794c1a9708 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 22:19:47 -0400 Subject: [PATCH 309/417] Use `github.token` instead of `secrets.GITHUB_TOKEN` --- .github/actions/FFmpeg/action.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/FFmpeg/action.yml b/.github/actions/FFmpeg/action.yml index 90a1210f..4e20ddc0 100644 --- a/.github/actions/FFmpeg/action.yml +++ b/.github/actions/FFmpeg/action.yml @@ -4,10 +4,10 @@ description: 'Use GitHub CLI & API to retrieve information about FFmpeg Build re inputs: token: required: true - default: ${{ secrets.GITHUB_TOKEN }} + default: ${{ github.token }} description: | GH_TOKEN for GitHub CLI to use. - Default: $${{ secrets.GITHUB_TOKEN }} + Default: $${{ github.token }} num-assets: required: true default: '25' @@ -44,7 +44,7 @@ runs: - name: Retrieve releases id: 'set' env: - GH_TOKEN: '${{ inputs.token }}' + GH_TOKEN: ${{ inputs.token }} GH_API_GQL_ASSETS: '${{ inputs.num-assets }}' GH_API_GQL_RELEASES: '${{ inputs.num-releases }}' GH_API_GQL_OWNER: '${{ inputs.repository_owner }}' From 97e69e415645a2943a596225a121b4f6fc5ba7a2 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 22:29:29 -0400 Subject: [PATCH 310/417] Try without the braces --- .github/actions/FFmpeg/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/FFmpeg/action.yml b/.github/actions/FFmpeg/action.yml index 4e20ddc0..db9b61bd 100644 --- a/.github/actions/FFmpeg/action.yml +++ b/.github/actions/FFmpeg/action.yml @@ -7,7 +7,7 @@ inputs: default: ${{ github.token }} description: | GH_TOKEN for GitHub CLI to use. - Default: $${{ github.token }} + Default: github.token num-assets: required: true default: '25' From 4e82fcb5ccd0559c08e92223a13470c5fc4e1acf Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 22:42:02 -0400 Subject: [PATCH 311/417] fixup: escaping the sequence --- .github/actions/FFmpeg/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/FFmpeg/action.yml b/.github/actions/FFmpeg/action.yml index db9b61bd..40170ccb 100644 --- a/.github/actions/FFmpeg/action.yml +++ b/.github/actions/FFmpeg/action.yml @@ -7,7 +7,7 @@ inputs: default: ${{ github.token }} description: | GH_TOKEN for GitHub CLI to use. - Default: github.token + Default: `\$\{\{ github.token \}\}` num-assets: required: true default: '25' From 485cfc46cd093ad909f6c7c18a6b70adb93538aa Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 23:25:53 -0400 Subject: [PATCH 312/417] Create action.yml --- .github/actions/yt-dlp/action.yml | 71 +++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 .github/actions/yt-dlp/action.yml diff --git a/.github/actions/yt-dlp/action.yml b/.github/actions/yt-dlp/action.yml new file mode 100644 index 00000000..b7f84b1f --- /dev/null +++ b/.github/actions/yt-dlp/action.yml @@ -0,0 +1,71 @@ +name: 'yt-dlp Releases' +description: 'Use GitHub CLI & API to retrieve information about `yt-dlp` releases.' + +inputs: + token: + required: true + default: ${{ github.token }} + description: | + GH_TOKEN for GitHub CLI to use. + Default: `\$\{\{ github.token \}\}` + num-releases: + required: true + default: '25' + description: | + The number of releases to retrieve from the repository. + Default: 25 + repository_owner: + required: true + default: 'yt-dlp' + description: | + The name of the user or organization that owns the repository. 
+ Default: 'yt-dlp' + repository_name: + required: true + default: 'yt-dlp' + description: | + Which repository from the owner to search for releases. + Default: 'yt-dlp' + +outputs: + releases: + value: ${{ steps.set.outputs.releases }} + description: 'Retrieved JSON from the API describing the releases.' + +runs: + using: 'composite' + steps: + - name: Retrieve releases + id: 'set' + env: + GH_TOKEN: ${{ inputs.token }} + GH_API_GQL_RELEASES: '${{ inputs.num-releases }}' + GH_API_GQL_OWNER: '${{ inputs.repository_owner }}' + GH_API_GQL_REPO: '${{ inputs.repository_name }}' + shell: 'bash' + run: | + command -v gh > /dev/null ; + command -v jq > /dev/null ; + gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; + gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; + mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; + { + var='releases' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + gh api graphql --cache 12h \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" ; + close_ml_var "${delim}" "${var}" ; + unset -v delim jq_arg var ; + } >> "${GITHUB_OUTPUT}" ; + # Log the human version + gh api graphql --cache 12h \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; From 7b000836d370804b26a07c3a95dc27ec2ee5947f Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 23:28:42 -0400 Subject: [PATCH 313/417] Turn off the `info` step again for pull requests --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2b70c334..df4df0d6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ on: jobs: info: - #if: ${{ !cancelled() && 'pull_request' != github.event_name }} + if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} From 5d3f75ea1c8350c4cc4376cd3c3dcf3b2030f4a1 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 23:33:37 -0400 Subject: [PATCH 314/417] Do not log the output for command requirements --- .github/actions/FFmpeg/action.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/FFmpeg/action.yml b/.github/actions/FFmpeg/action.yml index 40170ccb..c768592b 100644 --- a/.github/actions/FFmpeg/action.yml +++ b/.github/actions/FFmpeg/action.yml @@ -45,14 +45,15 @@ runs: id: 'set' env: GH_TOKEN: ${{ inputs.token }} + GH_REPO: '${{ 
inputs.repository_owner }}/${{ inputs.repository_name }}' GH_API_GQL_ASSETS: '${{ inputs.num-assets }}' GH_API_GQL_RELEASES: '${{ inputs.num-releases }}' GH_API_GQL_OWNER: '${{ inputs.repository_owner }}' GH_API_GQL_REPO: '${{ inputs.repository_name }}' shell: 'bash' run: | - command -v gh - command -v jq + command -v gh >/dev/null ; + command -v jq >/dev/null ; gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; From b619c5a42dca889b699c28c1ba8d23cc1d9c0258 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 23:43:04 -0400 Subject: [PATCH 315/417] Copy the current code for the `yt-dlp` releases --- .github/actions/yt-dlp/action.yml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/actions/yt-dlp/action.yml b/.github/actions/yt-dlp/action.yml index b7f84b1f..5f8b37ff 100644 --- a/.github/actions/yt-dlp/action.yml +++ b/.github/actions/yt-dlp/action.yml @@ -28,6 +28,9 @@ inputs: Default: 'yt-dlp' outputs: + latest-release: + value: ${{ steps.set.outputs.latest-release }} + description: 'The JSON API response for the latest release.' releases: value: ${{ steps.set.outputs.releases }} description: 'Retrieved JSON from the API describing the releases.' @@ -46,8 +49,8 @@ runs: run: | command -v gh > /dev/null ; command -v jq > /dev/null ; - gql_query='query($repo: String!, $owner: String!, $releases: Int!, $assets: Int!) { repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { tagName, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } }, releaseAssets(first: $assets) { totalCount, nodes { name, size, downloadUrl } } } } } }' ; - gql_jq='[ .data.repository.releases.nodes[] | select((.isLatest or .isDraft or .isPrerelease) | not) | { "tag": .tag.name, "commit": .tag.target.oid, "date": .tag.name[1+(.tag.name|index("-")):], "assets": { "limit": '"${GH_API_GQL_ASSETS}"', "totalCount": .releaseAssets.totalCount }, "files": .releaseAssets.nodes, "versions": [ .releaseAssets.nodes[].name | select(contains("-linux64-"))[1+index("-"):index("-linux64-")] ] } ]' ; + gql_query='query($repo: String!, $owner: String!, $releases: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, publishedAt, updatedAt, tagName, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; + gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) | del(.isDraft, .isPrerelease) ]' ; mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; @@ -61,6 +64,16 @@ runs: -F releases="${GH_API_GQL_RELEASES}" \ -f query="${gql_query}" --jq "${gql_jq}" ; close_ml_var "${delim}" "${var}" ; + jq_arg='map(select(.isLatest))[0]' ; + var='latest-release' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + gh api graphql --cache 12h \ + -F owner="${GH_API_GQL_OWNER}" \ + -F repo="${GH_API_GQL_REPO}" \ + -F releases="${GH_API_GQL_RELEASES}" \ + -f query="${gql_query}" --jq "${gql_jq}" | jq -c "${jq_arg}" -- ; + close_ml_var "${delim}" "${var}" ; unset -v delim jq_arg var ; } >> "${GITHUB_OUTPUT}" ; # Log the human version From 6b0483b993e41a56b73baefaf347741e4ad2d98a Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 23:48:11 -0400 Subject: [PATCH 316/417] Use the newly added `yt-dlp` action --- .github/workflows/ci.yaml | 40 +-------------------------------------- 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2b70c334..e23582b1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -55,45 +55,7 @@ jobs: uses: ./.github/actions/FFmpeg - name: Retrieve yt-dlp/yt-dlp releases with GitHub CLI id: yt-dlp - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GH_API_GQL_RELEASES: 25 - GH_API_GQL_OWNER: yt-dlp - GH_API_GQL_REPO: yt-dlp - run: | - gql_query='query($repo: String!, $owner: String!, $releases: Int!) 
{ repository(owner: $owner, name: $repo) { releases(first: $releases, orderBy: { field: CREATED_AT, direction: DESC }) { nodes { name, createdAt, publishedAt, updatedAt, tagName, url, isDraft, isPrerelease, isLatest, tag { name, target { oid, commitUrl } } } } } }' ; - gql_jq='[ .data.repository.releases.nodes[] | select((.isDraft or .isPrerelease) | not) | del(.isDraft, .isPrerelease) ]' ; - mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; - open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; - close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; - { - var='releases' ; - delim="$(mk_delim "${var}")" ; - open_ml_var "${delim}" "${var}" ; - gh api graphql --cache 12h \ - -F owner="${GH_API_GQL_OWNER}" \ - -F repo="${GH_API_GQL_REPO}" \ - -F releases="${GH_API_GQL_RELEASES}" \ - -f query="${gql_query}" --jq "${gql_jq}" ; - close_ml_var "${delim}" "${var}" ; - jq_arg='map(select(.isLatest))[0]' ; - var='latest-release' ; - delim="$(mk_delim "${var}")" ; - open_ml_var "${delim}" "${var}" ; - gh api graphql --cache 12h \ - -F owner="${GH_API_GQL_OWNER}" \ - -F repo="${GH_API_GQL_REPO}" \ - -F releases="${GH_API_GQL_RELEASES}" \ - -f query="${gql_query}" --jq "${gql_jq}" | jq -c "${jq_arg}" -- ; - close_ml_var "${delim}" "${var}" ; - unset -v delim jq_arg var ; - } >> "${GITHUB_OUTPUT}" ; - # Log the human version - gh api graphql --cache 12h \ - -F owner="${GH_API_GQL_OWNER}" \ - -F repo="${GH_API_GQL_REPO}" \ - -F releases="${GH_API_GQL_RELEASES}" \ - -f query="${gql_query}" --jq "${gql_jq}" | jq '.[]' -- ; + uses: ./.github/actions/yt-dlp test: if: ${{ !cancelled() && ( 'pull_request' != github.event_name || (! github.event.pull_request.draft) ) }} From 800886a95899524c03d68efd640316179041e254 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 26 Mar 2025 23:48:57 -0400 Subject: [PATCH 317/417] Turn off the `info` step again for pull requests --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e23582b1..f91413ba 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ on: jobs: info: - #if: ${{ !cancelled() && 'pull_request' != github.event_name }} + if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} From e9c6fbdaa07751bffc8389d1945ff3cae9849850 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 00:04:16 -0400 Subject: [PATCH 318/417] Turn off debugging for the `lowercase-variables` step --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f91413ba..4ec7ca4d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -39,7 +39,7 @@ jobs: shell: bash run: | set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; - set -x ; for var in \ + for var in \ actor='${{ github.actor }}' \ repository_owner='${{ github.repository_owner }}' do From 2a0c59380ad787c716bdf7c02cfaf1f0f19c2afb Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 00:29:13 -0400 Subject: [PATCH 319/417] Add the `string-case` action --- .github/actions/string-case/action.yml | 53 ++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 .github/actions/string-case/action.yml diff --git a/.github/actions/string-case/action.yml b/.github/actions/string-case/action.yml new file mode 100644 index 
00000000..b5962aac --- /dev/null +++ b/.github/actions/string-case/action.yml @@ -0,0 +1,53 @@ +name: Change String Case +description: Make a string lowercase, uppercase, or capitalized + +inputs: + string: + description: The input string + required: true + +outputs: + lowercase: + value: ${{ steps.set.outputs.lowercase }} + description: The input string, with any uppercase characters replaced with lowercase ones + uppercase: + value: ${{ steps.set.outputs.uppercase }} + description: The input string, with any lowercase characters replaced with uppercase ones + capitalized: + value: ${{ steps.set.outputs.capitalized }} + description: The input string, with any alphabetical characters lowercase, except for the first character, which is uppercased + +runs: + using: 'composite' + steps: + - name: Retrieve releases + id: 'set' + env: + INPUT_STRING: '${{ inputs.string }}' + shell: 'bash' + run: | + set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; + mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; + { + + var='lowercase' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + printf -- '%s\n' "${INPUT_STRING,,}" ; + close_ml_var "${delim}" "${var}" ; + + var='capitalized' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + printf -- '%s\n' "${INPUT_STRING^}" ; + close_ml_var "${delim}" "${var}" ; + + var='uppercase' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + printf -- '%s\n' "${INPUT_STRING^^}" ; + close_ml_var "${delim}" "${var}" ; + + } >> "${GITHUB_OUTPUT}" From c54b1d3a4848cedbbc14083c399c949520b04fdf Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 00:50:25 -0400 Subject: [PATCH 320/417] Use the new `string-case` action --- .github/workflows/ci.yaml | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4ec7ca4d..199f2d82 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,33 +23,22 @@ jobs: runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} - lowercase-variables-actor: ${{ steps.lowercase-variables.outputs.actor }} - lowercase-variables-repository_owner: ${{ steps.lowercase-variables.outputs.repository_owner }} - string-lowercase: ${{ steps.string.outputs.lowercase }} + lowercase-github-actor: ${{ steps.github-actor.outputs.lowercase }} + lowercase-github-repository_owner: ${{ steps.github-repository_owner.outputs.lowercase }} ytdlp-latest-release: ${{ steps.yt-dlp.outputs.latest-release }} ytdlp-releases: ${{ steps.yt-dlp.outputs.releases }} steps: - - name: Lowercase github username for ghcr - id: string - uses: ASzc/change-string-case-action@v6 + - uses: actions/checkout@v4 + - name: Lowercase github username + id: github-actor + uses: ./.github/actions/string-case with: string: ${{ github.actor }} - - name: Lowercase GitHub variables - id: lowercase-variables - shell: bash - run: | - set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; - for var in \ - actor='${{ github.actor }}' \ - repository_owner='${{ github.repository_owner }}' - do - k="$( cut -d '=' -f 1 <<<"${var}" )" ; - v="${var#${k}=}" ; - set_sl_var >> "${GITHUB_OUTPUT}" \ - "${k}" "${v,,}" ; - done ; - unset -v k v var ; - - uses: actions/checkout@v4 + - name: Lowercase github repository owner + id: 
github-repository_owner + uses: ./.github/actions/string-case + with: + string: ${{ github.repository_owner }} - name: Retrieve yt-dlp/FFmpeg-Builds releases with GitHub CLI id: ffmpeg uses: ./.github/actions/FFmpeg @@ -132,10 +121,10 @@ jobs: with: platforms: linux/amd64,linux/arm64 push: ${{ 'success' == needs.test.result && 'meeb' == github.repository_owner && 'pull_request' != github.event_name && 'true' || 'false' }} - tags: ghcr.io/${{ needs.info.outputs.string-lowercase }}/${{ env.IMAGE_NAME }}:latest + tags: ghcr.io/${{ needs.info.outputs.lowercase-github-actor }}/${{ env.IMAGE_NAME }}:latest cache-from: | - type=registry,ref=ghcr.io/${{ needs.info.outputs.string-lowercase }}/${{ env.IMAGE_NAME }}:latest - type=registry,ref=ghcr.io/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:latest + type=registry,ref=ghcr.io/${{ needs.info.outputs.lowercase-github-actor }}/${{ env.IMAGE_NAME }}:latest + type=registry,ref=ghcr.io/${{ needs.info.outputs.lowercase-github-repository_owner }}/${{ env.IMAGE_NAME }}:latest type=gha cache-to: | type=gha,mode=max From 30cee644855ce929f2529f852793a54edde45939 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 00:57:40 -0400 Subject: [PATCH 321/417] Log the same outputs as the old action --- .github/actions/string-case/action.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/actions/string-case/action.yml b/.github/actions/string-case/action.yml index b5962aac..4b4c2ada 100644 --- a/.github/actions/string-case/action.yml +++ b/.github/actions/string-case/action.yml @@ -26,6 +26,7 @@ runs: INPUT_STRING: '${{ inputs.string }}' shell: 'bash' run: | + printf -- 'Manipulating string: %s\n' "${INPUT_STRING}" set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; @@ -51,3 +52,6 @@ runs: close_ml_var "${delim}" "${var}" ; } >> "${GITHUB_OUTPUT}" + printf -- '%s: %s\n' 'lowercase' "${INPUT_STRING,,}" + printf -- '%s: %s\n' 'uppercase' "${INPUT_STRING^^}" + printf -- '%s: %s\n' 'capitalized' "${INPUT_STRING^}" From 5b3d72f3dae8c48adc7e94b02cc3a7b3166073f8 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 10:04:07 -0400 Subject: [PATCH 322/417] Tweak the step name --- .github/actions/string-case/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/string-case/action.yml b/.github/actions/string-case/action.yml index 4b4c2ada..cadcb74b 100644 --- a/.github/actions/string-case/action.yml +++ b/.github/actions/string-case/action.yml @@ -20,7 +20,7 @@ outputs: runs: using: 'composite' steps: - - name: Retrieve releases + - name: Set outputs id: 'set' env: INPUT_STRING: '${{ inputs.string }}' From 5757455b46aa81b705c3045ccf9fdd2ea0b867ff Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 10:15:25 -0400 Subject: [PATCH 323/417] Create variables.inc.sh --- .github/sh/library/variables.inc.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/sh/library/variables.inc.sh diff --git a/.github/sh/library/variables.inc.sh b/.github/sh/library/variables.inc.sh new file mode 100644 index 00000000..a5ac4ce0 --- /dev/null +++ b/.github/sh/library/variables.inc.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env sh + +# For setting single line variables in the environment or output +set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; + +# Used together to set multiple line variables in the environment or output +mk_delim() { printf 
-- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; +open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; +close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; + From ffd6edd913fccc82c42d0f59588e848cfe1d7a23 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 10:23:00 -0400 Subject: [PATCH 324/417] Clean up the `.json` file and log the results --- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 199f2d82..5bd6d8b7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -104,6 +104,8 @@ jobs: close_ml_var "${delim}" "${var}" ; unset -v delim jq_arg var ; } >> "${GITHUB_ENV}" + cat -v "${GITHUB_ENV}" + rm -v -f .ffmpeg.releases.json - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx From 9d9c549275694d25f44ac8920cbf85b4dc91d4a0 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 10:25:11 -0400 Subject: [PATCH 325/417] Run all the jobs --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5bd6d8b7..e0f6f7f7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ on: jobs: info: - if: ${{ !cancelled() && 'pull_request' != github.event_name }} + #if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} From 60596b04eb933f99e9b54e1bffbe51e876f89631 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 10:34:19 -0400 Subject: [PATCH 326/417] Test if GitHub parses like shell does or not --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e0f6f7f7..1e7b3026 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -83,8 +83,8 @@ jobs: cat >| .ffmpeg.releases.json <<'EOF' ${{ needs.info.outputs.ffmpeg-releases }} EOF - mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; - open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + mk_delim() { local f='%s_EOF_%d_' ; printf -- "${f}" "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<"'\%'s"\n' ; printf -- "${f}" "$2" "$1" ; } ; close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; { var='FFMPEG_DATE' ; From f1043f57eb582ed7491b4fc51954c7e28df1929a Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 10:37:24 -0400 Subject: [PATCH 327/417] GitHub has its own weirdness --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1e7b3026..cbce9946 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -84,7 +84,7 @@ jobs: ${{ needs.info.outputs.ffmpeg-releases }} EOF mk_delim() { local f='%s_EOF_%d_' ; printf -- "${f}" "$1" "${RANDOM}" ; } ; - open_ml_var() { local f=''\%'s<<"'\%'s"\n' ; printf -- "${f}" "$2" "$1" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; { var='FFMPEG_DATE' ; From e9cf69e0587bb1a61260cf2aa6f46aa4d7676849 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 10:38:31 -0400 Subject: [PATCH 328/417] Turn off the `info` step again for pull requests --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index cbce9946..15a0bf45 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ on: jobs: info: - #if: ${{ !cancelled() && 'pull_request' != github.event_name }} + if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} From 73f683edc223167a074158844aca29ade26be2e4 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 10:44:22 -0400 Subject: [PATCH 329/417] Use a local for format like the other functions --- .github/sh/library/variables.inc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/sh/library/variables.inc.sh b/.github/sh/library/variables.inc.sh index a5ac4ce0..01a5373c 100644 --- a/.github/sh/library/variables.inc.sh +++ b/.github/sh/library/variables.inc.sh @@ -4,7 +4,7 @@ set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; # Used together to set multiple line variables in the environment or output -mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; +mk_delim() { local f='%s_EOF_%d_' ; printf -- "${f}" "$1" "${RANDOM}" ; } ; open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; From 129ccca2582dc3afc872fb395a7565a51bd99dab Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 27 Mar 2025 21:39:59 -0400 Subject: [PATCH 330/417] Handle strings better --- tubesync/sync/fields.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tubesync/sync/fields.py b/tubesync/sync/fields.py index 2910b7cc..2f479b68 100644 --- a/tubesync/sync/fields.py +++ b/tubesync/sync/fields.py @@ -145,6 +145,9 @@ class CommaSepChoiceField(models.CharField): # The data was lost; we can regenerate it. 
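# An aside on why the string branch added below is needed: list() on a str
# iterates characters, so a string assigned by hand must be split on the
# field separator instead (values here are hypothetical examples):
#   list('foo,bar')      -> ['f', 'o', 'o', ',', 'b', 'a', 'r']
#   'foo,bar'.split(',') -> ['foo', 'bar']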
args_dict = {key: self.__dict__[key] for key in CommaSepChoice._fields} args_dict['selected_choices'] = list(value) + # setting a string manually should not result in characters + if isinstance(value, str) and len(value) > 0: + args_dict['selected_choices'] = value.split(self.separator) data = CommaSepChoice(**args_dict) value = data.selected_choices s_value = super().get_prep_value(value) From b443450ef43ce962cfea3819030b25d23f750a13 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 28 Mar 2025 01:48:52 -0400 Subject: [PATCH 331/417] Logging this every five seconds is excessive --- tubesync/common/logger.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tubesync/common/logger.py b/tubesync/common/logger.py index 3334b1e5..0bdd52fd 100644 --- a/tubesync/common/logger.py +++ b/tubesync/common/logger.py @@ -19,12 +19,17 @@ app_logger.addHandler(default_sh) app_logger.setLevel(logging_level) +class NoWaitingForTasksFilter(logging.Filter): + def filter(self, record): + return 'waiting for tasks' != record.getMessage() + background_task_name = 'background_task.management.commands.process_tasks' last_part = background_task_name.rsplit('.', 1)[-1] background_task_formatter = logging.Formatter( f'%(asctime)s [{last_part}/%(levelname)s] %(message)s' ) background_task_sh = logging.StreamHandler() +background_task_sh.addFilter(NoWaitingForTasksFilter()) background_task_sh.setFormatter(background_task_formatter) background_task_sh.setLevel(logging_level) background_task_logger = logging.getLogger(background_task_name) From 03b8ac3dc621aef070d2a097a1d0d14938decbfe Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 28 Mar 2025 10:00:24 -0400 Subject: [PATCH 332/417] Adjust to the much larger `MAX_RUN_TIME` --- tubesync/tubesync/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index 602583ab..c44c888f 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -210,7 +210,7 @@ except: if MAX_RUN_TIME < 600: MAX_RUN_TIME = 600 -DOWNLOAD_MEDIA_DELAY = 60 + (MAX_RUN_TIME / 20) +DOWNLOAD_MEDIA_DELAY = 60 + (MAX_RUN_TIME / 50) if RENAME_SOURCES or RENAME_ALL_SOURCES: BACKGROUND_TASK_ASYNC_THREADS += 1 From 3f873e1100c46ee27faaea9540e0d88070618ae7 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 28 Mar 2025 15:20:04 -0400 Subject: [PATCH 333/417] Don't skip unavailable fragments during download --- tubesync/sync/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index bc1a2432..bfab29e8 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -281,6 +281,7 @@ def download_media( 'writethumbnail': embed_thumbnail, 'check_formats': None, 'overwrites': None, + 'skip_unavailable_fragments': False, 'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20), 'max_sleep_interval': settings.DOWNLOAD_MEDIA_DELAY, 'sleep_interval_requests': 1 + (2 * settings.BACKGROUND_TASK_ASYNC_THREADS), From 41c8cba582e5baecf5512d9d4b4d73fe71d85b0f Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 29 Mar 2025 21:29:18 -0400 Subject: [PATCH 334/417] Add a post-processor to disallow partial playlist results --- tubesync/sync/youtube.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index bfab29e8..b7da7696 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -148,6 +148,7 @@ def get_media_info(url, days=None): f'yesterday-{days!s}days' if days else None ) opts = 
get_yt_opts() + default_opts = yt_dlp.parse_options([]).options paths = opts.get('paths', dict()) if 'temp' in paths: temp_dir_obj = TemporaryDirectory(prefix='.yt_dlp-', dir=paths['temp']) @@ -156,6 +157,12 @@ def get_media_info(url, days=None): paths.update({ 'temp': str(temp_dir_path), }) + postprocessors = opts.get('postprocessors', default_opts.get('postprocessors', list())) + postprocessors.extend(dict( + key='Exec', + when='playlist', + exec_cmd='/usr/bin/env sh /app/full_playlist.sh %(playlist_count)d %(n_entries)d', + )) opts.update({ 'ignoreerrors': False, # explicitly set this to catch exceptions 'ignore_no_formats_error': False, # we must fail first to try again with this enabled @@ -170,6 +177,7 @@ def get_media_info(url, days=None): 'youtubetab': {'approximate_date': ['true']}, }, 'paths': paths, + 'postprocessors': postprocessors, 'skip_unavailable_fragments': False, 'sleep_interval_requests': 2 * settings.BACKGROUND_TASK_ASYNC_THREADS, 'verbose': True if settings.DEBUG else False, From b30999747b0c7b56956958a163693ac24eeed218 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 29 Mar 2025 21:32:55 -0400 Subject: [PATCH 335/417] Create full_playlist.sh --- tubesync/full_playlist.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 tubesync/full_playlist.sh diff --git a/tubesync/full_playlist.sh b/tubesync/full_playlist.sh new file mode 100644 index 00000000..8a9b2139 --- /dev/null +++ b/tubesync/full_playlist.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env sh + +echo "$1" +echo "$2" +exit 0 + +if [ "$1" -ne "$2" ] +then + exit 1 +fi + +exit 0 From c99985d7fff8f86f1afe22d812407218c80d4900 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 29 Mar 2025 22:57:36 -0400 Subject: [PATCH 336/417] Create down-signal for `tubesync-worker` --- config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal b/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal new file mode 100644 index 00000000..f14fddb5 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal @@ -0,0 +1 @@ +SIGTSTP From 7272811434b6464c1eae197dc95f117818c5ced3 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 29 Mar 2025 23:21:19 -0400 Subject: [PATCH 337/417] Update down-signal --- config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal b/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal index f14fddb5..d751378e 100644 --- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal @@ -1 +1 @@ -SIGTSTP +SIGINT From 13f7673dc56674e9868e7e775db50a916554c0be Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 29 Mar 2025 23:25:45 -0400 Subject: [PATCH 338/417] Create utils.py --- patches/background_task/utils.py | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 patches/background_task/utils.py diff --git a/patches/background_task/utils.py b/patches/background_task/utils.py new file mode 100644 index 00000000..9b607abd --- /dev/null +++ b/patches/background_task/utils.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +import signal +import platform + +TTW_SLOW = [0.5, 1.5] +TTW_FAST = [0.0, 0.1] + + +class SignalManager(): + """Manages POSIX 
signals.""" + + kill_now = False + time_to_wait = TTW_SLOW + + def __init__(self): + signal.signal(signal.SIGINT, self.exit_gracefully) + # On Windows, signal() can only be called with: + # SIGABRT, SIGFPE, SIGILL, SIGINT, SIGSEGV, SIGTERM, or SIGBREAK. + if platform.system() == 'Windows': + signal.signal(signal.SIGBREAK, self.exit_gracefully) + else: + signal.signal(signal.SIGHUP, self.exit_gracefully) + signal.signal(signal.SIGUSR1, self.speed_up) + signal.signal(signal.SIGUSR2, self.slow_down) + + def exit_gracefully(self, signum, frame): + self.kill_now = True + # Using interrupt again should raise + # a KeyboardInterrupt exception. + signal.signal(signal.SIGINT, signal.SIG_DFL) + + def speed_up(self, signum, frame): + self.time_to_wait = TTW_FAST + + def slow_down(self, signum, frame): + self.time_to_wait = TTW_SLOW From a24155074881649d2bcb16f3454e1618c553fd7a Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 29 Mar 2025 23:57:14 -0400 Subject: [PATCH 339/417] Pass a tuple to extend --- tubesync/sync/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index b7da7696..9b6a937f 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -158,11 +158,11 @@ def get_media_info(url, days=None): 'temp': str(temp_dir_path), }) postprocessors = opts.get('postprocessors', default_opts.get('postprocessors', list())) - postprocessors.extend(dict( + postprocessors.extend((dict( key='Exec', when='playlist', exec_cmd='/usr/bin/env sh /app/full_playlist.sh %(playlist_count)d %(n_entries)d', - )) + ),)) opts.update({ 'ignoreerrors': False, # explicitly set this to catch exceptions 'ignore_no_formats_error': False, # we must fail first to try again with this enabled From 3f283e5e2ea80dcd8e367e41aeec8f81343b7861 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 30 Mar 2025 04:50:06 -0400 Subject: [PATCH 340/417] Update full_playlist.sh --- tubesync/full_playlist.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/full_playlist.sh b/tubesync/full_playlist.sh index 8a9b2139..e758106a 100644 --- a/tubesync/full_playlist.sh +++ b/tubesync/full_playlist.sh @@ -4,7 +4,7 @@ echo "$1" echo "$2" exit 0 -if [ "$1" -ne "$2" ] +if [ 'NA' != "$2" ] && [ "$1" != "$2" ] then exit 1 fi From 9b27d94d06255fba476c27f29344d6c9088fc257 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 30 Mar 2025 04:52:42 -0400 Subject: [PATCH 341/417] Update youtube.py --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 9b6a937f..0abd357f 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -157,7 +157,7 @@ def get_media_info(url, days=None): paths.update({ 'temp': str(temp_dir_path), }) - postprocessors = opts.get('postprocessors', default_opts.get('postprocessors', list())) + postprocessors = opts.get('postprocessors', default_opts.__dict__.get('postprocessors', list())) postprocessors.extend((dict( key='Exec', when='playlist', From c3c0db931434c032dd3cb771c5379293133263ea Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 30 Mar 2025 13:10:09 -0400 Subject: [PATCH 342/417] Write the playlist JSON to a file Sadly, this is needed to get the number of entries. 
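In outline: yt-dlp is asked to write the playlist-level .info.json to a
predictable location, and an Exec post-processor then runs a shell hook that
can compare the expected and actual entry counts. A minimal sketch of that
combination, using yt-dlp's embedding API (the URL, paths, and template
values here are illustrative stand-ins, not the exact values this diff
settles on):

    import yt_dlp

    opts = {
        'skip_download': True,
        'extract_flat': True,
        'allow_playlist_files': True,
        'writeinfojson': True,
        # route only the playlist-level info.json to a findable path
        'outtmpl': {
            'default': '',
            'pl_infojson': '/dev/shm/infojson/playlist/%(id)s_%(playlist_count)d.%(ext)s',
        },
        # run a hook once the whole playlist has been processed
        'postprocessors': [{
            'key': 'Exec',
            'when': 'playlist',
            'exec_cmd': "echo 'playlist %(id)s reports %(playlist_count)d entries'",
        }],
    }
    with yt_dlp.YoutubeDL(opts) as ydl:
        ydl.extract_info('https://www.youtube.com/playlist?list=EXAMPLE')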
--- tubesync/sync/youtube.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 0abd357f..3fac2dfb 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -19,7 +19,7 @@ from .utils import mkdir_p import yt_dlp import yt_dlp.patch.check_thumbnails import yt_dlp.patch.fatal_http_errors -from yt_dlp.utils import remove_end +from yt_dlp.utils import remove_end, OUTTMPL_TYPES _defaults = getattr(settings, 'YOUTUBE_DEFAULTS', {}) @@ -161,26 +161,39 @@ def get_media_info(url, days=None): postprocessors.extend((dict( key='Exec', when='playlist', - exec_cmd='/usr/bin/env sh /app/full_playlist.sh %(playlist_count)d %(n_entries)d', + exec_cmd="/usr/bin/env sh /app/full_playlist.sh '%(id)s' '%(playlist_count)d'", ),)) + infojson_directory_path = Path(opts.get('cachedir', '/dev/shm')) / 'infojson', + playlist_infojson = 'postprocessor_[%(id)s]_%(n_entries)d_%(playlist_count)d_temp' + outtmpl = dict( + default='', + pl_infojson=f'{infojson_directory_path!s}/playlist/{playlist_infojson}.info.json', + ) + for k in OUTTMPL_TYPES.keys(): + outtmpl.setdefault(k, '') opts.update({ 'ignoreerrors': False, # explicitly set this to catch exceptions 'ignore_no_formats_error': False, # we must fail first to try again with this enabled 'skip_download': True, - 'simulate': True, + 'simulate': False, 'logger': log, 'extract_flat': True, + 'allow_playlist_files': True, 'check_formats': True, 'check_thumbnails': False, + 'clean_infojson': False, 'daterange': yt_dlp.utils.DateRange(start=start), 'extractor_args': { 'youtubetab': {'approximate_date': ['true']}, }, + 'outtmpl': outtmpl, + 'overwrites': True, 'paths': paths, 'postprocessors': postprocessors, 'skip_unavailable_fragments': False, 'sleep_interval_requests': 2 * settings.BACKGROUND_TASK_ASYNC_THREADS, 'verbose': True if settings.DEBUG else False, + 'writeinfojson': True, }) if start: log.debug(f'get_media_info: used date range: {opts["daterange"]} for URL: {url}') From 3717e6d2744ace70d31de9c1f6f38e58b19081e8 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 30 Mar 2025 13:23:46 -0400 Subject: [PATCH 343/417] Update full_playlist.sh --- tubesync/full_playlist.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tubesync/full_playlist.sh b/tubesync/full_playlist.sh index e758106a..6197931b 100644 --- a/tubesync/full_playlist.sh +++ b/tubesync/full_playlist.sh @@ -2,9 +2,17 @@ echo "$1" echo "$2" -exit 0 +playlist_id="${1}" +total_entries="${2}" +set -x -if [ 'NA' != "$2" ] && [ "$1" != "$2" ] +time find / -path '*/infojson/playlist/*' \ + -name "postprocessor_[${playlist_id}]_*_${total_entries}_temp.info.json" + +exit 0 +downloaded_entries=0 + +if [ 'NA' != "${total_entries}" ] && [ "${downloaded_entries}" != "${total_entries}" ] then exit 1 fi From bb5d622988089d71e01501d69c4d7be3c2735881 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 31 Mar 2025 06:49:41 -0400 Subject: [PATCH 344/417] Update youtube.py --- tubesync/sync/youtube.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 3fac2dfb..7da4cb06 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -161,13 +161,13 @@ def get_media_info(url, days=None): postprocessors.extend((dict( key='Exec', when='playlist', - exec_cmd="/usr/bin/env sh /app/full_playlist.sh '%(id)s' '%(playlist_count)d'", + exec_cmd="/usr/bin/env bash /app/full_playlist.sh '%(id)s' '%(playlist_count)d'", 
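# For context: yt-dlp substitutes the %(...)s fields before invoking the
# command, so the script receives plain positional arguments, along the
# lines of this hypothetical expansion:
#   /usr/bin/env bash /app/full_playlist.sh 'PL0123456789abcdef' '150'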
),)) - infojson_directory_path = Path(opts.get('cachedir', '/dev/shm')) / 'infojson', + infojson_directory_path = Path(opts.get('cachedir', '/dev/shm')) / 'infojson' playlist_infojson = 'postprocessor_[%(id)s]_%(n_entries)d_%(playlist_count)d_temp' outtmpl = dict( default='', - pl_infojson=f'{infojson_directory_path!s}/playlist/{playlist_infojson}.info.json', + pl_infojson=f'{infojson_directory_path}/playlist/{playlist_infojson}.%(ext)s', ) for k in OUTTMPL_TYPES.keys(): outtmpl.setdefault(k, '') From 9e0044f8ee44941e2cac1c3eee96ec9b88479ece Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 31 Mar 2025 06:54:52 -0400 Subject: [PATCH 345/417] Update youtube.py --- tubesync/sync/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 7da4cb06..9af2be27 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -163,11 +163,11 @@ def get_media_info(url, days=None): when='playlist', exec_cmd="/usr/bin/env bash /app/full_playlist.sh '%(id)s' '%(playlist_count)d'", ),)) - infojson_directory_path = Path(opts.get('cachedir', '/dev/shm')) / 'infojson' + cache_directory_path = Path(opts.get('cachedir', '/dev/shm')) playlist_infojson = 'postprocessor_[%(id)s]_%(n_entries)d_%(playlist_count)d_temp' outtmpl = dict( default='', - pl_infojson=f'{infojson_directory_path}/playlist/{playlist_infojson}.%(ext)s', + pl_infojson=f'{cache_directory_path}/infojson/playlist/{playlist_infojson}.%(ext)s', ) for k in OUTTMPL_TYPES.keys(): outtmpl.setdefault(k, '') From c1d30df980cba576e291b4fd92e1ef84086388ed Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 31 Mar 2025 07:19:04 -0400 Subject: [PATCH 346/417] Update full_playlist.sh --- tubesync/full_playlist.sh | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tubesync/full_playlist.sh b/tubesync/full_playlist.sh index 6197931b..69a6f084 100644 --- a/tubesync/full_playlist.sh +++ b/tubesync/full_playlist.sh @@ -1,18 +1,23 @@ -#!/usr/bin/env sh +#!/usr/bin/env bash -echo "$1" -echo "$2" playlist_id="${1}" total_entries="${2}" -set -x -time find / -path '*/infojson/playlist/*' \ - -name "postprocessor_[${playlist_id}]_*_${total_entries}_temp.info.json" +downloaded_entries="$( find / \ + -path '*/infojson/playlist/postprocessor_*_temp\.info\.json' \ + -name "postprocessor_[[]${playlist_id}[]]_*_${total_entries}_temp\.info\.json" \ + -exec basename '{}' ';' | \ + sed -e 's/^postprocessor_[[].*[]]_//;s/_temp.*\.json$//;' | \ + cut -d '_' -f 1 )" -exit 0 -downloaded_entries=0 +find / \ + -path '*/infojson/playlist/postprocessor_*_temp\.info\.json' \ + -name "postprocessor_[[]${playlist_id}[]]_*_temp\.info\.json" \ + -type f -delete -if [ 'NA' != "${total_entries}" ] && [ "${downloaded_entries}" != "${total_entries}" ] +if [ 'NA' != "${downloaded_entries:=${3:-NA}}" ] && + [ 'NA' != "${total_entries:-NA}" ] && + [ "${downloaded_entries}" != "${total_entries}" ] then exit 1 fi From a98df953321acf164126ea639a2454dc39c3e565 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 31 Mar 2025 13:23:34 -0400 Subject: [PATCH 347/417] Incorporate earlier `infojson` changes --- tubesync/sync/youtube.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 9af2be27..b8e48a45 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -132,7 +132,7 @@ def _subscriber_only(msg='', response=None): return False -def get_media_info(url, days=None): +def 
get_media_info(url, /, *, days=None, info_json=None): ''' Extracts information from a YouTube URL and returns it as a dict. For a channel or playlist this returns a dict of all the videos on the channel or playlist @@ -149,7 +149,10 @@ def get_media_info(url, days=None): ) opts = get_yt_opts() default_opts = yt_dlp.parse_options([]).options - paths = opts.get('paths', dict()) + class NoDefaultValue: pass # a unique Singleton, that may be checked for later + user_set = lambda k, d, default=NoDefaultValue: d[k] if k in d.keys() else default + default_paths = user_set('paths', default_opts.__dict__, dict()) + paths = user_set('paths', opts, default_paths) if 'temp' in paths: temp_dir_obj = TemporaryDirectory(prefix='.yt_dlp-', dir=paths['temp']) temp_dir_path = Path(temp_dir_obj.name) @@ -157,13 +160,22 @@ def get_media_info(url, days=None): paths.update({ 'temp': str(temp_dir_path), }) - postprocessors = opts.get('postprocessors', default_opts.__dict__.get('postprocessors', list())) + try: + info_json_path = Path(info_json).resolve(strict=False) + except: + pass + else: + opts['paths'].update({ + 'infojson': user_set('infojson', opts['paths'], str(info_json_path)) + }) + default_postprocessors = user_set('postprocessors', default_opts.__dict__, list()) + postprocessors = user_set('postprocessors', opts, default_postprocessors) postprocessors.extend((dict( key='Exec', when='playlist', exec_cmd="/usr/bin/env bash /app/full_playlist.sh '%(id)s' '%(playlist_count)d'", ),)) - cache_directory_path = Path(opts.get('cachedir', '/dev/shm')) + cache_directory_path = Path(user_set('cachedir', opts, '/dev/shm')) playlist_infojson = 'postprocessor_[%(id)s]_%(n_entries)d_%(playlist_count)d_temp' outtmpl = dict( default='', From b3ec8069ee694400f9c399f90e423664ce481359 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 31 Mar 2025 14:44:25 -0400 Subject: [PATCH 348/417] Use the `paths` local --- tubesync/sync/youtube.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index b8e48a45..5bdf0fd0 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -162,11 +162,11 @@ def get_media_info(url, /, *, days=None, info_json=None): }) try: info_json_path = Path(info_json).resolve(strict=False) - except: + except (RuntimeError, TypeError): pass else: - opts['paths'].update({ - 'infojson': user_set('infojson', opts['paths'], str(info_json_path)) + paths.update({ + 'infojson': user_set('infojson', paths, str(info_json_path)) }) default_postprocessors = user_set('postprocessors', default_opts.__dict__, list()) postprocessors = user_set('postprocessors', opts, default_postprocessors) @@ -179,6 +179,7 @@ def get_media_info(url, /, *, days=None, info_json=None): playlist_infojson = 'postprocessor_[%(id)s]_%(n_entries)d_%(playlist_count)d_temp' outtmpl = dict( default='', + infojson='%(id)s.%(ext)s' if paths['infojson'] else '', pl_infojson=f'{cache_directory_path}/infojson/playlist/{playlist_infojson}.%(ext)s', ) for k in OUTTMPL_TYPES.keys(): From 993fdc250393446cf915057911c008b96ea84619 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 31 Mar 2025 15:12:22 -0400 Subject: [PATCH 349/417] Avoid the `KeyError` when it is not set --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 5bdf0fd0..d2c6d493 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -179,7 +179,7 @@ def get_media_info(url, /, *, days=None, 
info_json=None): playlist_infojson = 'postprocessor_[%(id)s]_%(n_entries)d_%(playlist_count)d_temp' outtmpl = dict( default='', - infojson='%(id)s.%(ext)s' if paths['infojson'] else '', + infojson='%(id)s.%(ext)s' if user_set('infojson', paths, '') else '', pl_infojson=f'{cache_directory_path}/infojson/playlist/{playlist_infojson}.%(ext)s', ) for k in OUTTMPL_TYPES.keys(): From 2bf4af792027ce71f36b7d6e9b8d27caa585538b Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 00:11:03 -0400 Subject: [PATCH 350/417] Use Django setting to restrict find locations --- tubesync/full_playlist.sh | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tubesync/full_playlist.sh b/tubesync/full_playlist.sh index 69a6f084..08bab14b 100644 --- a/tubesync/full_playlist.sh +++ b/tubesync/full_playlist.sh @@ -3,14 +3,29 @@ playlist_id="${1}" total_entries="${2}" -downloaded_entries="$( find / \ +# select YOUTUBE_*DIR settings +# convert None to '' +# convert PosixPath('VALUE') to 'VALUE' +# assign a shell variable with the setting name and value +_awk_prog='$2 == "=" && $1 ~ /^YOUTUBE_/ && $1 ~ /DIR$/ { + sub(/^None$/, "'\'\''", $3); + r = sub(/^PosixPath[(]/, "", $3); + NF--; + if(r) {sub(/[)]$/, "", $NF);}; + $3=$1 $2 $3; $1=$2=""; sub("^" OFS "+", ""); + print; + }' +. <(python3 /app/manage.py diffsettings --output hash | awk "${_awk_prog}") +WHERE="${YOUTUBE_DL_CACHEDIR:-/dev/shm}" + +downloaded_entries="$( find /dev/shm "${WHERE}" \ -path '*/infojson/playlist/postprocessor_*_temp\.info\.json' \ -name "postprocessor_[[]${playlist_id}[]]_*_${total_entries}_temp\.info\.json" \ -exec basename '{}' ';' | \ sed -e 's/^postprocessor_[[].*[]]_//;s/_temp.*\.json$//;' | \ cut -d '_' -f 1 )" -find / \ +find /dev/shm "${WHERE}" \ -path '*/infojson/playlist/postprocessor_*_temp\.info\.json' \ -name "postprocessor_[[]${playlist_id}[]]_*_temp\.info\.json" \ -type f -delete From 3940487c1ce9b73e2b4aeb99c757aa0eaa94726f Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 00:21:55 -0400 Subject: [PATCH 351/417] Use the shorter `paths.get` to test for any value --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index d2c6d493..e63f1e71 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -179,7 +179,7 @@ def get_media_info(url, /, *, days=None, info_json=None): playlist_infojson = 'postprocessor_[%(id)s]_%(n_entries)d_%(playlist_count)d_temp' outtmpl = dict( default='', - infojson='%(id)s.%(ext)s' if user_set('infojson', paths, '') else '', + infojson='%(id)s.%(ext)s' if paths.get('infojson') else '', pl_infojson=f'{cache_directory_path}/infojson/playlist/{playlist_infojson}.%(ext)s', ) for k in OUTTMPL_TYPES.keys(): From 379d0ff02f3b9a216eefdeb2a46af7ce4d61e227 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 01:14:15 -0400 Subject: [PATCH 352/417] Add `extractor` so that `id` cannot collide --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index e63f1e71..c2c35464 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -179,7 +179,7 @@ def get_media_info(url, /, *, days=None, info_json=None): playlist_infojson = 'postprocessor_[%(id)s]_%(n_entries)d_%(playlist_count)d_temp' outtmpl = dict( default='', - infojson='%(id)s.%(ext)s' if paths.get('infojson') else '', + infojson='%(extractor)s/%(id)s.%(ext)s' if paths.get('infojson') else '', 
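# For context: prefixing %(extractor)s keeps two extractors that happen to
# reuse an id from overwriting each other's files, e.g. the hypothetical
# expansions 'youtube/dQw4w9WgXcQ.info.json' and
# 'soundcloud/dQw4w9WgXcQ.info.json' no longer collide.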
pl_infojson=f'{cache_directory_path}/infojson/playlist/{playlist_infojson}.%(ext)s', ) for k in OUTTMPL_TYPES.keys(): From 2a3df669c9d145b4e8320a42ea81202400831f26 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 01:33:00 -0400 Subject: [PATCH 353/417] Restrict delays to a range between 10 seconds and 20 minutes --- tubesync/sync/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index e63f1e71..1ebc6ec6 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -316,8 +316,8 @@ def download_media( 'check_formats': None, 'overwrites': None, 'skip_unavailable_fragments': False, - 'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20), - 'max_sleep_interval': settings.DOWNLOAD_MEDIA_DELAY, + 'sleep_interval': max(10, int(settings.DOWNLOAD_MEDIA_DELAY / 20)), + 'max_sleep_interval': min(20*60, max(60, settings.DOWNLOAD_MEDIA_DELAY)), 'sleep_interval_requests': 1 + (2 * settings.BACKGROUND_TASK_ASYNC_THREADS), 'paths': opts.get('paths', dict()), 'postprocessor_args': opts.get('postprocessor_args', dict()), From 7002cb458bd1be6fa4089b07372b58ff1f83ce6a Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 03:47:13 -0400 Subject: [PATCH 354/417] Use `list.append` instead of `list.extend` --- tubesync/sync/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index c2c35464..e487fa3b 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -170,11 +170,11 @@ def get_media_info(url, /, *, days=None, info_json=None): }) default_postprocessors = user_set('postprocessors', default_opts.__dict__, list()) postprocessors = user_set('postprocessors', opts, default_postprocessors) - postprocessors.extend((dict( + postprocessors.append(dict( key='Exec', when='playlist', exec_cmd="/usr/bin/env bash /app/full_playlist.sh '%(id)s' '%(playlist_count)d'", - ),)) + )) cache_directory_path = Path(user_set('cachedir', opts, '/dev/shm')) playlist_infojson = 'postprocessor_[%(id)s]_%(n_entries)d_%(playlist_count)d_temp' outtmpl = dict( From 9340fa2741c00811936e4c7c9ccfc85f04163633 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 16:28:51 -0400 Subject: [PATCH 355/417] Add `dive` analysis --- .github/workflows/ci.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 15a0bf45..914e84fc 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -118,10 +118,12 @@ jobs: DOCKER_TOKEN: ${{ 'meeb' == github.repository_owner && secrets.REGISTRY_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} run: echo "${DOCKER_TOKEN}" | docker login --password-stdin --username "${DOCKER_USERNAME}" "${DOCKER_REGISTRY}" - name: Build and push + id: build-push timeout-minutes: 60 uses: docker/build-push-action@v6 with: platforms: linux/amd64,linux/arm64 + load: true push: ${{ 'success' == needs.test.result && 'meeb' == github.repository_owner && 'pull_request' != github.event_name && 'true' || 'false' }} tags: ghcr.io/${{ needs.info.outputs.lowercase-github-actor }}/${{ env.IMAGE_NAME }}:latest cache-from: | @@ -136,3 +138,12 @@ jobs: FFMPEG_DATE=${{ env.FFMPEG_DATE }} FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} + - name: Analysis with `dive` + run: | + docker run --rm \ + -v /var/run/docker.sock:/var/run/docker.sock \ + ghcr.io/wagoodman/dive \ + 'docker://${{ 
steps.build-push.outputs.imageid }}' \ + --ci \ + --highestUserWastedPercent '0.03' \ + --highestWastedBytes '10M' From a8e425859eb179ec74158b9a26c6468eadc8dbc3 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 16:30:42 -0400 Subject: [PATCH 356/417] Enable the `info` job --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 914e84fc..b0eba921 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ on: jobs: info: - if: ${{ !cancelled() && 'pull_request' != github.event_name }} + #if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} From 6c9dc2fc03a81d78618cbb1f976ddf70171019ec Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 16:48:45 -0400 Subject: [PATCH 357/417] Build with `dive` --- .github/workflows/ci.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b0eba921..52201797 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -123,7 +123,6 @@ jobs: uses: docker/build-push-action@v6 with: platforms: linux/amd64,linux/arm64 - load: true push: ${{ 'success' == needs.test.result && 'meeb' == github.repository_owner && 'pull_request' != github.event_name && 'true' || 'false' }} tags: ghcr.io/${{ needs.info.outputs.lowercase-github-actor }}/${{ env.IMAGE_NAME }}:latest cache-from: | @@ -143,7 +142,7 @@ jobs: docker run --rm \ -v /var/run/docker.sock:/var/run/docker.sock \ ghcr.io/wagoodman/dive \ - 'docker://${{ steps.build-push.outputs.imageid }}' \ + build -t ${{ env.IMAGE_NAME }} . \ --ci \ --highestUserWastedPercent '0.03' \ --highestWastedBytes '10M' From 6b280c0cc369c6421b52ce6b8a0a57a6a357f4a5 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 17:22:57 -0400 Subject: [PATCH 358/417] Use a job for `dive` --- .github/workflows/ci.yaml | 42 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 52201797..dbbe47a4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -137,12 +137,52 @@ jobs: FFMPEG_DATE=${{ env.FFMPEG_DATE }} FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} + dive: + runs-on: ubuntu-latest + needs: ['info', 'test'] + steps: + - name: Set environment variables with jq + run: | + cat >| .ffmpeg.releases.json <<'EOF' + ${{ needs.info.outputs.ffmpeg-releases }} + EOF + mk_delim() { local f='%s_EOF_%d_' ; printf -- "${f}" "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; + { + var='FFMPEG_DATE' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + jq_arg='[foreach .[] as $release ([{}, []]; [ .[0] + {($release.commit): ([ $release.date ] + (.[0][($release.commit)] // []) ) }, [ .[1][0] // $release.commit ] ] ; .[0][(.[1][0])] ) ][-1][0]' ; + jq -r "${jq_arg}" -- .ffmpeg.releases.json ; + close_ml_var "${delim}" "${var}" ; + + ffmpeg_date="$( jq -r "${jq_arg}" -- .ffmpeg.releases.json )" + + var='FFMPEG_VERSION' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + jq_arg='.[]|select(.date == $date)|.versions[]|select(startswith("N-"))' ; + jq -r --arg date "${ffmpeg_date}" 
"${jq_arg}" -- .ffmpeg.releases.json ; + close_ml_var "${delim}" "${var}" ; + unset -v delim jq_arg var ; + } >> "${GITHUB_ENV}" + cat -v "${GITHUB_ENV}" + rm -v -f .ffmpeg.releases.json + - uses: actions/checkout@v4 - name: Analysis with `dive` run: | + docker buildx build \ + --build-arg IMAGE_NAME=${{ env.IMAGE_NAME }} \ + --build-arg FFMPEG_DATE=${{ env.FFMPEG_DATE }} \ + --build-arg FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} } + --build-arg YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} \ + --cache-from type=gha --load \ + -t ${{ env.IMAGE_NAME }} . docker run --rm \ -v /var/run/docker.sock:/var/run/docker.sock \ ghcr.io/wagoodman/dive \ - build -t ${{ env.IMAGE_NAME }} . \ + '${{ env.IMAGE_NAME }}' \ --ci \ --highestUserWastedPercent '0.03' \ --highestWastedBytes '10M' From 6113824a87638fe4d854ef3e26960676c509dd3b Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 17:26:06 -0400 Subject: [PATCH 359/417] fixup: correct line continuation --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dbbe47a4..fd1d07ce 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -175,7 +175,7 @@ jobs: docker buildx build \ --build-arg IMAGE_NAME=${{ env.IMAGE_NAME }} \ --build-arg FFMPEG_DATE=${{ env.FFMPEG_DATE }} \ - --build-arg FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} } + --build-arg FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} \ --build-arg YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} \ --cache-from type=gha --load \ -t ${{ env.IMAGE_NAME }} . From 084d434a94da60740f7b933da7e3368a34b5f035 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 17:58:50 -0400 Subject: [PATCH 360/417] Setup buildx --- .github/workflows/ci.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fd1d07ce..3f7a5cc7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -139,7 +139,7 @@ jobs: YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} dive: runs-on: ubuntu-latest - needs: ['info', 'test'] + needs: ['info'] steps: - name: Set environment variables with jq run: | @@ -169,8 +169,9 @@ jobs: } >> "${GITHUB_ENV}" cat -v "${GITHUB_ENV}" rm -v -f .ffmpeg.releases.json + - uses: docker/setup-buildx-action@v3 - uses: actions/checkout@v4 - - name: Analysis with `dive` + - name: Build `${{ env.IMAGE_NAME }}` image run: | docker buildx build \ --build-arg IMAGE_NAME=${{ env.IMAGE_NAME }} \ @@ -179,6 +180,8 @@ jobs: --build-arg YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} \ --cache-from type=gha --load \ -t ${{ env.IMAGE_NAME }} . 
+ - name: Analysis with `dive` + run: | docker run --rm \ -v /var/run/docker.sock:/var/run/docker.sock \ ghcr.io/wagoodman/dive \ From 1b2f87cafbd2f0a7e61fdc05afab5e41174cb5a6 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 18:58:17 -0400 Subject: [PATCH 361/417] Move `jq` work to the `info` job --- .github/workflows/ci.yaml | 98 +++++++++++++++------------------------ 1 file changed, 37 insertions(+), 61 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3f7a5cc7..7521fe4e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,7 +22,9 @@ jobs: #if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: + ffmpeg-date: ${{ steps.jq.outputs.FFMPEG_DATE }} ffmpeg-releases: ${{ steps.ffmpeg.outputs.releases }} + ffmpeg-version: ${{ steps.jq.outputs.FFMPEG_VERSION }} lowercase-github-actor: ${{ steps.github-actor.outputs.lowercase }} lowercase-github-repository_owner: ${{ steps.github-repository_owner.outputs.lowercase }} ytdlp-latest-release: ${{ steps.yt-dlp.outputs.latest-release }} @@ -45,6 +47,35 @@ jobs: - name: Retrieve yt-dlp/yt-dlp releases with GitHub CLI id: yt-dlp uses: ./.github/actions/yt-dlp + - name: Set outputs with jq + id: jq + run: | + cat >| .ffmpeg.releases.json <<'EOF' + ${{ steps.ffmpeg.outputs.releases }} + EOF + mk_delim() { local f='%s_EOF_%d_' ; printf -- "${f}" "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; + { + var='FFMPEG_DATE' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + jq_arg='[foreach .[] as $release ([{}, []]; [ .[0] + {($release.commit): ([ $release.date ] + (.[0][($release.commit)] // []) ) }, [ .[1][0] // $release.commit ] ] ; .[0][(.[1][0])] ) ][-1][0]' ; + jq -r "${jq_arg}" -- .ffmpeg.releases.json ; + close_ml_var "${delim}" "${var}" ; + + ffmpeg_date="$( jq -r "${jq_arg}" -- .ffmpeg.releases.json )" + + var='FFMPEG_VERSION' ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + jq_arg='.[]|select(.date == $date)|.versions[]|select(startswith("N-"))' ; + jq -r --arg date "${ffmpeg_date}" "${jq_arg}" -- .ffmpeg.releases.json ; + close_ml_var "${delim}" "${var}" ; + unset -v delim jq_arg var ; + } >> "${GITHUB_OUTPUT}" + cat -v "${GITHUB_OUTPUT}" + rm -v -f .ffmpeg.releases.json test: if: ${{ !cancelled() && ( 'pull_request' != github.event_name || (! 
github.event.pull_request.draft) ) }} @@ -78,34 +109,6 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 120 steps: - - name: Set environment variables with jq - run: | - cat >| .ffmpeg.releases.json <<'EOF' - ${{ needs.info.outputs.ffmpeg-releases }} - EOF - mk_delim() { local f='%s_EOF_%d_' ; printf -- "${f}" "$1" "${RANDOM}" ; } ; - open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; - close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; - { - var='FFMPEG_DATE' ; - delim="$(mk_delim "${var}")" ; - open_ml_var "${delim}" "${var}" ; - jq_arg='[foreach .[] as $release ([{}, []]; [ .[0] + {($release.commit): ([ $release.date ] + (.[0][($release.commit)] // []) ) }, [ .[1][0] // $release.commit ] ] ; .[0][(.[1][0])] ) ][-1][0]' ; - jq -r "${jq_arg}" -- .ffmpeg.releases.json ; - close_ml_var "${delim}" "${var}" ; - - ffmpeg_date="$( jq -r "${jq_arg}" -- .ffmpeg.releases.json )" - - var='FFMPEG_VERSION' ; - delim="$(mk_delim "${var}")" ; - open_ml_var "${delim}" "${var}" ; - jq_arg='.[]|select(.date == $date)|.versions[]|select(startswith("N-"))' ; - jq -r --arg date "${ffmpeg_date}" "${jq_arg}" -- .ffmpeg.releases.json ; - close_ml_var "${delim}" "${var}" ; - unset -v delim jq_arg var ; - } >> "${GITHUB_ENV}" - cat -v "${GITHUB_ENV}" - rm -v -f .ffmpeg.releases.json - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -134,49 +137,22 @@ jobs: ${{ 'meeb' == github.repository_owner && 'pull_request' != github.event_name && 'type=inline' || '' }} build-args: | IMAGE_NAME=${{ env.IMAGE_NAME }} - FFMPEG_DATE=${{ env.FFMPEG_DATE }} - FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} + FFMPEG_DATE=${{ needs.info.outputs.ffmpeg-date }} + FFMPEG_VERSION=${{ needs.info.outputs.ffmpeg-version }} YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} dive: - runs-on: ubuntu-latest + if: ${{ !cancelled() && 'success' == needs.info.result }} needs: ['info'] + runs-on: ubuntu-latest steps: - - name: Set environment variables with jq - run: | - cat >| .ffmpeg.releases.json <<'EOF' - ${{ needs.info.outputs.ffmpeg-releases }} - EOF - mk_delim() { local f='%s_EOF_%d_' ; printf -- "${f}" "$1" "${RANDOM}" ; } ; - open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; - close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; - { - var='FFMPEG_DATE' ; - delim="$(mk_delim "${var}")" ; - open_ml_var "${delim}" "${var}" ; - jq_arg='[foreach .[] as $release ([{}, []]; [ .[0] + {($release.commit): ([ $release.date ] + (.[0][($release.commit)] // []) ) }, [ .[1][0] // $release.commit ] ] ; .[0][(.[1][0])] ) ][-1][0]' ; - jq -r "${jq_arg}" -- .ffmpeg.releases.json ; - close_ml_var "${delim}" "${var}" ; - - ffmpeg_date="$( jq -r "${jq_arg}" -- .ffmpeg.releases.json )" - - var='FFMPEG_VERSION' ; - delim="$(mk_delim "${var}")" ; - open_ml_var "${delim}" "${var}" ; - jq_arg='.[]|select(.date == $date)|.versions[]|select(startswith("N-"))' ; - jq -r --arg date "${ffmpeg_date}" "${jq_arg}" -- .ffmpeg.releases.json ; - close_ml_var "${delim}" "${var}" ; - unset -v delim jq_arg var ; - } >> "${GITHUB_ENV}" - cat -v "${GITHUB_ENV}" - rm -v -f .ffmpeg.releases.json - uses: docker/setup-buildx-action@v3 - uses: actions/checkout@v4 - name: Build `${{ env.IMAGE_NAME }}` image run: | docker buildx build \ --build-arg IMAGE_NAME=${{ env.IMAGE_NAME }} \ - --build-arg FFMPEG_DATE=${{ env.FFMPEG_DATE }} \ - --build-arg FFMPEG_VERSION=${{ env.FFMPEG_VERSION }} \ + --build-arg FFMPEG_DATE=${{ needs.info.outputs.ffmpeg-date }} \ + 
--build-arg FFMPEG_VERSION=${{ needs.info.outputs.ffmpeg-version }} \ --build-arg YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} \ --cache-from type=gha --load \ -t ${{ env.IMAGE_NAME }} . From 26ba951529cdc95a0372fcfc10831c2f8627c91b Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 19:17:31 -0400 Subject: [PATCH 362/417] Use `dive` before push --- .github/workflows/ci.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7521fe4e..f4d24aab 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -120,6 +120,29 @@ jobs: DOCKER_USERNAME: ${{ github.actor }} DOCKER_TOKEN: ${{ 'meeb' == github.repository_owner && secrets.REGISTRY_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} run: echo "${DOCKER_TOKEN}" | docker login --password-stdin --username "${DOCKER_USERNAME}" "${DOCKER_REGISTRY}" + - name: Build image for `dive` + id: build-dive-image + uses: docker/build-push-action@v6 + with: + build-args: | + IMAGE_NAME=${{ env.IMAGE_NAME }} + FFMPEG_DATE=${{ needs.info.outputs.ffmpeg-date }} + FFMPEG_VERSION=${{ needs.info.outputs.ffmpeg-version }} + YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} + cache-from: type=gha + load: true + platforms: linux/amd64 + push: false + tags: ghcr.io/${{ needs.info.outputs.lowercase-github-actor }}/${{ env.IMAGE_NAME }}:dive + - name: Analysis with `dive` + run: | + docker run --rm \ + -v /var/run/docker.sock:/var/run/docker.sock \ + ghcr.io/wagoodman/dive \ + 'ghcr.io/${{ needs.info.outputs.lowercase-github-actor }}/${{ env.IMAGE_NAME }}:dive' \ + --ci \ + --highestUserWastedPercent '0.03' \ + --highestWastedBytes '10M' - name: Build and push id: build-push timeout-minutes: 60 From ad8231d7b9eff11771411108f44b39f51f1c8b36 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 19:24:40 -0400 Subject: [PATCH 363/417] Remove `dive` job --- .github/workflows/ci.yaml | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f4d24aab..fe44fc26 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -138,7 +138,7 @@ jobs: run: | docker run --rm \ -v /var/run/docker.sock:/var/run/docker.sock \ - ghcr.io/wagoodman/dive \ + 'ghcr.io/wagoodman/dive' \ 'ghcr.io/${{ needs.info.outputs.lowercase-github-actor }}/${{ env.IMAGE_NAME }}:dive' \ --ci \ --highestUserWastedPercent '0.03' \ @@ -163,28 +163,3 @@ jobs: FFMPEG_DATE=${{ needs.info.outputs.ffmpeg-date }} FFMPEG_VERSION=${{ needs.info.outputs.ffmpeg-version }} YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} - dive: - if: ${{ !cancelled() && 'success' == needs.info.result }} - needs: ['info'] - runs-on: ubuntu-latest - steps: - - uses: docker/setup-buildx-action@v3 - - uses: actions/checkout@v4 - - name: Build `${{ env.IMAGE_NAME }}` image - run: | - docker buildx build \ - --build-arg IMAGE_NAME=${{ env.IMAGE_NAME }} \ - --build-arg FFMPEG_DATE=${{ needs.info.outputs.ffmpeg-date }} \ - --build-arg FFMPEG_VERSION=${{ needs.info.outputs.ffmpeg-version }} \ - --build-arg YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} \ - --cache-from type=gha --load \ - -t ${{ env.IMAGE_NAME }} . 
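# A note on the recurring flags: dive's --ci switch runs non-interactively
# and turns the thresholds into pass/fail gates, so the Analysis step fails
# when more than 3% of the image, or over 10M in absolute terms, is wasted
# space duplicated or overwritten across layers.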
- - name: Analysis with `dive` - run: | - docker run --rm \ - -v /var/run/docker.sock:/var/run/docker.sock \ - ghcr.io/wagoodman/dive \ - '${{ env.IMAGE_NAME }}' \ - --ci \ - --highestUserWastedPercent '0.03' \ - --highestWastedBytes '10M' From 2e634511d79a32983f194be92ac790a9cfd4660d Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 1 Apr 2025 19:28:06 -0400 Subject: [PATCH 364/417] Disable the info job --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fe44fc26..401ae05e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ on: jobs: info: - #if: ${{ !cancelled() && 'pull_request' != github.event_name }} + if: ${{ !cancelled() && 'pull_request' != github.event_name }} runs-on: ubuntu-latest outputs: ffmpeg-date: ${{ steps.jq.outputs.FFMPEG_DATE }} From 8ccd4e68c42e164dce1fbe274b5f6b2839e9dd18 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 2 Apr 2025 08:04:42 -0400 Subject: [PATCH 365/417] Run the check source directory task before saving existing sources --- tubesync/sync/signals.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index be848a0a..165a1be7 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -30,6 +30,8 @@ def source_pre_save(sender, instance, **kwargs): log.debug(f'source_pre_save signal: no existing source: {sender} - {instance}') return + args = ( str(instance.pk), ) + check_source_directory_exists.now(*args) existing_dirpath = existing_source.directory_path.resolve(strict=True) new_dirpath = instance.directory_path.resolve(strict=False) if existing_dirpath != new_dirpath: From f5ad4eda16e40de45bcb92a49e50892b0783ffd2 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 3 Apr 2025 00:31:56 -0400 Subject: [PATCH 366/417] Log more details about what `pipenv` installs --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 15a0bf45..966a6664 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -63,7 +63,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pipenv - pipenv install --system --skip-lock + PIPENV_VERBOSITY=64 pipenv install --system --skip-lock - name: Set up Django environment run: | cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py From a13e5942ae40a3885f0bdda90aad13338b5dd9db Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 3 Apr 2025 00:37:26 -0400 Subject: [PATCH 367/417] Adjust the test for Django `5.2` --- tubesync/sync/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index 514f75b1..b05c3991 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -138,7 +138,7 @@ class FrontEndTestCase(TestCase): else: # Invalid source tests should reload the page with an error self.assertEqual(response.status_code, 200) - self.assertIn('
<ul class="errorlist">',
+            self.assertIn('<ul class="errorlist"',
                           response.rendered_content)

From: tcely
      Date: Thu, 3 Apr 2025 01:05:08 -0400 Subject: [PATCH 368/417] Avoid Django `5.2` until it is tested --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index bf53b4bf..2976db2e 100644 --- a/Pipfile +++ b/Pipfile @@ -7,7 +7,7 @@ verify_ssl = true autopep8 = "*" [packages] -django = "*" +django = "<5.2" django-sass-processor = {extras = ["management-command"], version = "*"} pillow = "*" whitenoise = "*" From eab0ad9d7c0a9f2ba1a4b826cdf29bff06b93fb7 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 3 Apr 2025 01:50:20 -0400 Subject: [PATCH 369/417] Review of tasks.py --- tubesync/sync/tasks.py | 147 +++++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 71 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 6cf0fc2d..8e35f7ac 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -21,6 +21,7 @@ from django.db.transaction import atomic from django.utils import timezone from django.utils.translation import gettext_lazy as _ from background_task import background +from background_task.exceptions import InvalidTaskError from background_task.models import Task, CompletedTask from common.logger import log from common.errors import NoMediaException, NoMetadataException, DownloadFailedException @@ -123,7 +124,8 @@ def update_task_status(task, status): except DatabaseError as e: if 'Save with update_fields did not affect any rows.' == str(e): pass - raise + else: + raise return True @@ -136,6 +138,7 @@ def get_source_completed_tasks(source_id, only_errors=False): q['failed_at__isnull'] = False return CompletedTask.objects.filter(**q).order_by('-failed_at') + def get_tasks(task_name, id=None, /, instance=None): assert not (id is None and instance is None) arg = str(id or instance.pk) @@ -160,6 +163,7 @@ def get_source_check_task(source_id): def get_source_index_task(source_id): return get_first_task('sync.tasks.index_source_task', source_id) + def delete_task_by_source(task_name, source_id): now = timezone.now() unlocked = Task.objects.unlocked(now) @@ -191,7 +195,7 @@ def schedule_media_servers_update(): for mediaserver in MediaServer.objects.all(): rescan_media_server( str(mediaserver.pk), - priority=30, + priority=10, verbose_name=verbose_name.format(mediaserver), remove_existing_tasks=True, ) @@ -225,7 +229,7 @@ def cleanup_removed_media(source, videos): schedule_media_servers_update() -@background(schedule=300, remove_existing_tasks=True) +@background(schedule=dict(run_at=300), remove_existing_tasks=True) def index_source_task(source_id): ''' Indexes media available from a Source object. @@ -235,18 +239,20 @@ def index_source_task(source_id): cleanup_old_media() try: source = Source.objects.get(pk=source_id) - except Source.DoesNotExist: + except Source.DoesNotExist as e: # Task triggered but the Source has been deleted, delete the task - return + raise InvalidTaskError(_('no such source')) from e # An inactive Source would return an empty list for videos anyway if not source.is_active: return # Reset any errors + # TODO: determine if this affects anything source.has_failed = False source.save() # Index the source videos = source.index_media() if not videos: + # TODO: Record this error in source.has_failed ? raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no ' f'media to index, is the source key valid? 
Check the ' f'source configuration is correct and that the source ' @@ -310,7 +316,7 @@ def index_source_task(source_id): cleanup_removed_media(source, videos) -@background(schedule=0) +@background(schedule=dict(run_at=0)) def check_source_directory_exists(source_id): ''' Checks the output directory for a source exists and is writable, if it does @@ -319,17 +325,17 @@ def check_source_directory_exists(source_id): ''' try: source = Source.objects.get(pk=source_id) - except Source.DoesNotExist: + except Source.DoesNotExist as e: # Task triggered but the Source has been deleted, delete the task - return + raise InvalidTaskError(_('no such source')) from e # Check the source output directory exists if not source.directory_exists(): - # Try and create it + # Try to create it log.info(f'Creating directory: {source.directory_path}') source.make_directory() -@background(schedule=0) +@background(schedule=dict(run_at=0)) def download_source_images(source_id): ''' Downloads an image and save it as a local thumbnail attached to a @@ -337,11 +343,11 @@ def download_source_images(source_id): ''' try: source = Source.objects.get(pk=source_id) - except Source.DoesNotExist: + except Source.DoesNotExist as e: # Task triggered but the source no longer exists, do nothing log.error(f'Task download_source_images(pk={source_id}) called but no ' f'source exists with ID: {source_id}') - return + raise InvalidTaskError(_('no such source')) from e avatar, banner = source.get_image_url log.info(f'Thumbnail URL for source with ID: {source_id} / {source} ' f'Avatar: {avatar} ' @@ -379,18 +385,18 @@ def download_source_images(source_id): log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}') -@background(schedule=60, remove_existing_tasks=True) +@background(schedule=dict(run_at=60), remove_existing_tasks=True) def download_media_metadata(media_id): ''' Downloads the metadata for a media item. 
''' try: media = Media.objects.get(pk=media_id) - except Media.DoesNotExist: + except Media.DoesNotExist as e: # Task triggered but the media no longer exists, do nothing log.error(f'Task download_media_metadata(pk={media_id}) called but no ' f'media exists with ID: {media_id}') - return + raise InvalidTaskError(_('no such media')) from e if media.manual_skip: log.info(f'Task for ID: {media_id} / {media} skipped, due to task being manually skipped.') return @@ -466,7 +472,7 @@ def download_media_metadata(media_id): f'{source} / {media}: {media_id}') -@background(schedule=60, remove_existing_tasks=True) +@background(schedule=dict(run_at=60), remove_existing_tasks=True) def download_media_thumbnail(media_id, url): ''' Downloads an image from a URL and save it as a local thumbnail attached to a @@ -474,10 +480,10 @@ def download_media_thumbnail(media_id, url): ''' try: media = Media.objects.get(pk=media_id) - except Media.DoesNotExist: + except Media.DoesNotExist as e: # Task triggered but the media no longer exists, do nothing - return - if media.skip: + raise InvalidTaskError(_('no such media')) from e + if media.skip or media.manual_skip: # Media was toggled to be skipped after the task was scheduled log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' f'it is now marked to be skipped, not downloading thumbnail') @@ -504,38 +510,43 @@ def download_media_thumbnail(media_id, url): return True -@background(schedule=60, remove_existing_tasks=True) +@background(schedule=dict(run_at=60), remove_existing_tasks=True) def download_media(media_id): ''' Downloads the media to disk and attaches it to the Media instance. ''' try: media = Media.objects.get(pk=media_id) - except Media.DoesNotExist: + except Media.DoesNotExist as e: # Task triggered but the media no longer exists, do nothing - return - if not media.has_metadata: - raise NoMetadataException('Metadata is not yet available.') - if media.skip: - # Media was toggled to be skipped after the task was scheduled - log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' - f'it is now marked to be skipped, not downloading') - return - downloaded_file_exists = ( - media.media_file_exists or - media.filepath.exists() - ) - if media.downloaded and downloaded_file_exists: - # Media has been marked as downloaded before the download_media task was fired, - # skip it - log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' - f'it has already been marked as downloaded, not downloading again') - return + raise InvalidTaskError(_('no such media')) from e if not media.source.download_media: log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' f'the source {media.source} has since been marked to not download, ' f'not downloading') return + if media.skip or media.manual_skip: + # Media was toggled to be skipped after the task was scheduled + log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' + f'it is now marked to be skipped, not downloading') + return + # metadata is required to generate the proper filepath + if not media.has_metadata: + raise NoMetadataException('Metadata is not yet available.') + downloaded_file_exists = ( + media.downloaded and + media.has_metadata and + ( + media.media_file_exists or + media.filepath.exists() + ) + ) + if downloaded_file_exists: + # Media has been marked as downloaded before the download_media task was fired, + # skip it + log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but 
' + f'it has already been marked as downloaded, not downloading again') + return max_cap_age = media.source.download_cap_date published = media.published if max_cap_age and published: @@ -608,16 +619,7 @@ def download_media(media_id): log.warn(f'A permissions problem occured when writing the new media NFO file: {e.msg}') pass # Schedule a task to update media servers - for mediaserver in MediaServer.objects.all(): - log.info(f'Scheduling media server updates') - verbose_name = _('Request media server rescan for "{}"') - rescan_media_server( - str(mediaserver.pk), - queue=str(media.source.pk), - priority=0, - verbose_name=verbose_name.format(mediaserver), - remove_existing_tasks=True - ) + schedule_media_servers_update() else: # Expected file doesn't exist on disk err = (f'Failed to download media: {media} (UUID: {media.pk}) to disk, ' @@ -630,22 +632,22 @@ def download_media(media_id): raise DownloadFailedException(err) -@background(schedule=300, remove_existing_tasks=True) +@background(schedule=dict(run_at=300), remove_existing_tasks=True) def rescan_media_server(mediaserver_id): ''' Attempts to request a media rescan on a remote media server. ''' try: mediaserver = MediaServer.objects.get(pk=mediaserver_id) - except MediaServer.DoesNotExist: + except MediaServer.DoesNotExist as e: # Task triggered but the media server no longer exists, do nothing - return + raise InvalidTaskError(_('no such server')) from e # Request an rescan / update log.info(f'Updating media server: {mediaserver}') mediaserver.update() -@background(schedule=300, remove_existing_tasks=True) +@background(schedule=dict(run_at=300), remove_existing_tasks=True) def save_all_media_for_source(source_id): ''' Iterates all media items linked to a source and saves them to @@ -655,11 +657,11 @@ def save_all_media_for_source(source_id): ''' try: source = Source.objects.get(pk=source_id) - except Source.DoesNotExist: + except Source.DoesNotExist as e: # Task triggered but the source no longer exists, do nothing log.error(f'Task save_all_media_for_source(pk={source_id}) called but no ' f'source exists with ID: {source_id}') - return + raise InvalidTaskError(_('no such source')) from e already_saved = set() mqs = Media.objects.filter(source=source) @@ -694,41 +696,41 @@ def save_all_media_for_source(source_id): # flags may need to be recalculated tvn_format = '2/{:,}' + f'/{mqs.count():,}' for mn, media in enumerate(mqs, start=1): - update_task_status(task, tvn_format.format(mn)) if media.uuid not in already_saved: + update_task_status(task, tvn_format.format(mn)) with atomic(): media.save() # Reset task.verbose_name to the saved value update_task_status(task, None) -@background(schedule=60, remove_existing_tasks=True) +@background(schedule=dict(run_at=60), remove_existing_tasks=True) def rename_media(media_id): try: media = Media.objects.defer('metadata', 'thumb').get(pk=media_id) - except Media.DoesNotExist: - return + except Media.DoesNotExist as e: + raise InvalidTaskError(_('no such media')) from e media.rename_files() -@background(schedule=300, remove_existing_tasks=True) +@background(schedule=dict(run_at=300), remove_existing_tasks=True) @atomic(durable=True) def rename_all_media_for_source(source_id): try: source = Source.objects.get(pk=source_id) - except Source.DoesNotExist: + except Source.DoesNotExist as e: # Task triggered but the source no longer exists, do nothing log.error(f'Task rename_all_media_for_source(pk={source_id}) called but no ' f'source exists with ID: {source_id}') - return + raise InvalidTaskError(_('no 
such source')) from e # Check that the settings allow renaming - rename_sources_setting = settings.RENAME_SOURCES or list() + rename_sources_setting = getattr(settings, 'RENAME_SOURCES', list()) create_rename_tasks = ( ( source.directory and source.directory in rename_sources_setting ) or - settings.RENAME_ALL_SOURCES + getattr(settings, 'RENAME_ALL_SOURCES', False) ) if not create_rename_tasks: return @@ -744,15 +746,15 @@ def rename_all_media_for_source(source_id): media.rename_files() -@background(schedule=60, remove_existing_tasks=True) +@background(schedule=dict(run_at=60), remove_existing_tasks=True) def wait_for_media_premiere(media_id): hours = lambda td: 1+int((24*td.days)+(td.seconds/(60*60))) try: media = Media.objects.get(pk=media_id) - except Media.DoesNotExist: - return - if media.metadata: + except Media.DoesNotExist as e: + raise InvalidTaskError(_('no such media')) from e + if media.has_metadata: return now = timezone.now() if media.published < now: @@ -764,17 +766,20 @@ def wait_for_media_premiere(media_id): media.manual_skip = True media.title = _(f'Premieres in {hours(media.published - now)} hours') media.save() + task = get_media_premiere_task(media_id) + if task: + update_task_status(task, f'available in {hours(media.published - now)} hours') -@background(schedule=300, remove_existing_tasks=False) +@background(schedule=dict(run_at=300), remove_existing_tasks=False) def delete_all_media_for_source(source_id, source_name): source = None try: source = Source.objects.get(pk=source_id) - except Source.DoesNotExist: + except Source.DoesNotExist as e: # Task triggered but the source no longer exists, do nothing log.error(f'Task delete_all_media_for_source(pk={source_id}) called but no ' f'source exists with ID: {source_id}') - pass + raise InvalidTaskError(_('no such source')) from e mqs = Media.objects.all().defer( 'metadata', ).filter( From 6293625a019f9fb80cd1d9826f51f1d89e68e322 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 3 Apr 2025 15:19:08 -0400 Subject: [PATCH 370/417] Clean up old `debconf` cache files --- Dockerfile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 82bc665e..d3169884 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,7 +47,8 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va locale-gen en_US.UTF-8 && \ # Clean up apt-get -y autopurge && \ - apt-get -y autoclean + apt-get -y autoclean && \ + rm -f /var/cache/debconf/*.dat-old FROM alpine:${ALPINE_VERSION} AS ffmpeg-download ARG FFMPEG_DATE @@ -289,7 +290,8 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va useradd -M -d /app -s /bin/false -g app app && \ # Clean up apt-get -y autopurge && \ - apt-get -y autoclean + apt-get -y autoclean && \ + rm -v -f /var/cache/debconf/*.dat-old # Install third party software COPY --from=s6-overlay / / @@ -310,7 +312,8 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va apt-get -y autoremove --purge file && \ # Clean up apt-get -y autopurge && \ - apt-get -y autoclean + apt-get -y autoclean && \ + rm -v -f /var/cache/debconf/*.dat-old # Switch workdir to the the app WORKDIR /app @@ -362,6 +365,7 @@ RUN --mount=type=tmpfs,target=/cache \ && \ apt-get -y autopurge && \ apt-get -y autoclean && \ + rm -v -f /var/cache/debconf/*.dat-old && \ rm -v -rf /tmp/* # Copy root From ec41580fe9ab69dbbe5958b2550ba840bb01547b Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 4 Apr 2025 16:33:41 -0400 Subject: [PATCH 371/417] Close 
the `ThreadPool` before exiting --- .../management/commands/process_tasks.py | 124 ++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 patches/background_task/management/commands/process_tasks.py diff --git a/patches/background_task/management/commands/process_tasks.py b/patches/background_task/management/commands/process_tasks.py new file mode 100644 index 00000000..9484c393 --- /dev/null +++ b/patches/background_task/management/commands/process_tasks.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +import logging +import random +import sys +import time + +from django import VERSION +from django.core.management.base import BaseCommand +from django.utils import autoreload + +from background_task.tasks import tasks, autodiscover +from background_task.utils import SignalManager +from django.db import close_old_connections as close_connection + + +logger = logging.getLogger(__name__) + + +def _configure_log_std(): + class StdOutWrapper(object): + def write(self, s): + logger.info(s) + + class StdErrWrapper(object): + def write(self, s): + logger.error(s) + sys.stdout = StdOutWrapper() + sys.stderr = StdErrWrapper() + + +class Command(BaseCommand): + help = 'Run tasks that are scheduled to run on the queue' + + # Command options are specified in an abstract way to enable Django < 1.8 compatibility + OPTIONS = ( + (('--duration', ), { + 'action': 'store', + 'dest': 'duration', + 'type': int, + 'default': 0, + 'help': 'Run task for this many seconds (0 or less to run forever) - default is 0', + }), + (('--sleep', ), { + 'action': 'store', + 'dest': 'sleep', + 'type': float, + 'default': 5.0, + 'help': 'Sleep for this many seconds before checking for new tasks (if none were found) - default is 5', + }), + (('--queue', ), { + 'action': 'store', + 'dest': 'queue', + 'help': 'Only process tasks on this named queue', + }), + (('--log-std', ), { + 'action': 'store_true', + 'dest': 'log_std', + 'help': 'Redirect stdout and stderr to the logging system', + }), + (('--dev', ), { + 'action': 'store_true', + 'dest': 'dev', + 'help': 'Auto-reload your code on changes. Use this only for development', + }), + ) + + if VERSION < (1, 8): + from optparse import make_option + option_list = BaseCommand.option_list + tuple([make_option(*args, **kwargs) for args, kwargs in OPTIONS]) + + # Used in Django >= 1.8 + def add_arguments(self, parser): + for (args, kwargs) in self.OPTIONS: + parser.add_argument(*args, **kwargs) + + def __init__(self, *args, **kwargs): + super(Command, self).__init__(*args, **kwargs) + self.sig_manager = None + self._tasks = tasks + + def run(self, *args, **options): + duration = options.get('duration', 0) + sleep = options.get('sleep', 5.0) + queue = options.get('queue', None) + log_std = options.get('log_std', False) + is_dev = options.get('dev', False) + sig_manager = self.sig_manager + + if is_dev: + # raise last Exception is exist + autoreload.raise_last_exception() + + if log_std: + _configure_log_std() + + autodiscover() + + start_time = time.time() + + while (duration <= 0) or (time.time() - start_time) <= duration: + if sig_manager.kill_now: + # shutting down gracefully + break + + if not self._tasks.run_next_task(queue): + # there were no tasks in the queue, let's recover. + close_connection() + logger.debug('waiting for tasks') + time.sleep(sleep) + else: + # there were some tasks to process, let's check if there is more work to do after a little break. 
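# NOTE: an illustration, not part of the patch above or below. This commit adds
# `self._tasks._pool_runner._pool.close()` so the worker pool is shut down
# before the management command returns; below is a minimal standard-library
# sketch of that shutdown pattern (all names here are illustrative only):
from multiprocessing.pool import ThreadPool

def drain_and_close(pool):
    pool.close()  # refuse new work; tasks already submitted keep running
    pool.join()   # block until every worker thread has finished

example_pool = ThreadPool(processes=2)
async_result = example_pool.apply_async(sum, ([1, 2, 3],))
print(async_result.get(timeout=5))  # prints 6
drain_and_close(example_pool)
# The patch itself only calls close(); join() is the usual companion when the
# caller must be certain all threads have exited before teardown.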
+ time.sleep(random.uniform(sig_manager.time_to_wait[0], sig_manager.time_to_wait[1])) + self._tasks._pool_runner._pool.close() + + def handle(self, *args, **options): + is_dev = options.get('dev', False) + self.sig_manager = SignalManager() + if is_dev: + reload_func = autoreload.run_with_reloader + if VERSION < (2, 2): + reload_func = autoreload.main + reload_func(self.run, *args, **options) + else: + self.run(*args, **options) From ddf985ff7d3d193fa7a9205308e76a68354e29a3 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 13:47:16 -0400 Subject: [PATCH 372/417] Use `schedule_media_servers_update` function --- tubesync/sync/management/commands/delete-source.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tubesync/sync/management/commands/delete-source.py b/tubesync/sync/management/commands/delete-source.py index 206aee7f..5ab8a325 100644 --- a/tubesync/sync/management/commands/delete-source.py +++ b/tubesync/sync/management/commands/delete-source.py @@ -6,7 +6,7 @@ from django.db.models import signals from common.logger import log from sync.models import Source, Media, MediaServer from sync.signals import media_post_delete -from sync.tasks import rescan_media_server +from sync.tasks import schedule_media_servers_update class Command(BaseCommand): @@ -37,15 +37,6 @@ class Command(BaseCommand): log.info(f'Source directory: {source.directory_path}') source.delete() # Update any media servers - for mediaserver in MediaServer.objects.all(): - log.info(f'Scheduling media server updates') - verbose_name = _('Request media server rescan for "{}"') - rescan_media_server( - str(mediaserver.pk), - priority=0, - schedule=30, - verbose_name=verbose_name.format(mediaserver), - remove_existing_tasks=True - ) + schedule_media_servers_update() # All done log.info('Done') From e0bbb5951b25cd5c8fdfead7a0a9423bba7c47af Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 14:28:34 -0400 Subject: [PATCH 373/417] Remove `priority` kwarg --- tubesync/sync/management/commands/reset-tasks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/management/commands/reset-tasks.py b/tubesync/sync/management/commands/reset-tasks.py index 7d78c09f..3d5ecb98 100644 --- a/tubesync/sync/management/commands/reset-tasks.py +++ b/tubesync/sync/management/commands/reset-tasks.py @@ -1,4 +1,5 @@ from django.core.management.base import BaseCommand, CommandError +from django.db.transaction import atomic from django.utils.translation import gettext_lazy as _ from background_task.models import Task from sync.models import Source @@ -12,6 +13,7 @@ class Command(BaseCommand): help = 'Resets all tasks' + @atomic(durable=True) def handle(self, *args, **options): log.info('Resettings all tasks...') # Delete all tasks @@ -23,9 +25,8 @@ class Command(BaseCommand): verbose_name = _('Index media from source "{}"') index_source_task( str(source.pk), - repeat=source.index_schedule, queue=str(source.pk), - priority=10, + repeat=source.index_schedule, verbose_name=verbose_name.format(source.name) ) # This also chains down to call each Media objects .save() as well From 07390a32a813ae93190598c354d12c4e289aa893 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 14:33:12 -0400 Subject: [PATCH 374/417] Remove `priority` kwarg --- tubesync/sync/views.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 3d1896d2..bfdcbf6f 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py 
@@ -123,8 +123,6 @@ class SourcesView(ListView): str(sobj.pk), queue=str(sobj.pk), repeat=0, - priority=10, - schedule=30, remove_existing_tasks=False, verbose_name=verbose_name.format(sobj.name)) url = reverse_lazy('sync:sources') @@ -932,9 +930,8 @@ class ResetTasks(FormView): verbose_name = _('Index media from source "{}"') index_source_task( str(source.pk), - repeat=source.index_schedule, queue=str(source.pk), - priority=10, + repeat=source.index_schedule, verbose_name=verbose_name.format(source.name) ) # This also chains down to call each Media objects .save() as well From 4c087269062c66f308d9e9cc5b576fc7b288d026 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 14:42:21 -0400 Subject: [PATCH 375/417] priority: 10: index_source_task --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 8e35f7ac..fb7a0892 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -229,7 +229,7 @@ def cleanup_removed_media(source, videos): schedule_media_servers_update() -@background(schedule=dict(run_at=300), remove_existing_tasks=True) +@background(schedule=dict(priority=10, run_at=30), remove_existing_tasks=True) def index_source_task(source_id): ''' Indexes media available from a Source object. From 017966160d74304130f4a1de7baef8b4b9c7ba25 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 15:55:46 -0400 Subject: [PATCH 376/417] Remove `priority` kwarg --- tubesync/sync/signals.py | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 165a1be7..90b39480 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -92,12 +92,10 @@ def source_pre_save(sender, instance, **kwargs): verbose_name = _('Index media from source "{}"') index_source_task( str(instance.pk), - schedule=instance.index_schedule, - repeat=instance.index_schedule, queue=str(instance.pk), - priority=10, + repeat=instance.index_schedule, + schedule=instance.index_schedule, verbose_name=verbose_name.format(instance.name), - remove_existing_tasks=True ) @@ -108,14 +106,14 @@ def source_post_save(sender, instance, created, **kwargs): verbose_name = _('Check download directory exists for source "{}"') check_source_directory_exists( str(instance.pk), - priority=0, - verbose_name=verbose_name.format(instance.name) + queue=str(instance.pk), + verbose_name=verbose_name.format(instance.name), ) if instance.source_type != Val(YouTube_SourceType.PLAYLIST) and instance.copy_channel_images: download_source_images( str(instance.pk), - priority=5, - verbose_name=verbose_name.format(instance.name) + queue=str(instance.pk), + verbose_name=verbose_name.format(instance.name), ) if instance.index_schedule > 0: delete_task_by_source('sync.tasks.index_source_task', instance.pk) @@ -123,20 +121,17 @@ def source_post_save(sender, instance, created, **kwargs): verbose_name = _('Index media from source "{}"') index_source_task( str(instance.pk), - schedule=600, - repeat=instance.index_schedule, queue=str(instance.pk), - priority=10, + repeat=instance.index_schedule, + schedule=600, verbose_name=verbose_name.format(instance.name), - remove_existing_tasks=True ) verbose_name = _('Checking all media for source "{}"') save_all_media_for_source( str(instance.pk), - priority=25, + queue=str(instance.pk), verbose_name=verbose_name.format(instance.name), - remove_existing_tasks=True ) @@ -157,7 +152,6 @@ def source_pre_delete(sender, 
instance, **kwargs): delete_all_media_for_source( str(instance.pk), str(instance.name), - priority=1, verbose_name=verbose_name.format(instance.name), ) # Try to do it all immediately @@ -245,9 +239,7 @@ def media_post_save(sender, instance, created, **kwargs): rename_media( str(media.pk), queue=str(media.pk), - priority=20, verbose_name=verbose_name.format(media.key, media.name), - remove_existing_tasks=True ) # If the media is missing metadata schedule it to be downloaded @@ -256,9 +248,8 @@ def media_post_save(sender, instance, created, **kwargs): verbose_name = _('Downloading metadata for "{}"') download_media_metadata( str(instance.pk), - priority=20, + queue=str(media.pk), verbose_name=verbose_name.format(instance.pk), - remove_existing_tasks=True ) # If the media is missing a thumbnail schedule it to be downloaded (unless we are skipping this media) if not instance.thumb_file_exists: @@ -272,10 +263,8 @@ def media_post_save(sender, instance, created, **kwargs): download_media_thumbnail( str(instance.pk), thumbnail_url, - queue=str(instance.source.pk), - priority=15, + queue=str(instance.pk), verbose_name=verbose_name.format(instance.name), - remove_existing_tasks=True ) # If the media has not yet been downloaded schedule it to be downloaded if not (instance.media_file_exists or instance.filepath.exists() or existing_media_download_task): @@ -289,10 +278,8 @@ def media_post_save(sender, instance, created, **kwargs): verbose_name = _('Downloading media for "{}"') download_media( str(instance.pk), - queue=str(instance.source.pk), - priority=15, + queue=str(instance.pk), verbose_name=verbose_name.format(instance.name), - remove_existing_tasks=True ) # Save the instance if any changes were required if skip_changed or can_download_changed: From 82688a8475e9c8fa02feb6fd827cec89137d314d Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 16:11:08 -0400 Subject: [PATCH 377/417] Restore `schedule` kwarg --- tubesync/sync/views.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index bfdcbf6f..5e937e5e 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -118,13 +118,16 @@ class SourcesView(ListView): if sobj is None: return HttpResponseNotFound() + source = sobj verbose_name = _('Index media from source "{}" once') index_source_task( - str(sobj.pk), - queue=str(sobj.pk), - repeat=0, + str(source.pk), + queue=str(source.pk), remove_existing_tasks=False, - verbose_name=verbose_name.format(sobj.name)) + repeat=0, + schedule=30, + verbose_name=verbose_name.format(source.name), + ) url = reverse_lazy('sync:sources') url = append_uri_params(url, {'message': 'source-refreshed'}) return HttpResponseRedirect(url) From 52579865b2e9f01d06b9c4cece2d72da31ab0b8d Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 16:27:18 -0400 Subject: [PATCH 378/417] priority: 01: delete_all_media_for_source --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index fb7a0892..d335eb31 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -770,7 +770,7 @@ def wait_for_media_premiere(media_id): if task: update_task_status(task, f'available in {hours(media.published - now)} hours') -@background(schedule=dict(run_at=300), remove_existing_tasks=False) +@background(schedule=dict(priority=1, run_at=300), remove_existing_tasks=False) def delete_all_media_for_source(source_id, source_name): source = None try: From 
61623b66abeb4f7c4de4594578468a70cf8834cb Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 16:33:31 -0400 Subject: [PATCH 379/417] priority: 05: download_source_images --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d335eb31..7aaa8a8e 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -335,7 +335,7 @@ def check_source_directory_exists(source_id): source.make_directory() -@background(schedule=dict(run_at=0)) +@background(schedule=dict(priority=5, run_at=0)) def download_source_images(source_id): ''' Downloads an image and save it as a local thumbnail attached to a From 11baadc6efac3b75e32db9e6dbb66d9e0f39a0ff Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 16:37:09 -0400 Subject: [PATCH 380/417] priority: 25: save_all_media_for_source --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 7aaa8a8e..f7986f70 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -647,7 +647,7 @@ def rescan_media_server(mediaserver_id): mediaserver.update() -@background(schedule=dict(run_at=300), remove_existing_tasks=True) +@background(schedule=dict(priority=25, run_at=300), remove_existing_tasks=True) def save_all_media_for_source(source_id): ''' Iterates all media items linked to a source and saves them to From 1af9070d1c0ded0217ab5e1aea9890dcbdeaa2dd Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 16:56:24 -0400 Subject: [PATCH 381/417] priority: 20: rename_media --- tubesync/sync/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index f7986f70..897257ba 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -704,7 +704,7 @@ def save_all_media_for_source(source_id): update_task_status(task, None) -@background(schedule=dict(run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=20, run_at=60), remove_existing_tasks=True) def rename_media(media_id): try: media = Media.objects.defer('metadata', 'thumb').get(pk=media_id) @@ -713,7 +713,7 @@ def rename_media(media_id): media.rename_files() -@background(schedule=dict(run_at=300), remove_existing_tasks=True) +@background(schedule=dict(priority=20, run_at=300), remove_existing_tasks=True) @atomic(durable=True) def rename_all_media_for_source(source_id): try: From c8a9037fb20a20ec15bbfcbf03e788c320f2c494 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 17:15:09 -0400 Subject: [PATCH 382/417] priority: 20: download_media_metadata --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 897257ba..3675421a 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -385,7 +385,7 @@ def download_source_images(source_id): log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}') -@background(schedule=dict(run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=20, run_at=60), remove_existing_tasks=True) def download_media_metadata(media_id): ''' Downloads the metadata for a media item. 
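(For reference, since the surrounding commits keep tuning `priority` and `run_at`
without restating their meaning: in django-background-tasks a `schedule` dict
carries `run_at`, a delay in seconds before the task becomes eligible, and a
default `priority`; with TubeSync's BACKGROUND_TASK_PRIORITY_ORDERING = 'ASC', a
lower priority number runs first, like process niceness. A minimal sketch,
assuming a configured Django project; the task and lookup below are illustrative,
not TubeSync code:

    from background_task import background
    from background_task.exceptions import InvalidTaskError

    KNOWN = {'1234': 'example object'}  # stand-in for a model lookup

    @background(schedule=dict(priority=5, run_at=60), remove_existing_tasks=True)
    def refresh_object(object_id):
        if KNOWN.get(object_id) is None:
            # InvalidTaskError deletes the task instead of letting it retry up
            # to MAX_ATTEMPTS times; this series raises it whenever the Source
            # or Media row behind a task has been deleted.
            raise InvalidTaskError('no such object')

    # Call-site keyword arguments override the decorator defaults:
    refresh_object('1234', priority=0, schedule=30, verbose_name='Refresh "1234"'))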
From d769f39a86be7412a19c962380a208ce909c0a79 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 17:20:27 -0400 Subject: [PATCH 383/417] priority: 15: download_media_thumbnail --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 3675421a..b2e15aee 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -472,7 +472,7 @@ def download_media_metadata(media_id): f'{source} / {media}: {media_id}') -@background(schedule=dict(run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=15, run_at=60), remove_existing_tasks=True) def download_media_thumbnail(media_id, url): ''' Downloads an image from a URL and save it as a local thumbnail attached to a From 8e3523ae9c7fa437c8f71dd64065a8d742dea64d Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 17:23:56 -0400 Subject: [PATCH 384/417] priority: 15: download_media --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index b2e15aee..a172d098 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -510,7 +510,7 @@ def download_media_thumbnail(media_id, url): return True -@background(schedule=dict(run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=15, run_at=60), remove_existing_tasks=True) def download_media(media_id): ''' Downloads the media to disk and attaches it to the Media instance. From 529f3cbbd0591b95cf06eadbd58cbef697968088 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 17:40:10 -0400 Subject: [PATCH 385/417] Add priority kwarg for default value --- tubesync/sync/tasks.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index a172d098..b539a2f9 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -316,7 +316,7 @@ def index_source_task(source_id): cleanup_removed_media(source, videos) -@background(schedule=dict(run_at=0)) +@background(schedule=dict(priority=0, run_at=0)) def check_source_directory_exists(source_id): ''' Checks the output directory for a source exists and is writable, if it does @@ -335,7 +335,7 @@ def check_source_directory_exists(source_id): source.make_directory() -@background(schedule=dict(priority=5, run_at=0)) +@background(schedule=dict(priority=5, run_at=10)) def download_source_images(source_id): ''' Downloads an image and save it as a local thumbnail attached to a @@ -472,7 +472,7 @@ def download_media_metadata(media_id): f'{source} / {media}: {media_id}') -@background(schedule=dict(priority=15, run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=15, run_at=10), remove_existing_tasks=True) def download_media_thumbnail(media_id, url): ''' Downloads an image from a URL and save it as a local thumbnail attached to a @@ -632,7 +632,7 @@ def download_media(media_id): raise DownloadFailedException(err) -@background(schedule=dict(run_at=300), remove_existing_tasks=True) +@background(schedule=dict(priority=0, run_at=30), remove_existing_tasks=True) def rescan_media_server(mediaserver_id): ''' Attempts to request a media rescan on a remote media server. 
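(Nearly every decorator in this file also passes `remove_existing_tasks=True`. A
short sketch of what that flag does, assuming a configured Django project with
django-background-tasks installed; the task name is illustrative:

    from background_task import background
    from background_task.models import Task

    @background(schedule=dict(priority=0, run_at=300), remove_existing_tasks=True)
    def ping_server(server_id):
        pass

    ping_server('abc')  # queues one pending Task row
    ping_server('abc')  # same task name and parameters: the older pending row is dropped
    assert Task.objects.filter(task_name__endswith='ping_server').count() == 1)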
@@ -647,7 +647,7 @@ def rescan_media_server(mediaserver_id): mediaserver.update() -@background(schedule=dict(priority=25, run_at=300), remove_existing_tasks=True) +@background(schedule=dict(priority=25, run_at=600), remove_existing_tasks=True) def save_all_media_for_source(source_id): ''' Iterates all media items linked to a source and saves them to @@ -746,7 +746,7 @@ def rename_all_media_for_source(source_id): media.rename_files() -@background(schedule=dict(run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=0, run_at=60), remove_existing_tasks=True) def wait_for_media_premiere(media_id): hours = lambda td: 1+int((24*td.days)+(td.seconds/(60*60))) From 7ac5f2c148c2b8ea184b35b3f14f0d9732712683 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 17:53:58 -0400 Subject: [PATCH 386/417] Update for adjusted queue --- tubesync/sync/tests.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index b05c3991..c16c4954 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -420,8 +420,7 @@ class FrontEndTestCase(TestCase): found_download_task1 = False found_download_task2 = False found_download_task3 = False - q = {'queue': str(test_source.pk), - 'task_name': 'sync.tasks.download_media_thumbnail'} + q = {'task_name': 'sync.tasks.download_media_thumbnail'} for task in Task.objects.filter(**q): if test_media1_pk in task.task_params: found_thumbnail_task1 = True @@ -429,8 +428,7 @@ class FrontEndTestCase(TestCase): found_thumbnail_task2 = True if test_media3_pk in task.task_params: found_thumbnail_task3 = True - q = {'queue': str(test_source.pk), - 'task_name': 'sync.tasks.download_media'} + q = {'task_name': 'sync.tasks.download_media'} for task in Task.objects.filter(**q): if test_media1_pk in task.task_params: found_download_task1 = True From 0b92ae0500aa1675c7ff26f606faa7783d6f8ecc Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 18:42:07 -0400 Subject: [PATCH 387/417] Assign task queues based on resources used --- tubesync/sync/tasks.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index b539a2f9..aec69216 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -229,7 +229,7 @@ def cleanup_removed_media(source, videos): schedule_media_servers_update() -@background(schedule=dict(priority=10, run_at=30), remove_existing_tasks=True) +@background(schedule=dict(priority=10, run_at=30), queue='network', remove_existing_tasks=True) def index_source_task(source_id): ''' Indexes media available from a Source object. 
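(The queue names introduced in this commit group tasks by the resource they
contend for, so a dedicated `process_tasks` worker can drain each one; patch 391
below adds the matching s6 services. A sketch of the pattern, with illustrative
task names:

    from background_task import background

    @background(schedule=dict(priority=10, run_at=30), queue='network')
    def fetch_feed(source_id):
        ...  # talks to the remote site, serialized behind other network tasks

    @background(schedule=dict(priority=20, run_at=60), queue='filesystem')
    def move_files(source_id):
        ...  # touches the disk without blocking network downloads

Each queue then gets its own single-threaded worker, for example:
`python3 manage.py process_tasks --queue network`.)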
@@ -316,7 +316,7 @@ def index_source_task(source_id): cleanup_removed_media(source, videos) -@background(schedule=dict(priority=0, run_at=0)) +@background(schedule=dict(priority=0, run_at=0), queue='filesystem') def check_source_directory_exists(source_id): ''' Checks the output directory for a source exists and is writable, if it does @@ -335,7 +335,7 @@ def check_source_directory_exists(source_id): source.make_directory() -@background(schedule=dict(priority=5, run_at=10)) +@background(schedule=dict(priority=5, run_at=10), queue='network') def download_source_images(source_id): ''' Downloads an image and save it as a local thumbnail attached to a @@ -385,7 +385,7 @@ def download_source_images(source_id): log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}') -@background(schedule=dict(priority=20, run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=20, run_at=60), queue='network', remove_existing_tasks=True) def download_media_metadata(media_id): ''' Downloads the metadata for a media item. @@ -472,7 +472,7 @@ def download_media_metadata(media_id): f'{source} / {media}: {media_id}') -@background(schedule=dict(priority=15, run_at=10), remove_existing_tasks=True) +@background(schedule=dict(priority=15, run_at=10), queue='network', remove_existing_tasks=True) def download_media_thumbnail(media_id, url): ''' Downloads an image from a URL and save it as a local thumbnail attached to a @@ -510,7 +510,7 @@ def download_media_thumbnail(media_id, url): return True -@background(schedule=dict(priority=15, run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=15, run_at=60), queue='network', remove_existing_tasks=True) def download_media(media_id): ''' Downloads the media to disk and attaches it to the Media instance. @@ -632,7 +632,7 @@ def download_media(media_id): raise DownloadFailedException(err) -@background(schedule=dict(priority=0, run_at=30), remove_existing_tasks=True) +@background(schedule=dict(priority=0, run_at=30), queue='network', remove_existing_tasks=True) def rescan_media_server(mediaserver_id): ''' Attempts to request a media rescan on a remote media server. 
@@ -647,7 +647,7 @@ def rescan_media_server(mediaserver_id): mediaserver.update() -@background(schedule=dict(priority=25, run_at=600), remove_existing_tasks=True) +@background(schedule=dict(priority=25, run_at=600), queue='network', remove_existing_tasks=True) def save_all_media_for_source(source_id): ''' Iterates all media items linked to a source and saves them to @@ -704,7 +704,7 @@ def save_all_media_for_source(source_id): update_task_status(task, None) -@background(schedule=dict(priority=20, run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=20, run_at=60), queue='filesystem', remove_existing_tasks=True) def rename_media(media_id): try: media = Media.objects.defer('metadata', 'thumb').get(pk=media_id) @@ -713,7 +713,7 @@ def rename_media(media_id): media.rename_files() -@background(schedule=dict(priority=20, run_at=300), remove_existing_tasks=True) +@background(schedule=dict(priority=20, run_at=300), queue='filesystem', remove_existing_tasks=True) @atomic(durable=True) def rename_all_media_for_source(source_id): try: @@ -746,7 +746,7 @@ def rename_all_media_for_source(source_id): media.rename_files() -@background(schedule=dict(priority=0, run_at=60), remove_existing_tasks=True) +@background(schedule=dict(priority=0, run_at=60), queue='database', remove_existing_tasks=True) def wait_for_media_premiere(media_id): hours = lambda td: 1+int((24*td.days)+(td.seconds/(60*60))) @@ -770,7 +770,7 @@ def wait_for_media_premiere(media_id): if task: update_task_status(task, f'available in {hours(media.published - now)} hours') -@background(schedule=dict(priority=1, run_at=300), remove_existing_tasks=False) +@background(schedule=dict(priority=1, run_at=300), queue='filesystem', remove_existing_tasks=False) def delete_all_media_for_source(source_id, source_name): source = None try: From a2426a14533a9521569be07e683809f81032476f Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 19:11:46 -0400 Subject: [PATCH 388/417] Keep `sleep_interval` under half of the maximum --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 1ebc6ec6..bedd34d5 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -316,7 +316,7 @@ def download_media( 'check_formats': None, 'overwrites': None, 'skip_unavailable_fragments': False, - 'sleep_interval': max(10, int(settings.DOWNLOAD_MEDIA_DELAY / 20)), + 'sleep_interval': min(10*60, max(10, int(settings.DOWNLOAD_MEDIA_DELAY / 20))), 'max_sleep_interval': min(20*60, max(60, settings.DOWNLOAD_MEDIA_DELAY)), 'sleep_interval_requests': 1 + (2 * settings.BACKGROUND_TASK_ASYNC_THREADS), 'paths': opts.get('paths', dict()), From f316c3e81a728e5b983f579aedc99afa12ec3d61 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 19:28:05 -0400 Subject: [PATCH 389/417] Fix typo --- tubesync/sync/matching.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/matching.py b/tubesync/sync/matching.py index 9390e6fa..ffb86416 100644 --- a/tubesync/sync/matching.py +++ b/tubesync/sync/matching.py @@ -236,7 +236,7 @@ def get_best_video_format(media): break if not best_match: for fmt in video_formats: - # Check for codec and resolution match bot drop 60fps + # Check for codec and resolution match but drop 60fps if (source_resolution == fmt['format'] and source_vcodec == fmt['vcodec'] and not fmt['is_hdr']): @@ -294,7 +294,7 @@ def get_best_video_format(media): break if not best_match: for fmt in video_formats: - # Check 
for codec and resolution match bot drop hdr + # Check for codec and resolution match but drop hdr if (source_resolution == fmt['format'] and source_vcodec == fmt['vcodec'] and not fmt['is_60fps']): From 73195fa79b0c873a5b798b9e03c95d86cda634ec Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 19:41:49 -0400 Subject: [PATCH 390/417] Do not use a thread pool for workers --- tubesync/tubesync/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index c44c888f..0ac2b462 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -136,7 +136,7 @@ HEALTHCHECK_ALLOWED_IPS = ('127.0.0.1',) MAX_ATTEMPTS = 15 # Number of times tasks will be retried MAX_RUN_TIME = 1*(24*60*60) # Maximum amount of time in seconds a task can run -BACKGROUND_TASK_RUN_ASYNC = True # Run tasks async in the background +BACKGROUND_TASK_RUN_ASYNC = False # Run tasks async in the background BACKGROUND_TASK_ASYNC_THREADS = 1 # Number of async tasks to run at once MAX_BACKGROUND_TASK_ASYNC_THREADS = 8 # For sanity reasons BACKGROUND_TASK_PRIORITY_ORDERING = 'ASC' # Use 'niceness' task priority ordering From b33ff71678e1d4b64e88d9af2003bb6d1a555c8b Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 20:38:26 -0400 Subject: [PATCH 391/417] Added additional tubesync workers --- .../{tubesync-worker => tubesync-db-worker}/dependencies | 0 .../{tubesync-worker => tubesync-db-worker}/down-signal | 0 config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/run | 5 +++++ .../s6-rc.d/{tubesync-worker => tubesync-db-worker}/type | 0 .../etc/s6-overlay/s6-rc.d/tubesync-fs-worker/dependencies | 1 + .../etc/s6-overlay/s6-rc.d/tubesync-fs-worker/down-signal | 1 + config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/run | 5 +++++ config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/type | 1 + .../s6-overlay/s6-rc.d/tubesync-network-worker/dependencies | 1 + .../s6-overlay/s6-rc.d/tubesync-network-worker/down-signal | 1 + .../s6-rc.d/{tubesync-worker => tubesync-network-worker}/run | 0 .../root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/type | 1 + .../user/contents.d/{tubesync-worker => tubesync-db-worker} | 0 .../s6-overlay/s6-rc.d/user/contents.d/tubesync-fs-worker | 0 .../s6-rc.d/user/contents.d/tubesync-network-worker | 0 15 files changed, 16 insertions(+) rename config/root/etc/s6-overlay/s6-rc.d/{tubesync-worker => tubesync-db-worker}/dependencies (100%) rename config/root/etc/s6-overlay/s6-rc.d/{tubesync-worker => tubesync-db-worker}/down-signal (100%) create mode 100755 config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/run rename config/root/etc/s6-overlay/s6-rc.d/{tubesync-worker => tubesync-db-worker}/type (100%) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/dependencies create mode 100644 config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/down-signal create mode 100755 config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/run create mode 100644 config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/type create mode 100644 config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/dependencies create mode 100644 config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/down-signal rename config/root/etc/s6-overlay/s6-rc.d/{tubesync-worker => tubesync-network-worker}/run (100%) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/type rename config/root/etc/s6-overlay/s6-rc.d/user/contents.d/{tubesync-worker => tubesync-db-worker} (100%) create mode 
100644 config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-fs-worker create mode 100644 config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-network-worker diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/dependencies b/config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/dependencies similarity index 100% rename from config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/dependencies rename to config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/dependencies diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal b/config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/down-signal similarity index 100% rename from config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/down-signal rename to config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/down-signal diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/run new file mode 100755 index 00000000..03b75ea8 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py process_tasks \ + --queue database diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/type b/config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/type similarity index 100% rename from config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/type rename to config/root/etc/s6-overlay/s6-rc.d/tubesync-db-worker/type diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/dependencies b/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/dependencies new file mode 100644 index 00000000..283e1305 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/dependencies @@ -0,0 +1 @@ +gunicorn \ No newline at end of file diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/down-signal b/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/down-signal new file mode 100644 index 00000000..d751378e --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/down-signal @@ -0,0 +1 @@ +SIGINT diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/run new file mode 100755 index 00000000..0642054d --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py process_tasks \ + --queue filesystem diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/type b/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/type new file mode 100644 index 00000000..1780f9f4 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-fs-worker/type @@ -0,0 +1 @@ +longrun \ No newline at end of file diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/dependencies b/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/dependencies new file mode 100644 index 00000000..283e1305 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/dependencies @@ -0,0 +1 @@ +gunicorn \ No newline at end of file diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/down-signal b/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/down-signal new file mode 100644 index 00000000..d751378e --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/down-signal @@ -0,0 +1 @@ 
+SIGINT diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/run similarity index 100% rename from config/root/etc/s6-overlay/s6-rc.d/tubesync-worker/run rename to config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/run diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/type b/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/type new file mode 100644 index 00000000..1780f9f4 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/type @@ -0,0 +1 @@ +longrun \ No newline at end of file diff --git a/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-worker b/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-db-worker similarity index 100% rename from config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-worker rename to config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-db-worker diff --git a/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-fs-worker b/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-fs-worker new file mode 100644 index 00000000..e69de29b diff --git a/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-network-worker b/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/tubesync-network-worker new file mode 100644 index 00000000..e69de29b From 6058a66df10e510c2722238211f1d474714a1fe6 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 20:44:23 -0400 Subject: [PATCH 392/417] Set executable bit on `full_playlist.sh` --- tubesync/full_playlist.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tubesync/full_playlist.sh diff --git a/tubesync/full_playlist.sh b/tubesync/full_playlist.sh old mode 100644 new mode 100755 From 4228d69023ac5eb389addca6ddd416f09be18074 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 20:48:57 -0400 Subject: [PATCH 393/417] Remove `queue` kwarg --- tubesync/sync/management/commands/reset-tasks.py | 1 - tubesync/sync/signals.py | 9 --------- tubesync/sync/views.py | 2 -- 3 files changed, 12 deletions(-) diff --git a/tubesync/sync/management/commands/reset-tasks.py b/tubesync/sync/management/commands/reset-tasks.py index 3d5ecb98..d7818007 100644 --- a/tubesync/sync/management/commands/reset-tasks.py +++ b/tubesync/sync/management/commands/reset-tasks.py @@ -25,7 +25,6 @@ class Command(BaseCommand): verbose_name = _('Index media from source "{}"') index_source_task( str(source.pk), - queue=str(source.pk), repeat=source.index_schedule, verbose_name=verbose_name.format(source.name) ) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 90b39480..6ee64747 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -92,7 +92,6 @@ def source_pre_save(sender, instance, **kwargs): verbose_name = _('Index media from source "{}"') index_source_task( str(instance.pk), - queue=str(instance.pk), repeat=instance.index_schedule, schedule=instance.index_schedule, verbose_name=verbose_name.format(instance.name), @@ -106,13 +105,11 @@ def source_post_save(sender, instance, created, **kwargs): verbose_name = _('Check download directory exists for source "{}"') check_source_directory_exists( str(instance.pk), - queue=str(instance.pk), verbose_name=verbose_name.format(instance.name), ) if instance.source_type != Val(YouTube_SourceType.PLAYLIST) and instance.copy_channel_images: download_source_images( str(instance.pk), - queue=str(instance.pk), verbose_name=verbose_name.format(instance.name), ) if 
instance.index_schedule > 0: @@ -121,7 +118,6 @@ def source_post_save(sender, instance, created, **kwargs): verbose_name = _('Index media from source "{}"') index_source_task( str(instance.pk), - queue=str(instance.pk), repeat=instance.index_schedule, schedule=600, verbose_name=verbose_name.format(instance.name), @@ -130,7 +126,6 @@ def source_post_save(sender, instance, created, **kwargs): verbose_name = _('Checking all media for source "{}"') save_all_media_for_source( str(instance.pk), - queue=str(instance.pk), verbose_name=verbose_name.format(instance.name), ) @@ -238,7 +233,6 @@ def media_post_save(sender, instance, created, **kwargs): verbose_name = _('Renaming media for: {}: "{}"') rename_media( str(media.pk), - queue=str(media.pk), verbose_name=verbose_name.format(media.key, media.name), ) @@ -248,7 +242,6 @@ def media_post_save(sender, instance, created, **kwargs): verbose_name = _('Downloading metadata for "{}"') download_media_metadata( str(instance.pk), - queue=str(media.pk), verbose_name=verbose_name.format(instance.pk), ) # If the media is missing a thumbnail schedule it to be downloaded (unless we are skipping this media) @@ -263,7 +256,6 @@ def media_post_save(sender, instance, created, **kwargs): download_media_thumbnail( str(instance.pk), thumbnail_url, - queue=str(instance.pk), verbose_name=verbose_name.format(instance.name), ) # If the media has not yet been downloaded schedule it to be downloaded @@ -278,7 +270,6 @@ def media_post_save(sender, instance, created, **kwargs): verbose_name = _('Downloading media for "{}"') download_media( str(instance.pk), - queue=str(instance.pk), verbose_name=verbose_name.format(instance.name), ) # Save the instance if any changes were required diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 5e937e5e..3f6eda84 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -122,7 +122,6 @@ class SourcesView(ListView): verbose_name = _('Index media from source "{}" once') index_source_task( str(source.pk), - queue=str(source.pk), remove_existing_tasks=False, repeat=0, schedule=30, @@ -933,7 +932,6 @@ class ResetTasks(FormView): verbose_name = _('Index media from source "{}"') index_source_task( str(source.pk), - queue=str(source.pk), repeat=source.index_schedule, verbose_name=verbose_name.format(source.name) ) From 2613d9392410f9ae475c70221be3ff52abee3b30 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 6 Apr 2025 20:50:11 -0400 Subject: [PATCH 394/417] Remove unneeded kwargs --- tubesync/sync/tasks.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index aec69216..651a02a8 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -434,12 +434,9 @@ def download_media_metadata(media_id): verbose_name = _('Waiting for the premiere of "{}" at: {}') wait_for_media_premiere( str(media.pk), - priority=0, - queue=str(media.pk), repeat=Task.HOURLY, repeat_until = published_datetime + timedelta(hours=1), verbose_name=verbose_name.format(media.key, published_datetime.isoformat(' ', 'seconds')), - remove_existing_tasks=True, ) raise_exception = False if raise_exception: From b97de08ffdfb66f9c8a07d359e34d68108ad300b Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 00:19:23 -0400 Subject: [PATCH 395/417] Filter on `task_params` instead of `queue` --- tubesync/sync/tasks.py | 8 ++++++-- tubesync/sync/views.py | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 651a02a8..d9610ddb 100644 
--- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -133,7 +133,7 @@ def get_source_completed_tasks(source_id, only_errors=False): ''' Returns a queryset of CompletedTask objects for a source by source ID. ''' - q = {'queue': source_id} + q = {'task_params__istartswith': f'[["{source_id}"'} if only_errors: q['failed_at__isnull'] = False return CompletedTask.objects.filter(**q).order_by('-failed_at') @@ -167,7 +167,11 @@ def get_source_index_task(source_id): def delete_task_by_source(task_name, source_id): now = timezone.now() unlocked = Task.objects.unlocked(now) - return unlocked.filter(task_name=task_name, queue=str(source_id)).delete() + qs = unlocked.filter( + task_name=task_name, + task_params__istartswith=f'[["{source_id}"', + ) + return qs.delete() def delete_task_by_media(task_name, args): diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 3f6eda84..c9fee226 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -768,7 +768,8 @@ class TasksView(ListView): def get_queryset(self): qs = Task.objects.all() if self.filter_source: - qs = qs.filter(queue=str(self.filter_source.pk)) + params_prefix=f'[["{self.filter_source.pk}"' + qs = qs.filter(task_params__istartswith=params_prefix) order = getattr(settings, 'BACKGROUND_TASK_PRIORITY_ORDERING', 'DESC' @@ -896,7 +897,8 @@ class CompletedTasksView(ListView): def get_queryset(self): qs = CompletedTask.objects.all() if self.filter_source: - qs = qs.filter(queue=str(self.filter_source.pk)) + params_prefix=f'[["{self.filter_source.pk}"' + qs = qs.filter(task_params__istartswith=params_prefix) return qs.order_by('-run_at') def get_context_data(self, *args, **kwargs): From 8e52692ec89966af93d1be3199b196cf6469b32b Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 00:22:55 -0400 Subject: [PATCH 396/417] Label the queue as it is no longer `Source.uuid` --- tubesync/sync/templates/sync/tasks-completed.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/templates/sync/tasks-completed.html b/tubesync/sync/templates/sync/tasks-completed.html index b87805be..52f576df 100644 --- a/tubesync/sync/templates/sync/tasks-completed.html +++ b/tubesync/sync/templates/sync/tasks-completed.html @@ -17,14 +17,14 @@ {% if task.has_error %} {{ task.verbose_name }}
      - Source: "{{ task.queue }}"
      + Queue: "{{ task.queue }}"
      Error: "{{ task.error_message }}"
      Task ran at {{ task.run_at|date:'Y-m-d H:i:s' }}
      {% else %} {{ task.verbose_name }}
      - Source: "{{ task.queue }}"
      + Queue: "{{ task.queue }}"
      Task ran at {{ task.run_at|date:'Y-m-d H:i:s' }}
      {% endif %} From 468242c626d1767629eb87a2d2db4fb1da861e87 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 00:29:14 -0400 Subject: [PATCH 397/417] Use the `TaskQueue` class --- tubesync/sync/choices.py | 6 ++++++ tubesync/sync/tasks.py | 25 +++++++++++++------------ tubesync/sync/tests.py | 4 ++-- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/tubesync/sync/choices.py b/tubesync/sync/choices.py index c67de54b..25dd762a 100644 --- a/tubesync/sync/choices.py +++ b/tubesync/sync/choices.py @@ -160,6 +160,12 @@ class SponsorBlock_Category(models.TextChoices): MUSIC_OFFTOPIC = 'music_offtopic', _( 'Non-Music Section' ) +class TaskQueue(models.TextChoices): + DB = 'database', _('Database') + FS = 'filesystem', _('Filesystem') + NET = 'network', _('Networking') + + class YouTube_SourceType(models.TextChoices): CHANNEL = 'c', _('YouTube channel') CHANNEL_ID = 'i', _('YouTube channel by ID') diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d9610ddb..d937d690 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -26,6 +26,7 @@ from background_task.models import Task, CompletedTask from common.logger import log from common.errors import NoMediaException, NoMetadataException, DownloadFailedException from common.utils import json_serial, remove_enclosed +from .choices import Val, TaskQueue from .models import Source, Media, MediaServer from .utils import (get_remote_image, resize_image_to_height, delete_file, write_text_file, filter_response) @@ -233,7 +234,7 @@ def cleanup_removed_media(source, videos): schedule_media_servers_update() -@background(schedule=dict(priority=10, run_at=30), queue='network', remove_existing_tasks=True) +@background(schedule=dict(priority=10, run_at=30), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def index_source_task(source_id): ''' Indexes media available from a Source object. @@ -320,7 +321,7 @@ def index_source_task(source_id): cleanup_removed_media(source, videos) -@background(schedule=dict(priority=0, run_at=0), queue='filesystem') +@background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.FS)) def check_source_directory_exists(source_id): ''' Checks the output directory for a source exists and is writable, if it does @@ -339,7 +340,7 @@ def check_source_directory_exists(source_id): source.make_directory() -@background(schedule=dict(priority=5, run_at=10), queue='network') +@background(schedule=dict(priority=5, run_at=10), Val(TaskQueue.NET)) def download_source_images(source_id): ''' Downloads an image and save it as a local thumbnail attached to a @@ -389,7 +390,7 @@ def download_source_images(source_id): log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}') -@background(schedule=dict(priority=20, run_at=60), queue='network', remove_existing_tasks=True) +@background(schedule=dict(priority=20, run_at=60), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def download_media_metadata(media_id): ''' Downloads the metadata for a media item. 
@@ -473,7 +474,7 @@ def download_media_metadata(media_id): f'{source} / {media}: {media_id}') -@background(schedule=dict(priority=15, run_at=10), queue='network', remove_existing_tasks=True) +@background(schedule=dict(priority=15, run_at=10), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def download_media_thumbnail(media_id, url): ''' Downloads an image from a URL and save it as a local thumbnail attached to a @@ -511,7 +512,7 @@ def download_media_thumbnail(media_id, url): return True -@background(schedule=dict(priority=15, run_at=60), queue='network', remove_existing_tasks=True) +@background(schedule=dict(priority=15, run_at=60), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def download_media(media_id): ''' Downloads the media to disk and attaches it to the Media instance. @@ -633,7 +634,7 @@ def download_media(media_id): raise DownloadFailedException(err) -@background(schedule=dict(priority=0, run_at=30), queue='network', remove_existing_tasks=True) +@background(schedule=dict(priority=0, run_at=30), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def rescan_media_server(mediaserver_id): ''' Attempts to request a media rescan on a remote media server. @@ -648,7 +649,7 @@ def rescan_media_server(mediaserver_id): mediaserver.update() -@background(schedule=dict(priority=25, run_at=600), queue='network', remove_existing_tasks=True) +@background(schedule=dict(priority=25, run_at=600), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def save_all_media_for_source(source_id): ''' Iterates all media items linked to a source and saves them to @@ -705,7 +706,7 @@ def save_all_media_for_source(source_id): update_task_status(task, None) -@background(schedule=dict(priority=20, run_at=60), queue='filesystem', remove_existing_tasks=True) +@background(schedule=dict(priority=20, run_at=60), queue=Val(TaskQueue.FS), remove_existing_tasks=True) def rename_media(media_id): try: media = Media.objects.defer('metadata', 'thumb').get(pk=media_id) @@ -714,7 +715,7 @@ def rename_media(media_id): media.rename_files() -@background(schedule=dict(priority=20, run_at=300), queue='filesystem', remove_existing_tasks=True) +@background(schedule=dict(priority=20, run_at=300), queue=Val(TaskQueue.FS), remove_existing_tasks=True) @atomic(durable=True) def rename_all_media_for_source(source_id): try: @@ -747,7 +748,7 @@ def rename_all_media_for_source(source_id): media.rename_files() -@background(schedule=dict(priority=0, run_at=60), queue='database', remove_existing_tasks=True) +@background(schedule=dict(priority=0, run_at=60), queue=Val(TaskQueue.DB), remove_existing_tasks=True) def wait_for_media_premiere(media_id): hours = lambda td: 1+int((24*td.days)+(td.seconds/(60*60))) @@ -771,7 +772,7 @@ def wait_for_media_premiere(media_id): if task: update_task_status(task, f'available in {hours(media.published - now)} hours') -@background(schedule=dict(priority=1, run_at=300), queue='filesystem', remove_existing_tasks=False) +@background(schedule=dict(priority=1, run_at=300), queue=Val(TaskQueue.FS), remove_existing_tasks=False) def delete_all_media_for_source(source_id, source_name): source = None try: diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index c16c4954..303aa18a 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -20,7 +20,7 @@ from .tasks import cleanup_old_media, check_source_directory_exists from .filtering import filter_media from .utils import filter_response from .choices import (Val, Fallback, IndexSchedule, SourceResolution, - YouTube_AudioCodec, 
YouTube_VideoCodec, + TaskQueue, YouTube_AudioCodec, YouTube_VideoCodec, YouTube_SourceType, youtube_long_source_types) @@ -211,7 +211,7 @@ class FrontEndTestCase(TestCase): source_uuid = str(source.pk) task = Task.objects.get_task('sync.tasks.index_source_task', args=(source_uuid,))[0] - self.assertEqual(task.queue, source_uuid) + self.assertEqual(task.queue, Val(TaskQueue.NET)) # Run the check_source_directory_exists task check_source_directory_exists.now(source_uuid) # Check the source is now on the source overview page From 10aa455ba9679f4e9d7ac0bb7549c16f0c8001ef Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 00:38:09 -0400 Subject: [PATCH 398/417] Limit the worker to a single queue --- config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/run | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/run index b2c3a841..a9c17d49 100755 --- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/run +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-network-worker/run @@ -1,4 +1,5 @@ #!/command/with-contenv bash exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ - /usr/bin/python3 /app/manage.py process_tasks + /usr/bin/python3 /app/manage.py process_tasks \ + --queue network From e2b99de843d5bdca12c8a71ae4fb9b05f53b237c Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 01:26:52 -0400 Subject: [PATCH 399/417] fixup: restore the missing keyword --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d937d690..7c10c038 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -340,7 +340,7 @@ def check_source_directory_exists(source_id): source.make_directory() -@background(schedule=dict(priority=5, run_at=10), Val(TaskQueue.NET)) +@background(schedule=dict(priority=5, run_at=10), queue=Val(TaskQueue.NET)) def download_source_images(source_id): ''' Downloads an image and save it as a local thumbnail attached to a From 0c056cc115c8ce092a65252b5ae1b5671bed8dbb Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 03:51:33 -0400 Subject: [PATCH 400/417] Add `migrate_queues` function --- tubesync/sync/tasks.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 7c10c038..34c37e6c 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -192,6 +192,13 @@ def cleanup_completed_tasks(): CompletedTask.objects.filter(run_at__lt=delta).delete() +@atomic(durable=False) +def migrate_queues(): + tqs = Task.objects.all() + qs = tqs.exclude(queue__in=TaskQueue.values) + return qs.update(queue=Val(TaskQueue.NET)) + + def schedule_media_servers_update(): with atomic(): # Schedule a task to update media servers From 2c41a90695ec76984abb2ee759aa458012c688b9 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 03:57:31 -0400 Subject: [PATCH 401/417] Migrate old tasks to the new queues --- tubesync/sync/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index c9fee226..f489144b 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -29,7 +29,7 @@ from .forms import (ValidateSourceForm, ConfirmDeleteSourceForm, RedownloadMedia from .utils import validate_url, delete_file, multi_key_sort from .tasks import (map_task_to_instance, get_error_message, get_source_completed_tasks, 
get_media_download_task, - delete_task_by_media, index_source_task) + delete_task_by_media, index_source_task, migrate_queues) from .choices import (Val, MediaServerType, SourceResolution, YouTube_SourceType, youtube_long_source_types, youtube_help, youtube_validation_urls) @@ -797,6 +797,7 @@ class TasksView(ListView): data['total_errors'] = errors_qs.count() data['scheduled'] = list() data['total_scheduled'] = scheduled_qs.count() + data['migrated'] = migrate_queues() def add_to_task(task): obj, url = map_task_to_instance(task) From 456eadd35e37e68dda7dd7051089fa8111132271 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 14:35:39 -0400 Subject: [PATCH 402/417] Add files via upload --- .../0030_alter_source_source_vcodec.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tubesync/sync/migrations/0030_alter_source_source_vcodec.py diff --git a/tubesync/sync/migrations/0030_alter_source_source_vcodec.py b/tubesync/sync/migrations/0030_alter_source_source_vcodec.py new file mode 100644 index 00000000..2b4f3618 --- /dev/null +++ b/tubesync/sync/migrations/0030_alter_source_source_vcodec.py @@ -0,0 +1,18 @@ +# Generated by Django 5.1.8 on 2025-04-07 18:28 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('sync', '0029_alter_mediaserver_fields'), + ] + + operations = [ + migrations.AlterField( + model_name='source', + name='source_vcodec', + field=models.CharField(choices=[('AVC1', 'AVC1 (H.264)'), ('VP9', 'VP9'), ('AV1', 'AV1')], db_index=True, default='VP9', help_text='Source video codec, desired video encoding format to download (ignored if "resolution" is audio only)', max_length=8, verbose_name='source video codec'), + ), + ] From c72a9c858499c3219413a85967bd1260ba75c223 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 17:20:26 -0400 Subject: [PATCH 403/417] Use a task per instance for `Media.refresh_formats` --- tubesync/sync/tasks.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 34c37e6c..ceda71d2 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -54,6 +54,7 @@ def map_task_to_instance(task): 'sync.tasks.download_media': Media, 'sync.tasks.download_media_metadata': Media, 'sync.tasks.save_all_media_for_source': Source, + 'sync.tasks.refesh_formats': Media, 'sync.tasks.rename_media': Media, 'sync.tasks.rename_all_media_for_source': Source, 'sync.tasks.wait_for_media_premiere': Media, @@ -656,7 +657,7 @@ def rescan_media_server(mediaserver_id): mediaserver.update() -@background(schedule=dict(priority=25, run_at=600), queue=Val(TaskQueue.NET), remove_existing_tasks=True) +@background(schedule=dict(priority=25, run_at=600), queue=Val(TaskQueue.FS), remove_existing_tasks=True) def save_all_media_for_source(source_id): ''' Iterates all media items linked to a source and saves them to @@ -672,7 +673,7 @@ def save_all_media_for_source(source_id): f'source exists with ID: {source_id}') raise InvalidTaskError(_('no such source')) from e - already_saved = set() + saved_later = set() mqs = Media.objects.filter(source=source) task = get_source_check_task(source_id) refresh_qs = mqs.filter( @@ -691,21 +692,14 @@ def save_all_media_for_source(source_id): tvn_format = '1/{:,}' + f'/{refresh_qs.count():,}' for mn, media in enumerate(refresh_qs, start=1): update_task_status(task, tvn_format.format(mn)) - try: - media.refresh_formats - except YouTubeError as e: - 
log.debug(f'Failed to refresh formats for: {source} / {media.key}: {e!s}') - pass - else: - with atomic(): - media.save() - already_saved.add(media.uuid) + refesh_formats(str(media.pk)) + saved_later.add(media.uuid) # Trigger the post_save signal for each media item linked to this source as various # flags may need to be recalculated tvn_format = '2/{:,}' + f'/{mqs.count():,}' for mn, media in enumerate(mqs, start=1): - if media.uuid not in already_saved: + if media.uuid not in saved_later: update_task_status(task, tvn_format.format(mn)) with atomic(): media.save() @@ -713,6 +707,22 @@ def save_all_media_for_source(source_id): update_task_status(task, None) +@background(schedule=dict(priority=10, run_at=0), queue=Val(TaskQueue.NET), remove_existing_tasks=True) +def refesh_formats(media_id): + try: + media = Media.objects.get(pk=media_id) + except Media.DoesNotExist as e: + raise InvalidTaskError(_('no such media')) from e + try: + media.refresh_formats + except YouTubeError as e: + log.debug(f'Failed to refresh formats for: {media.source} / {media.key}: {e!s}') + pass + else: + with atomic(): + media.save() + + @background(schedule=dict(priority=20, run_at=60), queue=Val(TaskQueue.FS), remove_existing_tasks=True) def rename_media(media_id): try: From 4d1699406265dbef3639a53a212350110f9ef80b Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 7 Apr 2025 17:28:54 -0400 Subject: [PATCH 404/417] Stop increasing the minimum sleep --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index a720274e..a10ca31d 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -316,7 +316,7 @@ def download_media( 'check_formats': None, 'overwrites': None, 'skip_unavailable_fragments': False, - 'sleep_interval': min(10*60, max(10, int(settings.DOWNLOAD_MEDIA_DELAY / 20))), + 'sleep_interval': 10, 'max_sleep_interval': min(20*60, max(60, settings.DOWNLOAD_MEDIA_DELAY)), 'sleep_interval_requests': 1 + (2 * settings.BACKGROUND_TASK_ASYNC_THREADS), 'paths': opts.get('paths', dict()), From 360c708b3886fe734348058ea5219566a6d6d46d Mon Sep 17 00:00:00 2001 From: Daniel Mawhirter Date: Mon, 7 Apr 2025 21:25:52 -0500 Subject: [PATCH 405/417] Update matching.py --- tubesync/sync/matching.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/sync/matching.py b/tubesync/sync/matching.py index ffb86416..93f7e4d0 100644 --- a/tubesync/sync/matching.py +++ b/tubesync/sync/matching.py @@ -95,6 +95,8 @@ def get_best_video_format(media): continue if not fmt['vcodec']: continue + if any(key[0] not in fmt for key in sort_keys): + continue if media.source.source_resolution.strip().upper() == fmt['format']: video_formats.append(fmt) elif media.source.source_resolution_height == fmt['height']: From b0a72cf1fb858abbe4b445f8db5781e0fafad81b Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 8 Apr 2025 02:08:48 -0400 Subject: [PATCH 406/417] Set `verbose_name` for `refesh_formats` task --- tubesync/sync/tasks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index ceda71d2..79e283c3 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -692,7 +692,10 @@ def save_all_media_for_source(source_id): tvn_format = '1/{:,}' + f'/{refresh_qs.count():,}' for mn, media in enumerate(refresh_qs, start=1): update_task_status(task, tvn_format.format(mn)) - refesh_formats(str(media.pk)) + refesh_formats( + str(media.pk), + verbose_name=f'Refreshing metadata 
formats for: {media.key}: "{media.name}"', + ) saved_later.add(media.uuid) # Trigger the post_save signal for each media item linked to this source as various From e01d155b10fde284500804fca33fc83fcef38afe Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 8 Apr 2025 15:55:24 -0400 Subject: [PATCH 407/417] Switch to `getattr` --- tubesync/sync/signals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 6ee64747..ac325073 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -221,7 +221,7 @@ def media_post_save(sender, instance, created, **kwargs): else: # Downloaded media might need to be renamed # Check settings before any rename tasks are scheduled - rename_sources_setting = settings.RENAME_SOURCES or list() + rename_sources_setting = getattr(settings, 'RENAME_SOURCES', list()) create_rename_task = ( ( media.source.directory and From afe0a75824a351cdf0159cc4ae6c0c788c80c32d Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 8 Apr 2025 16:21:01 -0400 Subject: [PATCH 408/417] Handle `None` returned by `getattr` --- tubesync/sync/signals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index ac325073..4c332eca 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -221,7 +221,7 @@ def media_post_save(sender, instance, created, **kwargs): else: # Downloaded media might need to be renamed # Check settings before any rename tasks are scheduled - rename_sources_setting = getattr(settings, 'RENAME_SOURCES', list()) + rename_sources_setting = getattr(settings, 'RENAME_SOURCES') or list() create_rename_task = ( ( media.source.directory and From 033656b436db2ad3951e5944d17b5d9d68ead9b3 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 8 Apr 2025 16:23:48 -0400 Subject: [PATCH 409/417] Be consistent in the task also --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 34c37e6c..d29b080c 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -733,7 +733,7 @@ def rename_all_media_for_source(source_id): f'source exists with ID: {source_id}') raise InvalidTaskError(_('no such source')) from e # Check that the settings allow renaming - rename_sources_setting = getattr(settings, 'RENAME_SOURCES', list()) + rename_sources_setting = getattr(settings, 'RENAME_SOURCES') or list() create_rename_tasks = ( ( source.directory and From 15b8d4b83fc8079630e57b55107a80f9fb711dbf Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 8 Apr 2025 17:31:20 -0400 Subject: [PATCH 410/417] Add warning to the dashboard --- tubesync/sync/templates/sync/dashboard.html | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tubesync/sync/templates/sync/dashboard.html b/tubesync/sync/templates/sync/dashboard.html index af342800..23e1cdb2 100644 --- a/tubesync/sync/templates/sync/dashboard.html +++ b/tubesync/sync/templates/sync/dashboard.html @@ -99,6 +99,18 @@
+
+
+

Warnings

+
+ An upcoming release, after 2025-06-01, will introduce automated file renaming.
+ To prevent this change from taking effect, you can set an environment variable before that date.
+ See the GitHub README
+ for more details or ask questions using
+ issue #785.
+
+
+

Runtime information

From 55f55e73058f701f23c09da505792f1d634782a4 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 8 Apr 2025 20:01:20 -0400 Subject: [PATCH 411/417] Extract audio from `webm` downloads --- tubesync/sync/youtube.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index a10ca31d..d990b0f0 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -14,6 +14,7 @@ from tempfile import TemporaryDirectory from urllib.parse import urlsplit, parse_qs from django.conf import settings +from .choices import Val, FileExtension from .hooks import postprocessor_hook, progress_hook from .utils import mkdir_p import yt_dlp @@ -301,6 +302,10 @@ def download_media( ).options.sponsorblock_mark pp_opts.sponsorblock_remove.update(sponsor_categories or {}) + if Val(FileExtension.OGG) == extension: + pp_opts.extractaudio = True + pp_opts.nopostoverwrites = False + ytopts = { 'format': media_format, 'merge_output_format': extension, From e394232b15ef85ba39bd1a47d9ff1b524783077a Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 8 Apr 2025 20:10:55 -0400 Subject: [PATCH 412/417] Use a set of audio-only extensions --- tubesync/sync/youtube.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index d990b0f0..921c664b 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -302,7 +302,12 @@ def download_media( ).options.sponsorblock_mark pp_opts.sponsorblock_remove.update(sponsor_categories or {}) - if Val(FileExtension.OGG) == extension: + # Enable audio extraction for audio-only extensions + audio_exts = { + Val(FileExtension.M4A), + Val(FileExtension.OGG), + } + if extension in audio_exts: pp_opts.extractaudio = True pp_opts.nopostoverwrites = False From aa78c7309e69e1d28dc116956fcf2df8f32cda60 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 8 Apr 2025 20:13:20 -0400 Subject: [PATCH 413/417] Use a single `Val` call --- tubesync/sync/youtube.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 921c664b..145e4c5d 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -303,10 +303,10 @@ def download_media( pp_opts.sponsorblock_remove.update(sponsor_categories or {}) # Enable audio extraction for audio-only extensions - audio_exts = { - Val(FileExtension.M4A), - Val(FileExtension.OGG), - } + audio_exts = set(Val( + FileExtension.M4A, + FileExtension.OGG, + )) if extension in audio_exts: pp_opts.extractaudio = True pp_opts.nopostoverwrites = False From 7a0fdd16cd6e935fd39295adc5b6b2949dd00e1b Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 8 Apr 2025 21:11:08 -0400 Subject: [PATCH 414/417] Remove unused `media_post_delete` --- tubesync/sync/management/commands/delete-source.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubesync/sync/management/commands/delete-source.py b/tubesync/sync/management/commands/delete-source.py index 5ab8a325..2f149a67 100644 --- a/tubesync/sync/management/commands/delete-source.py +++ b/tubesync/sync/management/commands/delete-source.py @@ -5,7 +5,6 @@ from django.core.management.base import BaseCommand, CommandError from django.db.models import signals from common.logger import log from sync.models import Source, Media, MediaServer -from sync.signals import media_post_delete from sync.tasks import schedule_media_servers_update From c145987b0f3e5009a541e90bba113d84327f6cca Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 9 Apr 2025 
00:04:08 -0400 Subject: [PATCH 415/417] Use distinct transactions --- .../sync/management/commands/reset-tasks.py | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/tubesync/sync/management/commands/reset-tasks.py b/tubesync/sync/management/commands/reset-tasks.py index d7818007..3d6f515d 100644 --- a/tubesync/sync/management/commands/reset-tasks.py +++ b/tubesync/sync/management/commands/reset-tasks.py @@ -13,21 +13,28 @@ class Command(BaseCommand): help = 'Resets all tasks' - @atomic(durable=True) def handle(self, *args, **options): log.info('Resettings all tasks...') - # Delete all tasks - Task.objects.all().delete() - # Iter all tasks - for source in Source.objects.all(): - # Recreate the initial indexing task - log.info(f'Resetting tasks for source: {source}') - verbose_name = _('Index media from source "{}"') - index_source_task( - str(source.pk), - repeat=source.index_schedule, - verbose_name=verbose_name.format(source.name) - ) - # This also chains down to call each Media objects .save() as well - source.save() + with atomic(durable=True): + # Delete all tasks + Task.objects.all().delete() + # Iter all sources, creating new tasks + for source in Source.objects.all(): + verbose_name = _('Check download directory exists for source "{}"') + check_source_directory_exists( + str(source.pk), + verbose_name=verbose_name.format(source.name), + ) + # Recreate the initial indexing task + log.info(f'Resetting tasks for source: {source}') + verbose_name = _('Index media from source "{}"') + index_source_task( + str(source.pk), + repeat=source.index_schedule, + verbose_name=verbose_name.format(source.name), + ) + with atomic(durable=True): + for source in Source.objects.all(): + # This also chains down to call each Media objects .save() as well + source.save() log.info('Done') From 6278030a9bb0c7a89df2f60f1b9a5fdd7b6e5db9 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 9 Apr 2025 00:10:26 -0400 Subject: [PATCH 416/417] Check source directory when tasks were reset --- tubesync/sync/views.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index f489144b..0e3f8dbb 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -931,6 +931,11 @@ class ResetTasks(FormView): Task.objects.all().delete() # Iter all tasks for source in Source.objects.all(): + verbose_name = _('Check download directory exists for source "{}"') + check_source_directory_exists( + str(source.pk), + verbose_name=verbose_name.format(source.name), + ) # Recreate the initial indexing task verbose_name = _('Index media from source "{}"') index_source_task( From 60ce61bfd8202a0fef5fa8c40e11a10aa56fe23a Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 9 Apr 2025 00:41:57 -0400 Subject: [PATCH 417/417] Increase minor version --- tubesync/tubesync/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index 0ac2b462..fdf42c3a 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -7,7 +7,7 @@ CONFIG_BASE_DIR = BASE_DIR DOWNLOADS_BASE_DIR = BASE_DIR -VERSION = '0.13.7' +VERSION = '0.14.1' SECRET_KEY = '' DEBUG = False ALLOWED_HOSTS = []
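
A note on the `task_params` filter introduced in PATCH 395: django-background-tasks stores each task's arguments as a JSON string of `(args, kwargs)`, so every task whose first positional argument is a source UUID shares the text prefix `[["<uuid>"`. A minimal sketch of why `task_params__istartswith` matches, assuming the library's default JSON serialization (the UUID shown is illustrative):

    import json

    def source_params_prefix(source_id):
        # Tasks are created as index_source_task(str(source.pk), ...), so the
        # serialized params are a JSON array whose first element is the
        # positional-args list, beginning with the source UUID string.
        return f'[["{source_id}"'

    # Stand-in for what background_task stores in Task.task_params:
    uuid = '2e0b4e3f-0000-0000-0000-000000000000'
    stored = json.dumps(([uuid], {'repeat': 3600}))
    assert stored.startswith(source_params_prefix(uuid))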
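On the `TaskQueue` choices from PATCH 397: the members map human-readable labels to the short strings ('database', 'filesystem', 'network') that tasks are enqueued under, which is also what PATCH 398 pins the network worker to via `process_tasks --queue network`. A framework-free sketch of the pattern, assuming the project's `Val` helper simply unwraps a choices member to its stored value:

    from enum import Enum

    class TaskQueue(str, Enum):
        DB = 'database'
        FS = 'filesystem'
        NET = 'network'

    def Val(member):
        # Assumption: mirrors sync.choices.Val by returning the raw string
        # stored on the Task row (and matched by process_tasks --queue).
        return member.value

    assert Val(TaskQueue.NET) == 'network'
    # migrate_queues() in PATCH 400 rewrites any Task whose queue is not
    # one of these three values to Val(TaskQueue.NET).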
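Finally, the settings-access change in PATCHES 407 and 408 illustrates a general Python gotcha: a `getattr` default only applies when the attribute is missing, not when it is present but `None`. A small illustration using a stand-in object rather than Django settings:

    class _Settings:
        RENAME_SOURCES = None  # defined, but explicitly None

    settings = _Settings()

    # PATCH 407: the default is ignored because the attribute exists.
    value = getattr(settings, 'RENAME_SOURCES', list())
    assert value is None

    # PATCH 408: 'or list()' also normalizes a defined-but-None setting
    # (getattr without a default assumes the name is always defined).
    value = getattr(settings, 'RENAME_SOURCES') or list()
    assert value == []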