From aedaa6f7b748cb3ba83ac6238759de9f6530942d Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 16 May 2025 03:33:33 -0400 Subject: [PATCH 001/118] Copy generic functions from `sync.utils` --- tubesync/common/utils.py | 65 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index c4798943..fdf04352 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -8,11 +8,22 @@ import string import time from datetime import datetime from django.core.paginator import Paginator +from functools import partial +from operator import attrgetter, itemgetter +from pathlib import Path from urllib.parse import urlunsplit, urlencode, urlparse from yt_dlp.utils import LazyList from .errors import DatabaseConnectionError +def directory_and_stem(arg_path, /): + filepath = Path(arg_path) + stem = Path(filepath.stem) + while stem.suffixes and '' != stem.suffix: + stem = Path(stem.stem) + return (filepath.parent, str(stem),) + + def getenv(key, default=None, /, *, integer=False, string=True): ''' Guarantees a returned type from calling `os.getenv` @@ -48,6 +59,51 @@ def getenv(key, default=None, /, *, integer=False, string=True): return r +def glob_quote(filestr, /): + _glob_specials = { + '?': '[?]', + '*': '[*]', + '[': '[[]', + ']': '[]]', # probably not needed, but it won't hurt + } + + if not isinstance(filestr, str): + raise TypeError(f'expected a str, got "{type(filestr)}"') + + return filestr.translate(str.maketrans(_glob_specials)) + + +def list_of_dictionaries(arg_list, /, *, arg_function=lambda x: x): + assert callable(arg_function) + if isinstance(arg_list, list): + _map_func = partial(lambda f, d: f(d) if isinstance(d, dict) else d, arg_function) + return (True, list(map(_map_func, arg_list)),) + return (False, arg_list,) + + +def mkdir_p(arg_path, /, *, mode=0o777): + ''' + Reminder: mode only affects the last directory + ''' + dirpath = Path(arg_path) + return dirpath.mkdir(mode=mode, parents=True, exist_ok=True) + + +def multi_key_sort(iterable, specs, /, use_reversed=False, *, item=False, attr=False, key_func=None): + result = list(iterable) + if key_func is None: + # itemgetter is the default + if item or not (item or attr): + key_func = itemgetter + elif attr: + key_func = attrgetter + for key, reverse in reversed(specs): + result.sort(key=key_func(key), reverse=reverse) + if use_reversed: + return list(reversed(result)) + return result + + def parse_database_connection_string(database_connection_string): ''' Parses a connection string in a URL style format, such as: @@ -180,6 +236,15 @@ def json_serial(obj): raise TypeError(f'Type {type(obj)} is not json_serial()-able') +def seconds_to_timestr(seconds): + seconds = seconds % (24 * 3600) + hour = seconds // 3600 + seconds %= 3600 + minutes = seconds // 60 + seconds %= 60 + return '{:02d}:{:02d}:{:02d}'.format(hour, minutes, seconds) + + def time_func(func): def wrapper(*args, **kwargs): start = time.perf_counter() From 3aa5ec9aae4a9f7caa80586a17428ba76df200ea Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 20 May 2025 04:25:47 -0400 Subject: [PATCH 002/118] Remove `json_serial` as it is in json.py instead --- tubesync/common/utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index 64c597cb..e773bb5b 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -223,14 +223,6 @@ def clean_emoji(s): return emoji.replace_emoji(s) -def json_serial(obj): - if isinstance(obj, datetime): - return obj.isoformat() - if isinstance(obj, LazyList): - return list(obj) - raise TypeError(f'Type {type(obj)} is not json_serial()-able') - - def seconds_to_timestr(seconds): seconds = seconds % (24 * 3600) hour = seconds // 3600 From 360b39ebcf166059d72f4cc7a94825cf17be76e3 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 24 May 2025 23:03:45 -0400 Subject: [PATCH 003/118] Update `directory_and_stem` in utils.py --- tubesync/common/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index e773bb5b..7256b3d8 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -13,11 +13,10 @@ from pathlib import Path from urllib.parse import urlunsplit, urlencode, urlparse from .errors import DatabaseConnectionError - -def directory_and_stem(arg_path, /): +def directory_and_stem(arg_path, /, all_suffixes=False): filepath = Path(arg_path) stem = Path(filepath.stem) - while stem.suffixes and '' != stem.suffix: + while all_suffixes and stem.suffixes and '' != stem.suffix: stem = Path(stem.stem) return (filepath.parent, str(stem),) From 18e506a703d2fb14919cfecbb30dcbd9e25ae6c5 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 27 May 2025 03:39:43 -0400 Subject: [PATCH 004/118] Add `django-huey` --- Pipfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Pipfile b/Pipfile index 49b5127b..b0b3c6f5 100644 --- a/Pipfile +++ b/Pipfile @@ -8,6 +8,7 @@ autopep8 = "*" [packages] django = "~=5.2.1" +django-huey = "*" django-sass-processor = {extras = ["management-command"], version = "*"} pillow = "*" whitenoise = "*" From 3093315271fdacd95a16b92862de3f63461f5fab Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 01:39:54 -0400 Subject: [PATCH 005/118] Use `Metadata` table to store Source indexing results --- tubesync/sync/models/metadata.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/models/metadata.py b/tubesync/sync/models/metadata.py index 17d214fb..66b78b64 100644 --- a/tubesync/sync/models/metadata.py +++ b/tubesync/sync/models/metadata.py @@ -3,8 +3,9 @@ from common.json import JSONEncoder from common.timestamp import timestamp_to_datetime from common.utils import django_queryset_generator as qs_gen from django import db +from django.utils import timezone from django.utils.translation import gettext_lazy as _ -from .media import Media +from .media import Media, Source class Metadata(db.models.Model): @@ -17,6 +18,7 @@ class Metadata(db.models.Model): verbose_name_plural = _('Metadata about Media') unique_together = ( ('media', 'site', 'key'), + ('source', 'site', 'key', ), ) get_latest_by = ["-retrieved", "-created"] @@ -27,12 +29,22 @@ class Metadata(db.models.Model): default=uuid.uuid4, help_text=_('UUID of the metadata'), ) + source = db.models.ForeignKey( + Source, + on_delete=db.models.CASCADE, + related_name="videos", + related_query_name="video", + help_text=_('Source from which the video was retrieved'), + blank=True, + null=True, + ) media = db.models.OneToOneField( Media, # on_delete=models.DO_NOTHING, on_delete=db.models.SET_NULL, related_name='new_metadata', help_text=_('Media the metadata belongs to'), + blank=True, null=True, parent_link=False, ) @@ -62,8 +74,8 @@ class Metadata(db.models.Model): ) retrieved = db.models.DateTimeField( _('retrieved'), - auto_now_add=True, db_index=True, + default=timezone.now, help_text=_('Date and time the metadata was retrieved'), ) uploaded = db.models.DateTimeField( From 811e36abe0ebc7768272bc146f51cfc2f8df944b Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 01:53:01 -0400 Subject: [PATCH 006/118] Do not accept indexing results as media metadata --- tubesync/sync/models/media.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/models/media.py b/tubesync/sync/models/media.py index 62f73d5d..973b0b3e 100644 --- a/tubesync/sync/models/media.py +++ b/tubesync/sync/models/media.py @@ -600,8 +600,10 @@ class Media(models.Model): arg_dict=data, ) md_model = self._meta.fields_map.get('new_metadata').related_model - md, created = md_model.objects.get_or_create( - media_id=self.pk, + md, created = md_model.objects.filter( + source__isnull=True, + ).get_or_create( + media=self, site=site, key=self.key, ) From 2e1b96bb611472e8207c69558a07be7c0f29562d Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 05:16:37 -0400 Subject: [PATCH 007/118] Write to the database in batched transactions --- tubesync/sync/tasks.py | 118 ++++++++++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 38 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index bf5e43ed..3ceb31b0 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -10,6 +10,7 @@ import random import requests import time import uuid +from collections import deque as queue from io import BytesIO from hashlib import sha1 from pathlib import Path @@ -33,7 +34,7 @@ from common.errors import ( NoFormatException, NoMediaException, from common.utils import ( django_queryset_generator as qs_gen, remove_enclosed, ) from .choices import Val, TaskQueue -from .models import Source, Media, MediaServer +from .models import Source, Media, MediaServer, Metadata from .utils import ( get_remote_image, resize_image_to_height, write_text_file, filter_response, seconds_to_timestr, ) from .youtube import YouTubeError @@ -302,6 +303,24 @@ def cleanup_removed_media(source, video_keys): schedule_media_servers_update() +def save_db_batch(qs, objs, fields, /): + assert hasattr(qs, 'bulk_update') + assert callable(qs.bulk_update) + assert hasattr(objs, '__len__') + assert callable(objs.__len__) + assert isinstance(fields, (tuple, list, set, frozenset)) + + num_updated = 0 + num_objs = len(objs) + with atomic(durable=False): + num_updated = qs.bulk_update(objs=objs, fields=fields) + if num_objs == num_updated: + # this covers at least: list, set, deque + if hasattr(objs, 'clear') and callable(objs.clear): + objs.clear() + return num_updated + + @background(schedule=dict(priority=20, run_at=30), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def index_source_task(source_id): ''' @@ -347,6 +366,17 @@ def index_source_task(source_id): tvn_format = '{:,}' + f'/{num_videos:,}' vn = 0 video_keys = set() + db_batch_data = queue(list(), maxlen=50) + db_fields_data = frozenset(( + 'retrieved', + 'value', + )) + db_batch_media = queue(list(), maxlen=10) + db_fields_media = frozenset(( + 'duration', + 'published', + 'title', + )) while len(videos) > 0: vn += 1 video = videos.popleft() @@ -355,14 +385,24 @@ def index_source_task(source_id): if not key: # Video has no unique key (ID), it can't be indexed continue + if len(db_batch_data) == db_batch_data.maxlen: + save_db_batch(Metadata.objects, db_batch_data, db_fields_data) + if len(db_batch_media) == db_batch_media.maxlen: + save_db_batch(Media.objects, db_batch_media, db_fields_media) video_keys.add(key) update_task_status(task, tvn_format.format(vn)) - # media, new_media = Media.objects.get_or_create(key=key, source=source) - try: - media = Media.objects.get(key=key, source=source) - except Media.DoesNotExist: - media = Media(key=key) - media.source = source + data, new_data = source.videos.defer('value').filter( + media__isnull=True, + ).get_or_create(key=key) + data.retrieved = source.last_crawl + data.value = video + db_batch_data.append(data) + media, new_media = source.media_source.only( + 'uuid', + 'source', + 'key', + *db_fields_media, + ).get_or_create(key=key) media.duration = float(video.get(fields('duration', media), None) or 0) or None media.title = str(video.get(fields('title', media), ''))[:200] timestamp = video.get(fields('timestamp', media), None) @@ -373,45 +413,47 @@ def index_source_task(source_id): else: if published_dt: media.published = published_dt - try: - media.save() - except IntegrityError as e: - log.error(f'Index media failed: {source} / {media} with "{e}"') - else: + db_batch_media.append(media) + if not new_media: log.debug(f'Indexed media: {vn}: {source} / {media}') + else: # log the new media instances - new_media_instance = ( - # new_media or - media.created and - source.last_crawl and - media.created >= source.last_crawl - ) - if new_media_instance: - log.info(f'Indexed new media: {source} / {media}') - log.info(f'Scheduling tasks to download thumbnail for: {media.key}') - thumbnail_fmt = 'https://i.ytimg.com/vi/{}/{}default.jpg' - vn_fmt = _('Downloading {} thumbnail for: "{}": {}') - for prefix in ('hq', 'sd', 'maxres',): - thumbnail_url = thumbnail_fmt.format( - media.key, - prefix, - ) - download_media_thumbnail( - str(media.pk), - thumbnail_url, - verbose_name=vn_fmt.format(prefix, media.key, media.name), - ) - log.info(f'Scheduling task to download metadata for: {media.url}') - verbose_name = _('Downloading metadata for: "{}": {}') - download_media_metadata( - str(media.pk), - verbose_name=verbose_name.format(media.key, media.name), + log.info(f'Indexed new media: {source} / {media}') + log.info(f'Scheduling tasks to download thumbnail for: {media.key}') + thumbnail_fmt = 'https://i.ytimg.com/vi/{}/{}default.jpg' + vn_fmt = _('Downloading {} thumbnail for: "{}": {}') + for prefix in ('hq', 'sd', 'maxres',): + thumbnail_url = thumbnail_fmt.format( + media.key, + prefix, ) + download_media_thumbnail( + str(media.pk), + thumbnail_url, + verbose_name=vn_fmt.format(prefix, media.key, media.name), + ) + log.info(f'Scheduling task to download metadata for: {media.url}') + verbose_name = _('Downloading metadata for: "{}": {}') + download_media_metadata( + str(media.pk), + verbose_name=verbose_name.format(media.key, media.name), + ) # Reset task.verbose_name to the saved value update_task_status(task, None) + # Update any remaining items in the batches + save_db_batch(Metadata.objects, db_batch_data, db_fields_data) + save_db_batch(Media.objects, db_batch_media, db_fields_media) # Cleanup of media no longer available from the source cleanup_removed_media(source, video_keys) videos = video = None + db_batch_data.clear() + db_batch_media.clear() + # Trigger any signals that we skipped with batches + vn_fmt = _('Checking all media for "{}"') + save_all_media_for_source( + str(source.pk), + verbose_name=vn_fmt.format(source.name), + ) @background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.FS)) From 1927b96fcc062d5b5462578fddceca0c3cad599d Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 05:37:09 -0400 Subject: [PATCH 008/118] Manage the "Checking all media" task better There is a delay for that task that allows small sources to finish indexing first. That left us with large indexing tasks and that task running concurrently, which some databases really do not handle well. Remove the task during indexing and schedule it when indexing has completed. --- tubesync/sync/tasks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 3ceb31b0..d634e705 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -342,6 +342,7 @@ def index_source_task(source_id): # TODO: determine if this affects anything source.has_failed = False save_model(source) + delete_task_by_source('sync.tasks.save_all_media_for_source', source.pk) # Index the source videos = source.index_media() if not videos: @@ -353,6 +354,7 @@ def index_source_task(source_id): # Got some media, update the last crawl timestamp source.last_crawl = timezone.now() save_model(source) + delete_task_by_source('sync.tasks.save_all_media_for_source', source.pk) num_videos = len(videos) log.info(f'Found {num_videos} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) @@ -445,13 +447,15 @@ def index_source_task(source_id): save_db_batch(Media.objects, db_batch_media, db_fields_media) # Cleanup of media no longer available from the source cleanup_removed_media(source, video_keys) + # Clear references to indexed data videos = video = None db_batch_data.clear() db_batch_media.clear() - # Trigger any signals that we skipped with batches + # Trigger any signals that we skipped with batched updates vn_fmt = _('Checking all media for "{}"') save_all_media_for_source( str(source.pk), + schedule=dict(run_at=60), verbose_name=vn_fmt.format(source.name), ) From 7f63c99d5cdf7a1d8c063332f6d32f0214265088 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 05:41:47 -0400 Subject: [PATCH 009/118] Remove an unused import --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d634e705..f2e4dc91 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -20,7 +20,7 @@ from django import db from django.conf import settings from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile -from django.db import DatabaseError, IntegrityError +from django.db import DatabaseError from django.db.transaction import atomic from django.utils import timezone from django.utils.translation import gettext_lazy as _ From 212f4fb9c716a784ab3879daf7be14289e01584f Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 06:12:39 -0400 Subject: [PATCH 010/118] Create 0035_alter_metadata_unique_together_metadata_source_and_more.py --- ...nique_together_metadata_source_and_more.py | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py diff --git a/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py b/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py new file mode 100644 index 00000000..388de67a --- /dev/null +++ b/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py @@ -0,0 +1,58 @@ +# Generated by Django 5.2.1 on 2025-05-28 09:57 + +import django.db.models.deletion +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('sync', '0034_source_target_schedule_and_more'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='metadata', + unique_together={('media', 'site', 'key')}, + ), + migrations.AddField( + model_name='metadata', + name='source', + field=models.ForeignKey( + blank=True, + help_text='Source from which the video was retrieved', + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name='videos', + related_query_name='video', + to='sync.source', + ), + ), + migrations.AlterField( + model_name='metadata', + name='media', + field=models.OneToOneField( + blank=True, + help_text='Media the metadata belongs to', + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name='new_metadata', + to='sync.media', + ), + ), + migrations.AlterField( + model_name='metadata', + name='retrieved', + field=models.DateTimeField( + db_index=True, + default=django.utils.timezone.now, + help_text='Date and time the metadata was retrieved', + verbose_name='retrieved', + ), + ), + migrations.AlterUniqueTogether( + name='metadata', + unique_together={('media', 'site', 'key'), ('source', 'site', 'key')}, + ), + ] + From 595e3479ec7b6c738fc9268717f3598f821c0182 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 06:19:41 -0400 Subject: [PATCH 011/118] Depend on 0033 temporarily --- ..._alter_metadata_unique_together_metadata_source_and_more.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py b/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py index 388de67a..cd294226 100644 --- a/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py +++ b/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py @@ -7,7 +7,8 @@ from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('sync', '0034_source_target_schedule_and_more'), + ('sync', '0033_alter_mediaserver_options_alter_source_source_acodec_and_more'), + #('sync', '0034_source_target_schedule_and_more'), ] operations = [ From d970733204cc6236ca2f76203d6bd390322362af Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 12:27:41 -0400 Subject: [PATCH 012/118] Providing the `source` column is still needed --- tubesync/sync/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index f2e4dc91..281b54c3 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -395,7 +395,7 @@ def index_source_task(source_id): update_task_status(task, tvn_format.format(vn)) data, new_data = source.videos.defer('value').filter( media__isnull=True, - ).get_or_create(key=key) + ).get_or_create(source=source, key=key) data.retrieved = source.last_crawl data.value = video db_batch_data.append(data) @@ -404,7 +404,7 @@ def index_source_task(source_id): 'source', 'key', *db_fields_media, - ).get_or_create(key=key) + ).get_or_create(source=source, key=key) media.duration = float(video.get(fields('duration', media), None) or 0) or None media.title = str(video.get(fields('title', media), ''))[:200] timestamp = video.get(fields('timestamp', media), None) From 0b5a475e03b70c9f59d1d0b7f1126741c200005c Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 16:37:57 -0400 Subject: [PATCH 013/118] Ignore indexed data when trying to reuse old metadata --- tubesync/sync/signals.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 69254146..4fc2fcc8 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -429,6 +429,7 @@ def media_post_delete(sender, instance, **kwargs): # Re-use the old metadata if it exists instance_qs = Metadata.objects.filter( media__isnull=True, + source__isnull=True, site=old_metadata.get(site_field) or 'Youtube', key=skipped_media.key, ) From 15d4a8f4e46fe86108a3a2cea94c7fe7304a8826 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 21:13:15 -0400 Subject: [PATCH 014/118] Create huey.py --- tubesync/common/huey.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tubesync/common/huey.py diff --git a/tubesync/common/huey.py b/tubesync/common/huey.py new file mode 100644 index 00000000..3d145ca0 --- /dev/null +++ b/tubesync/common/huey.py @@ -0,0 +1,19 @@ + + +def sqlite_tasks(key, /, prefix=None): + name_fmt = 'huey_{}' + if prefix is None: + prefix = '' + if prefix: + name_fmt = f'huey_{prefix}_' + '{}' + name = name_fmt.format(key) + return dict( + huey_class='huey.SqliteHuey', + name=name, + connection=dict( + filename=f'/config/tasks/{name}.db', + fsync=True, + strict_fifo=True, + ), + ) + From 261e17954fec7a4f2d0f7da9b4a87ebf17dcb6d3 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 21:17:27 -0400 Subject: [PATCH 015/118] Add `django_huey` queues to settings --- tubesync/tubesync/settings.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index cd6d1dca..a087da1a 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -1,5 +1,6 @@ from django import VERSION as DJANGO_VERSION from pathlib import Path +from common.huey import sqlite_tasks from common.utils import getenv @@ -8,7 +9,7 @@ CONFIG_BASE_DIR = BASE_DIR DOWNLOADS_BASE_DIR = BASE_DIR -VERSION = '0.15.4' +VERSION = '0.15.7' SECRET_KEY = '' DEBUG = False ALLOWED_HOSTS = [] @@ -24,6 +25,7 @@ INSTALLED_APPS = [ 'django.contrib.humanize', 'sass_processor', 'background_task', + 'django_huey', 'common', 'sync', ] @@ -47,6 +49,17 @@ ROOT_URLCONF = 'tubesync.urls' FORCE_SCRIPT_NAME = None +DJANGO_HUEY = { + 'default': 'network', + 'queues': { + 'database': sqlite_tasks('database'), + 'filesystem': sqlite_tasks('filesystem'), + 'limited': sqlite_tasks('limited', prefix='net'), + 'network': sqlite_tasks('network'), + }, +} + + TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', From 8d64a966ff19f83d770624443493312b09ca98ee Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 22:15:12 -0400 Subject: [PATCH 016/118] Add more configuration --- tubesync/common/huey.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tubesync/common/huey.py b/tubesync/common/huey.py index 3d145ca0..97bb6ee2 100644 --- a/tubesync/common/huey.py +++ b/tubesync/common/huey.py @@ -10,10 +10,21 @@ def sqlite_tasks(key, /, prefix=None): return dict( huey_class='huey.SqliteHuey', name=name, + immediate=False, + results=True, + store_none=False, + utc=True, + compression=True, connection=dict( filename=f'/config/tasks/{name}.db', fsync=True, strict_fifo=True, ), + consumer=dict( + workers=1, + worker_type='process', + max_delay=20.0, + scheduler_interval=10, + ), ) From 482d85768da17c36a2b6893ce9b18e117fc52d9b Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 22:32:25 -0400 Subject: [PATCH 017/118] Create the `/config/tasks` directory --- config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run index baaf6e0c..7b085515 100755 --- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run @@ -4,6 +4,9 @@ groupmod -o -g "${PGID:=911}" app usermod -o -u "${PUID:=911}" app +# Ensure /config directories exist +mkdir -v -p /config/tasks + # Reset permissions chown -R app:app /run/app chmod -R 0700 /run/app From 16ee589ab30d3faa9d10f25e5fd3f243f68a71a0 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 23:46:36 -0400 Subject: [PATCH 018/118] Create huey-net-limited/dependencies.d/base --- .../etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/base | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/base diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/base b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/base new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/base @@ -0,0 +1 @@ + From 730e75e086513403674463e5686ed7feefcca0e2 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 23:54:11 -0400 Subject: [PATCH 019/118] Create huey-network/dependencies.d/base --- .../root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/base | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/base diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/base b/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/base new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/base @@ -0,0 +1 @@ + From 379d54873bfe557dfd9be386ccf92a415f06a998 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 23:56:16 -0400 Subject: [PATCH 020/118] Create huey-database/dependencies.d/base --- .../etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/base | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/base diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/base b/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/base new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/base @@ -0,0 +1 @@ + From f2afbc17b4fa1d0cc2b770c63ac96f511ff2077e Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 28 May 2025 23:58:21 -0400 Subject: [PATCH 021/118] Create huey-filesystem/dependencies.d/base --- .../etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/base | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/base diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/base b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/base new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/base @@ -0,0 +1 @@ + From 5c54909edd1329fb58afa840e72f40ada8e312da Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:01:50 -0400 Subject: [PATCH 022/118] Create huey-net-limited/down-signal --- config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/down-signal | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/down-signal diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/down-signal b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/down-signal new file mode 100644 index 00000000..d751378e --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/down-signal @@ -0,0 +1 @@ +SIGINT From 08d593d320958554b84bf4fa436ce15a40724ccb Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:03:04 -0400 Subject: [PATCH 023/118] Create huey-network/down-signal --- config/root/etc/s6-overlay/s6-rc.d/huey-network/down-signal | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-network/down-signal diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-network/down-signal b/config/root/etc/s6-overlay/s6-rc.d/huey-network/down-signal new file mode 100644 index 00000000..d751378e --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/down-signal @@ -0,0 +1 @@ +SIGINT From 61260fe2a5275a35ec182b93a4ef3c40ff54a83c Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:04:27 -0400 Subject: [PATCH 024/118] Create huey-database/down-signal --- config/root/etc/s6-overlay/s6-rc.d/huey-database/down-signal | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-database/down-signal diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/down-signal b/config/root/etc/s6-overlay/s6-rc.d/huey-database/down-signal new file mode 100644 index 00000000..d751378e --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/down-signal @@ -0,0 +1 @@ +SIGINT From c39491b644250e2701b8c167a449556746617a8a Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:05:32 -0400 Subject: [PATCH 025/118] Create huey-filesystem/down-signal --- config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/down-signal | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/down-signal diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/down-signal b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/down-signal new file mode 100644 index 00000000..d751378e --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/down-signal @@ -0,0 +1 @@ +SIGINT From 227624a854d2be551cd7d00958ee27638fb47668 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:07:52 -0400 Subject: [PATCH 026/118] Create huey-net-limited/type --- config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/type | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/type diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/type b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/type new file mode 100644 index 00000000..5883cff0 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/type @@ -0,0 +1 @@ +longrun From c16d3fe18f0d718110738e868d309bd38a08e782 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:08:53 -0400 Subject: [PATCH 027/118] Create huey-network/type --- config/root/etc/s6-overlay/s6-rc.d/huey-network/type | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-network/type diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-network/type b/config/root/etc/s6-overlay/s6-rc.d/huey-network/type new file mode 100644 index 00000000..5883cff0 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/type @@ -0,0 +1 @@ +longrun From 8b08177394d1c9dce4b1b772cfade238ea48d9eb Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:10:01 -0400 Subject: [PATCH 028/118] Create huey-database/type --- config/root/etc/s6-overlay/s6-rc.d/huey-database/type | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-database/type diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/type b/config/root/etc/s6-overlay/s6-rc.d/huey-database/type new file mode 100644 index 00000000..5883cff0 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/type @@ -0,0 +1 @@ +longrun From 7ae89b3471fdfa37c2b81a2d849fd62e6cf49302 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:11:10 -0400 Subject: [PATCH 029/118] Create huey-filesystem/type --- config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/type | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/type diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/type b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/type new file mode 100644 index 00000000..5883cff0 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/type @@ -0,0 +1 @@ +longrun From 6e5099e199fbe14122851f474238336f2ffb80b8 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:29:55 -0400 Subject: [PATCH 030/118] Create huey-net-limited/run --- config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/run | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/run diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/run b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/run new file mode 100644 index 00000000..c40cd786 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py djangohuey \ + --queue limited From c1159088e533c018e7ddc5d95da55027609b7755 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:31:31 -0400 Subject: [PATCH 031/118] Create huey-network/run --- config/root/etc/s6-overlay/s6-rc.d/huey-network/run | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-network/run diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-network/run b/config/root/etc/s6-overlay/s6-rc.d/huey-network/run new file mode 100644 index 00000000..0e5f0e8f --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py djangohuey \ + --queue network From 2b0d3db90a2630417d762a8f75cb9b113fd0258c Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:33:07 -0400 Subject: [PATCH 032/118] Create huey-database/run --- config/root/etc/s6-overlay/s6-rc.d/huey-database/run | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-database/run diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/run b/config/root/etc/s6-overlay/s6-rc.d/huey-database/run new file mode 100644 index 00000000..c803c4c3 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py djangohuey \ + --queue database From 5e224254b57a8e0fd5f45a7383b5722ce00a91ba Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 00:34:14 -0400 Subject: [PATCH 033/118] Create huey-filesystem/run --- config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/run | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/run diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/run b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/run new file mode 100644 index 00000000..f52ee7c6 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py djangohuey \ + --queue filesystem From 6aac84922b1763e49370a2336c4f46c3d5a5ade3 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 01:16:06 -0400 Subject: [PATCH 034/118] Revert Depend on 0033 temporarily --- ..._alter_metadata_unique_together_metadata_source_and_more.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py b/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py index cd294226..388de67a 100644 --- a/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py +++ b/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py @@ -7,8 +7,7 @@ from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('sync', '0033_alter_mediaserver_options_alter_source_source_acodec_and_more'), - #('sync', '0034_source_target_schedule_and_more'), + ('sync', '0034_source_target_schedule_and_more'), ] operations = [ From 9a5acdd2ab7fd08c36148f7e0a2ce749973321a7 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 08:52:07 -0400 Subject: [PATCH 035/118] Create new Media instances with defaults --- tubesync/sync/tasks.py | 82 ++++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 10a0c8ae..2ded44cd 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -341,14 +341,12 @@ def index_source_task(source_id): # update the target schedule column source.task_run_at_dt # Reset any errors - # TODO: determine if this affects anything source.has_failed = False - save_model(source) - delete_task_by_source('sync.tasks.save_all_media_for_source', source.pk) # Index the source videos = source.index_media() if not videos: - # TODO: Record this error in source.has_failed ? + source.has_failed = True + save_model(source) raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no ' f'media to index, is the source key valid? Check the ' f'source configuration is correct and that the source ' @@ -359,20 +357,11 @@ def index_source_task(source_id): delete_task_by_source('sync.tasks.save_all_media_for_source', source.pk) num_videos = len(videos) log.info(f'Found {num_videos} media items for source: {source}') - fields = lambda f, m: m.get_metadata_field(f) - task = get_source_index_task(source_id) - if task: - task._verbose_name = remove_enclosed( - task.verbose_name, '[', ']', ' ', - valid='0123456789/,', - end=task.verbose_name.find('Index'), - ) tvn_format = '{:,}' + f'/{num_videos:,}' - vn = 0 - video_keys = set() db_batch_data = queue(list(), maxlen=50) db_fields_data = frozenset(( 'retrieved', + 'site', 'value', )) db_batch_media = queue(list(), maxlen=10) @@ -381,6 +370,16 @@ def index_source_task(source_id): 'published', 'title', )) + fields = lambda f, m: m.get_metadata_field(f) + task = get_source_index_task(source_id) + if task: + task._verbose_name = remove_enclosed( + task.verbose_name, '[', ']', ' ', + valid='0123456789/,', + end=task.verbose_name.find('Index'), + ) + vn = 0 + video_keys = set() while len(videos) > 0: vn += 1 video = videos.popleft() @@ -389,26 +388,18 @@ def index_source_task(source_id): if not key: # Video has no unique key (ID), it can't be indexed continue + video_keys.add(key) if len(db_batch_data) == db_batch_data.maxlen: save_db_batch(Metadata.objects, db_batch_data, db_fields_data) if len(db_batch_media) == db_batch_media.maxlen: save_db_batch(Media.objects, db_batch_media, db_fields_media) - video_keys.add(key) update_task_status(task, tvn_format.format(vn)) - data, new_data = source.videos.defer('value').filter( - media__isnull=True, - ).get_or_create(source=source, key=key) - data.retrieved = source.last_crawl - data.value = video - db_batch_data.append(data) - media, new_media = source.media_source.only( - 'uuid', - 'source', - 'key', - *db_fields_media, - ).get_or_create(source=source, key=key) - media.duration = float(video.get(fields('duration', media), None) or 0) or None - media.title = str(video.get(fields('title', media), ''))[:200] + media_defaults = dict() + # create a dummy instance to use its functions + media = Media(source=source, key=key) + media_defaults['duration'] = float(video.get(fields('duration', media), None) or 0) or None + media_defaults['title'] = str(video.get(fields('title', media), ''))[:200] + site = video.get(fields('ie_key', media), None) timestamp = video.get(fields('timestamp', media), None) try: published_dt = media.ts_to_dt(timestamp) @@ -416,9 +407,36 @@ def index_source_task(source_id): pass else: if published_dt: - media.published = published_dt + media_defaults['published'] = published_dt + # Retrieve or create the actual media instance + media, new_media = source.media_source.only( + 'uuid', + 'source', + 'key', + *db_fields_media, + ).get_or_create(defaults=media_defaults, source=source, key=key) + for key in ('epoch', 'availability',): + field = fields(key, media) + value = video.get(field) + if value is None and 'epoch' == key: + value = timestamp + if value is not None: + media.save_to_metadata(field, value) + if site: + media.save_to_metadata(fields('extractor_key', media), site) db_batch_media.append(media) + data, new_data = source.videos.defer('value').filter( + media__isnull=True, + ).get_or_create(source=source, key=key) + if site: + data.site = site + data.retrieved = source.last_crawl + data.value = video + db_batch_data.append(data) if not new_media: + # update the existing media + for key, value in media_defaults.items(): + setattr(media, key, value) log.debug(f'Indexed media: {vn}: {source} / {media}') else: # log the new media instances @@ -426,7 +444,7 @@ def index_source_task(source_id): log.info(f'Scheduling tasks to download thumbnail for: {media.key}') thumbnail_fmt = 'https://i.ytimg.com/vi/{}/{}default.jpg' vn_fmt = _('Downloading {} thumbnail for: "{}": {}') - for prefix in ('hq', 'sd', 'maxres',): + for num, prefix in enumerate(('hq', 'sd', 'maxres',)): thumbnail_url = thumbnail_fmt.format( media.key, prefix, @@ -434,12 +452,14 @@ def index_source_task(source_id): download_media_thumbnail( str(media.pk), thumbnail_url, + schedule=dict(run_at=10+(300*num)), verbose_name=vn_fmt.format(prefix, media.key, media.name), ) log.info(f'Scheduling task to download metadata for: {media.url}') verbose_name = _('Downloading metadata for: "{}": {}') download_media_metadata( str(media.pk), + schedule=dict(priority=35), verbose_name=verbose_name.format(media.key, media.name), ) # Reset task.verbose_name to the saved value From 6b646145619e66a37e3cbf87dbf3086686262171 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 09:57:09 -0400 Subject: [PATCH 036/118] Use the `Media.metadata_clear` function --- tubesync/sync/models/media.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tubesync/sync/models/media.py b/tubesync/sync/models/media.py index 973b0b3e..41a96a1a 100644 --- a/tubesync/sync/models/media.py +++ b/tubesync/sync/models/media.py @@ -699,8 +699,7 @@ class Media(models.Model): data = self.loaded_metadata metadata_seconds = data.get('epoch', None) if not metadata_seconds: - self.metadata = None - self.save(update_fields={'metadata'}) + self.metadata_clear(save=True) return False now = timezone.now() From 5e20ebdf4760187c4386e190c11ee3bbaef476aa Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 10:18:23 -0400 Subject: [PATCH 037/118] Do not call `Media.save_to_metadata` function unless needed --- tubesync/sync/tasks.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 2ded44cd..289430d8 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -415,15 +415,19 @@ def index_source_task(source_id): 'key', *db_fields_media, ).get_or_create(defaults=media_defaults, source=source, key=key) - for key in ('epoch', 'availability',): + for key in ('epoch', 'availability', 'extractor_key',): field = fields(key, media) value = video.get(field) - if value is None and 'epoch' == key: - value = timestamp + existing_value = media.get_metadata_first_value(key) + if value is None: + if 'epoch' == key: + value = timestamp + elif 'extractor_key' == key: + value = site if value is not None: + if existing_value and ('epoch' == key or value == existing_value): + continue media.save_to_metadata(field, value) - if site: - media.save_to_metadata(fields('extractor_key', media), site) db_batch_media.append(media) data, new_data = source.videos.defer('value').filter( media__isnull=True, From e21da0fb26ab2821cce6cebf02f9c2d7595670af Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 11:29:32 -0400 Subject: [PATCH 038/118] Update `Media.has_metadata` One of the `*_id` keys must have a value. --- tubesync/sync/models/media.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models/media.py b/tubesync/sync/models/media.py index 41a96a1a..44a42a70 100644 --- a/tubesync/sync/models/media.py +++ b/tubesync/sync/models/media.py @@ -566,7 +566,11 @@ class Media(models.Model): @property def has_metadata(self): - return self.metadata is not None + result = self.metadata is not None + if not result: + return False + value = self.get_metadata_first_value(('id', 'display_id', 'channel_id', 'uploader_id',)) + return value is not None def metadata_clear(self, /, *, save=False): From 37a60447acbcb42b3eb66e11eeded4d1b5c33116 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 11:41:09 -0400 Subject: [PATCH 039/118] Add `channel_id` to the minimal metadata --- tubesync/sync/tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index 47089673..9d5ce991 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -354,6 +354,7 @@ class FrontEndTestCase(TestCase): # Add some media test_minimal_metadata = ''' { + "channel_id":"testkey", "thumbnail":"https://example.com/thumb.jpg", "formats": [{ "format_id":"251", From c8f95456f2f4c7c06b4979b79b4c7890dba62bb5 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 19:56:57 -0400 Subject: [PATCH 040/118] Create gunicorn dependency for huey-database --- .../etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/gunicorn | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/gunicorn diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/gunicorn b/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/gunicorn new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/gunicorn @@ -0,0 +1 @@ + From 7078c2b1ed1da940ac993eceb5eacba9e86d60e5 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 19:58:16 -0400 Subject: [PATCH 041/118] Create gunicorn dependency for huey-filesystem --- .../s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/gunicorn | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/gunicorn diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/gunicorn b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/gunicorn new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/gunicorn @@ -0,0 +1 @@ + From 16b8f9ad3c3c030a416d2650c3bd1c216715dbf1 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 19:59:40 -0400 Subject: [PATCH 042/118] Create gunicorn dependency for huey-network --- .../etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/gunicorn | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/gunicorn diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/gunicorn b/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/gunicorn new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/gunicorn @@ -0,0 +1 @@ + From 7e9be86789023a7d7d42eb7afcf85be699cfa575 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:00:47 -0400 Subject: [PATCH 043/118] Create gunicorn dependency for huey-net-limited --- .../s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/gunicorn | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/gunicorn diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/gunicorn b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/gunicorn new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/gunicorn @@ -0,0 +1 @@ + From 9b33606168cc042da81f33d13e7f4ebf156bed15 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:08:00 -0400 Subject: [PATCH 044/118] Create background-task-workers bundle --- config/root/etc/s6-overlay/s6-rc.d/background-task-workers/type | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/background-task-workers/type diff --git a/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/type b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/type new file mode 100644 index 00000000..757b4221 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/type @@ -0,0 +1 @@ +bundle From 71c0ae6ed2898e67c7f29697314d5ab6e8b5809d Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:10:49 -0400 Subject: [PATCH 045/118] Add tubesync-network-worker to background-task-workers bundle --- .../background-task-workers/contents.d/tubesync-network-worker | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-network-worker diff --git a/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-network-worker b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-network-worker new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-network-worker @@ -0,0 +1 @@ + From b4c14ea1dc76b72b4952a6d9ba61b79c72582f8b Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:12:56 -0400 Subject: [PATCH 046/118] Add tubesync-db-worker to background-task-workers bundle --- .../background-task-workers/contents.d/tubesync-db-worker | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-db-worker diff --git a/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-db-worker b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-db-worker new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-db-worker @@ -0,0 +1 @@ + From 3890ad880696c5ea7bdf5d0f5397fca429661727 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:14:23 -0400 Subject: [PATCH 047/118] Add tubesync-fs-worker to background-task-workers bundle --- .../background-task-workers/contents.d/tubesync-fs-worker | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-fs-worker diff --git a/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-fs-worker b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-fs-worker new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-fs-worker @@ -0,0 +1 @@ + From b72fc2d0229728e3658896cf4ce88b6d78ffbea5 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:21:50 -0400 Subject: [PATCH 048/118] Create huey-consumers bundle --- config/root/etc/s6-overlay/s6-rc.d/huey-consumers/type | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-consumers/type diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/type b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/type new file mode 100644 index 00000000..757b4221 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/type @@ -0,0 +1 @@ +bundle From 95181c45f019a4a01cb85aabe68c992ab0d2f517 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:25:08 -0400 Subject: [PATCH 049/118] Add huey-net-limited to huey-consumers bundle --- .../s6-rc.d/huey-consumers/contents.d/huey-net-limited | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-net-limited diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-net-limited b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-net-limited new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-net-limited @@ -0,0 +1 @@ + From 3c8d18929a25f612825af1c55004a27c8f31e820 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:27:53 -0400 Subject: [PATCH 050/118] Add huey-network to huey-consumers bundle --- .../s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-network | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-network diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-network b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-network new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-network @@ -0,0 +1 @@ + From a2abe4f584fd0b7a3cb720a7d2f737946f42447b Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:29:37 -0400 Subject: [PATCH 051/118] Add huey-database to huey-consumers bundle --- .../s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-database | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-database diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-database b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-database new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-database @@ -0,0 +1 @@ + From a8e112b024fd8d1c8a2587fa97b2a5a862a043c5 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:30:55 -0400 Subject: [PATCH 052/118] Add huey-filesystem to huey-consumers bundle --- .../s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-filesystem | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-filesystem diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-filesystem b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-filesystem new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-filesystem @@ -0,0 +1 @@ + From 4a090ab5538487c6b1010b3dbdd24cd37abb1ef6 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:32:52 -0400 Subject: [PATCH 053/118] Add huey-consumers to user bundle --- .../root/etc/s6-overlay/s6-rc.d/user/contents.d/huey-consumers | 1 + 1 file changed, 1 insertion(+) create mode 100644 config/root/etc/s6-overlay/s6-rc.d/user/contents.d/huey-consumers diff --git a/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/huey-consumers b/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/huey-consumers new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/huey-consumers @@ -0,0 +1 @@ + From 894f33254e7efb71e04e003d6ddab5e3a05b5ab8 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:43:58 -0400 Subject: [PATCH 054/118] Match the current version --- tubesync/tubesync/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index 938f7ecd..ea2f668b 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -9,7 +9,7 @@ CONFIG_BASE_DIR = BASE_DIR DOWNLOADS_BASE_DIR = BASE_DIR -VERSION = '0.15.7' +VERSION = '0.15.6' SECRET_KEY = '' DEBUG = False ALLOWED_HOSTS = [] From 36059c0c62d04588216a4de1a4945747d0808ea0 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 29 May 2025 20:50:16 -0400 Subject: [PATCH 055/118] Update ci.yaml --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5ae58c0a..ef45ca10 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -103,6 +103,7 @@ jobs: pip install --system --strict --requirements requirements.txt - name: Set up Django environment run: | + mkdir -v -p /config/tasks cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/background_task/ patches/background_task/* cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ patches/yt_dlp/* From d08bbf24e25459de85f0f8d3ebde3df423fac1f0 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 00:13:46 -0400 Subject: [PATCH 056/118] Use a symbolic link for `/config` --- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ef45ca10..55224a1b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -103,7 +103,8 @@ jobs: pip install --system --strict --requirements requirements.txt - name: Set up Django environment run: | - mkdir -v -p /config/tasks + mkdir -v -p ~/.config/TubeSync/config/tasks + sudo ln -v -s -f -T ~/.config/TubeSync/config /config cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/background_task/ patches/background_task/* cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ patches/yt_dlp/* From 6f5d35d33c7978c3038723bb0c60f77fff3f868e Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 00:35:39 -0400 Subject: [PATCH 057/118] Support local settings stored in `/config/tubesync` --- config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run index 7b085515..1b136c2d 100755 --- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run @@ -7,6 +7,18 @@ usermod -o -u "${PUID:=911}" app # Ensure /config directories exist mkdir -v -p /config/tasks +# Copy local_settings.py for the user +if [ -f /config/tubesync/local_settings.py ] +then + # from the image for an example or comparison + cp -v -p /app/tubesync/local_settings.py \ + /config/tubesync/local_settings.py.image + + # to the image to apply the user's settings + cp -v -p /config/tubesync/local_settings.py \ + /app/tubesync/local_settings.py +fi + # Reset permissions chown -R app:app /run/app chmod -R 0700 /run/app From bd06c5b888733af1660cb29d675f0af04591bbcf Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 01:17:11 -0400 Subject: [PATCH 058/118] Add the rest of the `/config` sub-directories --- config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run index 1b136c2d..df003540 100755 --- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run @@ -5,7 +5,7 @@ groupmod -o -g "${PGID:=911}" app usermod -o -u "${PUID:=911}" app # Ensure /config directories exist -mkdir -v -p /config/tasks +mkdir -v -p /config/{cache,media,tasks,tubesync} # Copy local_settings.py for the user if [ -f /config/tubesync/local_settings.py ] From 0b63d70fd7cddc0fdf289384bf6f964044eae795 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 01:29:52 -0400 Subject: [PATCH 059/118] Make directories before creating SQLite database --- tubesync/tubesync/settings.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index ea2f668b..0eb4ad58 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -58,6 +58,11 @@ DJANGO_HUEY = { 'network': sqlite_tasks('network'), }, } +for django_huey_queue in DJANGO_HUEY['queues'].items(): + connection = django_huey_queue.get('connection') + if connection: + filepath = Path('/.' + connection.get('filename') or '').resolve(strict=False) + filepath.parent.mkdir(exist_ok=True, parents=True) TEMPLATES = [ From f561de8140b1453a496a1a0c4f34244acd745529 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 01:34:10 -0400 Subject: [PATCH 060/118] Clean up any files created in Python --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index da1e84c5..118eda78 100644 --- a/Dockerfile +++ b/Dockerfile @@ -484,9 +484,10 @@ RUN set -x && \ # Run any required app commands /usr/bin/python3 -B /app/manage.py compilescss && \ /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \ + rm -rf /config /downloads /run/app && \ # Create config, downloads and run dirs mkdir -v -p /run/app && \ - mkdir -v -p /config/media && \ + mkdir -v -p /config/media /config/tasks && \ mkdir -v -p /config/cache/pycache && \ mkdir -v -p /downloads/audio && \ mkdir -v -p /downloads/video && \ From 33491edb9e15cc156bd4ca3d23ad95707d14254a Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 01:53:16 -0400 Subject: [PATCH 061/118] Only create `/config` for tests --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 55224a1b..05aab5a0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -103,7 +103,7 @@ jobs: pip install --system --strict --requirements requirements.txt - name: Set up Django environment run: | - mkdir -v -p ~/.config/TubeSync/config/tasks + mkdir -v -p ~/.config/TubeSync/config sudo ln -v -s -f -T ~/.config/TubeSync/config /config cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/background_task/ patches/background_task/* From ea43d540218fffa27c48de91a481ff9d9c789a6f Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 01:55:12 -0400 Subject: [PATCH 062/118] fixup: values instead of items --- tubesync/tubesync/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index 0eb4ad58..f5689dd1 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -58,7 +58,7 @@ DJANGO_HUEY = { 'network': sqlite_tasks('network'), }, } -for django_huey_queue in DJANGO_HUEY['queues'].items(): +for django_huey_queue in DJANGO_HUEY['queues'].values(): connection = django_huey_queue.get('connection') if connection: filepath = Path('/.' + connection.get('filename') or '').resolve(strict=False) From 98df36a1fc887bb6d868e57cecf321981e3269ef Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 02:18:01 -0400 Subject: [PATCH 063/118] Fail the build rather than hiding the file that is in a lower layer --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 118eda78..314eb98d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -480,7 +480,7 @@ COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings. # Build app RUN set -x && \ # Make absolutely sure we didn't accidentally bundle a SQLite dev database - rm -rf /app/db.sqlite3 && \ + test '!' -e /app/db.sqlite3 && \ # Run any required app commands /usr/bin/python3 -B /app/manage.py compilescss && \ /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \ From 479f36b7f18668017ad7caeb8d747f1ddaa243c7 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 08:29:58 -0400 Subject: [PATCH 064/118] Add and document logging settings --- tubesync/common/huey.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tubesync/common/huey.py b/tubesync/common/huey.py index 97bb6ee2..e88e319b 100644 --- a/tubesync/common/huey.py +++ b/tubesync/common/huey.py @@ -25,6 +25,12 @@ def sqlite_tasks(key, /, prefix=None): worker_type='process', max_delay=20.0, scheduler_interval=10, + simple_log=False, + # verbose has three positions: + # DEBUG: True + # INFO: None + # WARNING: False + verbose=False, ), ) From 78a57211b2dc52c971442966dd71b7c54fc87f0a Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 30 May 2025 08:35:36 -0400 Subject: [PATCH 065/118] Unlock previously locked tasks when the consumer starts --- tubesync/common/huey.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/common/huey.py b/tubesync/common/huey.py index e88e319b..66d13fc2 100644 --- a/tubesync/common/huey.py +++ b/tubesync/common/huey.py @@ -24,6 +24,7 @@ def sqlite_tasks(key, /, prefix=None): workers=1, worker_type='process', max_delay=20.0, + flush_locks=True, scheduler_interval=10, simple_log=False, # verbose has three positions: From 1ac70a16a0e0442371c71c893b467c006a160aff Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 1 Jun 2025 01:56:18 -0400 Subject: [PATCH 066/118] Enable `editor` in the container For less than 3 MiB, I can have a modern editor that does enough of what `vim-nox` does that zi won't miss that. --- Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Dockerfile b/Dockerfile index da1e84c5..aba92a07 100644 --- a/Dockerfile +++ b/Dockerfile @@ -321,6 +321,8 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va apt-get -y autoclean && \ rm -v -f /var/cache/debconf/*.dat-old +# The preference for openresty over nginx, +# is for the newer version. FROM tubesync-openresty AS tubesync ARG S6_VERSION @@ -352,7 +354,12 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va python3-pip-whl \ python3-socks \ curl \ + indent \ less \ + lua-lpeg \ + tre-agrep \ + vis \ + xxd \ && \ # Link to the current python3 version ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ From 89abf8f56875c898de15d5fdb16c7f7018b041ae Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 1 Jun 2025 02:01:02 -0400 Subject: [PATCH 067/118] Add `babi` editor --- Pipfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Pipfile b/Pipfile index 49b5127b..34a52aa8 100644 --- a/Pipfile +++ b/Pipfile @@ -25,3 +25,4 @@ emoji = "*" brotli = "*" html5lib = "*" bgutil-ytdlp-pot-provider = "*" +babi = "*" From e5c19a2c47984af152e5ffa60d336064b8a3a2ec Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 1 Jun 2025 12:39:10 -0400 Subject: [PATCH 068/118] Configure alternatives groups --- Dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index aba92a07..735c10eb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -362,7 +362,14 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va xxd \ && \ # Link to the current python3 version - ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ + update-alternatives --install /usr/local/lib/python3 python3-lib \ + "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" 100 && \ + # Configure the editor and nano alternatives + touch /usr/local/bin/babi /bin/nano && \ + update-alternatives --install /usr/local/bin/nano nano /bin/nano 10 && \ + update-alternatives --install /usr/local/bin/nano nano /usr/local/bin/babi 20 && \ + update-alternatives --install /usr/bin/editor editor /usr/local/bin/babi 50 && \ + rm -v /usr/local/bin/babi /bin/nano && \ # Create a 'app' user which the application will run as groupadd app && \ useradd -M -d /app -s /bin/false -g app app && \ From c1e30b775117d6046dfec32853358c08e84c7d76 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 1 Jun 2025 12:57:21 -0400 Subject: [PATCH 069/118] Add the `vim` alternatives group --- Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 735c10eb..d2df5d67 100644 --- a/Dockerfile +++ b/Dockerfile @@ -364,12 +364,14 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va # Link to the current python3 version update-alternatives --install /usr/local/lib/python3 python3-lib \ "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" 100 && \ - # Configure the editor and nano alternatives - touch /usr/local/bin/babi /bin/nano && \ + # Configure the editor alternatives + touch /usr/local/bin/babi /bin/nano /usr/bin/vim.tiny && \ + update-alternatives --install /usr/bin/editor editor /usr/local/bin/babi 50 && \ update-alternatives --install /usr/local/bin/nano nano /bin/nano 10 && \ update-alternatives --install /usr/local/bin/nano nano /usr/local/bin/babi 20 && \ - update-alternatives --install /usr/bin/editor editor /usr/local/bin/babi 50 && \ - rm -v /usr/local/bin/babi /bin/nano && \ + update-alternatives --install /usr/local/bin/vim vim /usr/bin/vim.tiny 15 && \ + rm -v /usr/local/bin/babi /bin/nano /usr/bin/vim.tiny && \ + update-alternatives --install /usr/local/bin/vim vim /usr/bin/vis 35 && \ # Create a 'app' user which the application will run as groupadd app && \ useradd -M -d /app -s /bin/false -g app app && \ From 8b0d3ebd30b323648ff14d72d8bc517ee5a305b0 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 1 Jun 2025 13:00:21 -0400 Subject: [PATCH 070/118] Set the `EDITOR` environment variable to use the alternatives group --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index d2df5d67..022a2b7b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,6 +25,7 @@ ARG TARGETARCH ENV DEBIAN_FRONTEND="noninteractive" \ APT_KEEP_ARCHIVES=1 \ + EDITOR="editor" \ HOME="/root" \ LANGUAGE="en_US.UTF-8" \ LANG="en_US.UTF-8" \ From 133d65a6c98b02685b966ca51384b72c1f9cdfe4 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 1 Jun 2025 13:04:18 -0400 Subject: [PATCH 071/118] Prevent `update-alternatives` from doing the wrong thing --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 022a2b7b..7ad61039 100644 --- a/Dockerfile +++ b/Dockerfile @@ -371,8 +371,8 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va update-alternatives --install /usr/local/bin/nano nano /bin/nano 10 && \ update-alternatives --install /usr/local/bin/nano nano /usr/local/bin/babi 20 && \ update-alternatives --install /usr/local/bin/vim vim /usr/bin/vim.tiny 15 && \ - rm -v /usr/local/bin/babi /bin/nano /usr/bin/vim.tiny && \ update-alternatives --install /usr/local/bin/vim vim /usr/bin/vis 35 && \ + rm -v /usr/local/bin/babi /bin/nano /usr/bin/vim.tiny && \ # Create a 'app' user which the application will run as groupadd app && \ useradd -M -d /app -s /bin/false -g app app && \ From 6d04d773c3bedfe5ed2cde907526ef1488f5f8a5 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 14:39:07 -0400 Subject: [PATCH 072/118] Remove `glob_quote` from sync/utils.py --- tubesync/sync/utils.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index fc7874fd..46668c25 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -95,20 +95,6 @@ def resize_image_to_height(image, width, height): return image -def glob_quote(filestr): - _glob_specials = { - '?': '[?]', - '*': '[*]', - '[': '[[]', - ']': '[]]', # probably not needed, but it won't hurt - } - - if not isinstance(filestr, str): - raise TypeError(f'filestr must be a str, got "{type(filestr)}"') - - return filestr.translate(str.maketrans(_glob_specials)) - - def file_is_editable(filepath): ''' Checks that a file exists and the file is in an allowed predefined tuple of From dbd30e65d6a4b96316b1561b6795785aeefd0573 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 14:42:12 -0400 Subject: [PATCH 073/118] Adjust the `glob_quote` import in sync/signals.py --- tubesync/sync/signals.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index d68a082f..e1a22ee5 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -10,6 +10,7 @@ from django.utils.translation import gettext_lazy as _ from background_task.signals import task_failed from background_task.models import Task from common.logger import log +from common.utils import glob_quote from .models import Source, Media, Metadata from .tasks import (delete_task_by_source, delete_task_by_media, index_source_task, download_media_thumbnail, download_media_metadata, @@ -17,7 +18,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta download_media, download_source_images, delete_all_media_for_source, save_all_media_for_source, rename_media, get_media_metadata_task, get_media_download_task) -from .utils import delete_file, glob_quote, mkdir_p +from .utils import delete_file, mkdir_p from .filtering import filter_media from .choices import Val, YouTube_SourceType From c7e8c974e67e6ed49f225c44411d5e7937d064b6 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 14:45:28 -0400 Subject: [PATCH 074/118] Adjust the `glob_quote` import in sync/media.py --- tubesync/sync/models/media.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/models/media.py b/tubesync/sync/models/media.py index 62f73d5d..32e026e8 100644 --- a/tubesync/sync/models/media.py +++ b/tubesync/sync/models/media.py @@ -17,7 +17,7 @@ from common.logger import log from common.errors import NoFormatException from common.json import JSONEncoder from common.utils import ( - clean_filename, clean_emoji, + clean_filename, clean_emoji, glob_quote, ) from ..youtube import ( get_media_info as get_youtube_media_info, @@ -25,7 +25,7 @@ from ..youtube import ( ) from ..utils import ( seconds_to_timestr, parse_media_format, filter_response, - write_text_file, mkdir_p, glob_quote, multi_key_sort, + write_text_file, mkdir_p, multi_key_sort, ) from ..matching import ( get_best_combined_format, From 7b23d3b0f01567df99408fac5052ceb5510421d8 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 14:51:45 -0400 Subject: [PATCH 075/118] Remove `seconds_to_timestr` from sync/utils.py --- tubesync/sync/utils.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 46668c25..3f62492e 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -148,15 +148,6 @@ def delete_file(filepath): return False -def seconds_to_timestr(seconds): - seconds = seconds % (24 * 3600) - hour = seconds // 3600 - seconds %= 3600 - minutes = seconds // 60 - seconds %= 60 - return '{:02d}:{:02d}:{:02d}'.format(hour, minutes, seconds) - - def multi_key_sort(iterable, specs, /, use_reversed=False, *, item=False, attr=False, key_func=None): result = list(iterable) if key_func is None: From 2c67b2f31c4b1f0dd118c47b1e99c914a39a77e2 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 14:59:08 -0400 Subject: [PATCH 076/118] Adjust the `seconds_to_timestr` import in sync/tasks.py --- tubesync/sync/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index c5903bb0..f01f769e 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -31,11 +31,11 @@ from common.errors import ( NoFormatException, NoMediaException, NoThumbnailException, DownloadFailedException, ) from common.utils import ( django_queryset_generator as qs_gen, - remove_enclosed, ) + remove_enclosed, seconds_to_timestr, ) from .choices import Val, TaskQueue from .models import Source, Media, MediaServer from .utils import ( get_remote_image, resize_image_to_height, - write_text_file, filter_response, seconds_to_timestr, ) + write_text_file, filter_response, ) from .youtube import YouTubeError db_vendor = db.connection.vendor From db7af44cec1b8a9059969748157f1222ffdaa270 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 15:03:32 -0400 Subject: [PATCH 077/118] Adjust the `seconds_to_timestr` import in sync/media.py --- tubesync/sync/models/media.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models/media.py b/tubesync/sync/models/media.py index 32e026e8..f50de356 100644 --- a/tubesync/sync/models/media.py +++ b/tubesync/sync/models/media.py @@ -18,13 +18,14 @@ from common.errors import NoFormatException from common.json import JSONEncoder from common.utils import ( clean_filename, clean_emoji, glob_quote, + seconds_to_timestr, ) from ..youtube import ( get_media_info as get_youtube_media_info, download_media as download_youtube_media, ) from ..utils import ( - seconds_to_timestr, parse_media_format, filter_response, + parse_media_format, filter_response, write_text_file, mkdir_p, multi_key_sort, ) from ..matching import ( From 8852c8c2eff06ec916e653d033cf5af5ea4d1c98 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 15:16:47 -0400 Subject: [PATCH 078/118] Switch to the `common.utils` version --- tubesync/sync/models/media.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/models/media.py b/tubesync/sync/models/media.py index f50de356..377d87c3 100644 --- a/tubesync/sync/models/media.py +++ b/tubesync/sync/models/media.py @@ -17,8 +17,8 @@ from common.logger import log from common.errors import NoFormatException from common.json import JSONEncoder from common.utils import ( - clean_filename, clean_emoji, glob_quote, - seconds_to_timestr, + clean_filename, clean_emoji, directory_and_stem, + glob_quote, seconds_to_timestr, ) from ..youtube import ( get_media_info as get_youtube_media_info, @@ -39,7 +39,7 @@ from ..choices import ( from ._migrations import ( media_file_storage, get_media_thumb_path, get_media_file_path, ) -from ._private import _srctype_dict, _nfo_element, directory_and_stem +from ._private import _srctype_dict, _nfo_element from .media__tasks import ( download_checklist, download_finished, wait_for_premiere, ) From 8b127afc234f1bddca5198aea53b022c78767b06 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 15:18:57 -0400 Subject: [PATCH 079/118] Remove the old copy from sync/models/_private.py --- tubesync/sync/models/_private.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tubesync/sync/models/_private.py b/tubesync/sync/models/_private.py index 8cf41ce1..094d3763 100644 --- a/tubesync/sync/models/_private.py +++ b/tubesync/sync/models/_private.py @@ -11,11 +11,3 @@ def _nfo_element(nfo, label, text, /, *, attrs={}, tail='\n', char=' ', indent=2 element.tail = tail + (char * indent) return element -def directory_and_stem(arg_path, /, all_suffixes=False): - filepath = Path(arg_path) - stem = Path(filepath.stem) - while all_suffixes and stem.suffixes and '' != stem.suffix: - stem = Path(stem.stem) - stem = str(stem) - return (filepath.parent, stem,) - From 432bc019d5197b3adcdfaaabc924e533a3e99268 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 15:23:18 -0400 Subject: [PATCH 080/118] fixup: remove an unused import --- tubesync/sync/models/_private.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubesync/sync/models/_private.py b/tubesync/sync/models/_private.py index 094d3763..5ec14d7c 100644 --- a/tubesync/sync/models/_private.py +++ b/tubesync/sync/models/_private.py @@ -1,4 +1,3 @@ -from pathlib import Path from ..choices import Val, YouTube_SourceType # noqa From dc0edef5624d1dc0bc8c0231c9eb3790a42275d2 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 15:31:32 -0400 Subject: [PATCH 081/118] Remove `list_of_dictionaries` from sync/utils.py --- tubesync/sync/utils.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 3f62492e..617bccca 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -7,6 +7,7 @@ from pathlib import Path from tempfile import NamedTemporaryFile import requests from PIL import Image +from common.utils import list_of_dictionaries from django.conf import settings from urllib.parse import urlsplit, parse_qs from django.forms import ValidationError @@ -178,17 +179,6 @@ def normalize_codec(codec_str): return result -def list_of_dictionaries(arg_list, arg_function=lambda x: x): - assert callable(arg_function) - if isinstance(arg_list, list): - def _call_func_with_dict(arg_dict): - if isinstance(arg_dict, dict): - return arg_function(arg_dict) - return arg_dict - return (True, list(map(_call_func_with_dict, arg_list)),) - return (False, arg_list,) - - def _url_keys(arg_dict, filter_func): result = {} if isinstance(arg_dict, dict): From f097b6b4e06e33db0b3b5f9840bc6d64d3791ba4 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 15:38:36 -0400 Subject: [PATCH 082/118] Remove `mkdir_p` from sync/utils.py --- tubesync/sync/utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 617bccca..9b18248b 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -117,14 +117,6 @@ def file_is_editable(filepath): return False -def mkdir_p(arg_path, mode=0o777): - ''' - Reminder: mode only affects the last directory - ''' - dirpath = Path(arg_path) - return dirpath.mkdir(mode=mode, parents=True, exist_ok=True) - - def write_text_file(filepath, filedata): if not isinstance(filedata, str): raise TypeError(f'filedata must be a str, got "{type(filedata)}"') From 0be0aa9e812cf1acc74a68bbf40a4352ccc1af77 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 15:46:50 -0400 Subject: [PATCH 083/118] Adjust the `mkdir_p` import in sync/models/media.py --- tubesync/sync/models/media.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/models/media.py b/tubesync/sync/models/media.py index 377d87c3..ab21fb9f 100644 --- a/tubesync/sync/models/media.py +++ b/tubesync/sync/models/media.py @@ -18,7 +18,7 @@ from common.errors import NoFormatException from common.json import JSONEncoder from common.utils import ( clean_filename, clean_emoji, directory_and_stem, - glob_quote, seconds_to_timestr, + glob_quote, mkdir_p, seconds_to_timestr, ) from ..youtube import ( get_media_info as get_youtube_media_info, @@ -26,7 +26,7 @@ from ..youtube import ( ) from ..utils import ( parse_media_format, filter_response, - write_text_file, mkdir_p, multi_key_sort, + write_text_file, multi_key_sort, ) from ..matching import ( get_best_combined_format, From ff30543a2016a78bfac6da4b59a03395b21cce55 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 15:55:58 -0400 Subject: [PATCH 084/118] Adjust the `mkdir_p` import in sync/signals.py --- tubesync/sync/signals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index e1a22ee5..f25d6a92 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -10,7 +10,7 @@ from django.utils.translation import gettext_lazy as _ from background_task.signals import task_failed from background_task.models import Task from common.logger import log -from common.utils import glob_quote +from common.utils import glob_quote, mkdir_p from .models import Source, Media, Metadata from .tasks import (delete_task_by_source, delete_task_by_media, index_source_task, download_media_thumbnail, download_media_metadata, @@ -18,7 +18,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta download_media, download_source_images, delete_all_media_for_source, save_all_media_for_source, rename_media, get_media_metadata_task, get_media_download_task) -from .utils import delete_file, mkdir_p +from .utils import delete_file from .filtering import filter_media from .choices import Val, YouTube_SourceType From fbb27eaa5dce8e3d351c6a50b47e21c6827e5c30 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 15:58:11 -0400 Subject: [PATCH 085/118] Adjust the `mkdir_p` import in sync/views.py --- tubesync/sync/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 5f13877f..45236465 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -20,13 +20,13 @@ from django.utils._os import safe_join from django.utils import timezone from django.utils.translation import gettext_lazy as _ from common.timestamp import timestamp_to_datetime -from common.utils import append_uri_params +from common.utils import append_uri_params, mkdir_p from background_task.models import Task, CompletedTask from .models import Source, Media, MediaServer from .forms import (ValidateSourceForm, ConfirmDeleteSourceForm, RedownloadMediaForm, SkipMediaForm, EnableMediaForm, ResetTasksForm, ScheduleTaskForm, ConfirmDeleteMediaServerForm, SourceForm) -from .utils import validate_url, delete_file, multi_key_sort, mkdir_p +from .utils import validate_url, delete_file, multi_key_sort from .tasks import (map_task_to_instance, get_error_message, get_source_completed_tasks, get_media_download_task, delete_task_by_media, index_source_task, From 021988ff9ffcc0e8009af4a82dbe625f23773521 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 16:02:12 -0400 Subject: [PATCH 086/118] Adjust the `mkdir_p` import in sync/youtube.py --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 7afdf337..f8516b69 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -7,6 +7,7 @@ import os from common.logger import log +from common.utils import mkdir_p from copy import deepcopy from pathlib import Path from tempfile import TemporaryDirectory @@ -15,7 +16,6 @@ from urllib.parse import urlsplit, parse_qs from django.conf import settings from .choices import Val, FileExtension from .hooks import postprocessor_hook, progress_hook -from .utils import mkdir_p import yt_dlp import yt_dlp.patch.check_thumbnails import yt_dlp.patch.fatal_http_errors From 1afe0d41539731fb7d16fd5969d8b825b5096a03 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 18:31:48 -0400 Subject: [PATCH 087/118] Adjust the `multi_key_sort` import in sync/matching.py --- tubesync/sync/matching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/matching.py b/tubesync/sync/matching.py index 4196a9f8..f5fe3fd1 100644 --- a/tubesync/sync/matching.py +++ b/tubesync/sync/matching.py @@ -6,7 +6,7 @@ from .choices import Val, Fallback -from .utils import multi_key_sort +from common.utils import multi_key_sort from django.conf import settings From 64cd082406ad3e6baf7887af6069e59c71275cb2 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 18:40:05 -0400 Subject: [PATCH 088/118] Remove `multi_key_sort` from sync/utils.py --- tubesync/sync/utils.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 9b18248b..7c8947bb 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -141,21 +141,6 @@ def delete_file(filepath): return False -def multi_key_sort(iterable, specs, /, use_reversed=False, *, item=False, attr=False, key_func=None): - result = list(iterable) - if key_func is None: - # itemgetter is the default - if item or not (item or attr): - key_func = itemgetter - elif attr: - key_func = attrgetter - for key, reverse in reversed(specs): - result.sort(key=key_func(key), reverse=reverse) - if use_reversed: - return list(reversed(result)) - return result - - def normalize_codec(codec_str): result = str(codec_str).upper() parts = result.split('.') From decbe14968a5ce43df89d5f67d10345d64034226 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 18:47:52 -0400 Subject: [PATCH 089/118] Adjust the `multi_key_sort` import in sync/models/media.py --- tubesync/sync/models/media.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/models/media.py b/tubesync/sync/models/media.py index ab21fb9f..b2c79e15 100644 --- a/tubesync/sync/models/media.py +++ b/tubesync/sync/models/media.py @@ -18,15 +18,14 @@ from common.errors import NoFormatException from common.json import JSONEncoder from common.utils import ( clean_filename, clean_emoji, directory_and_stem, - glob_quote, mkdir_p, seconds_to_timestr, + glob_quote, mkdir_p, multi_key_sort, seconds_to_timestr, ) from ..youtube import ( get_media_info as get_youtube_media_info, download_media as download_youtube_media, ) from ..utils import ( - parse_media_format, filter_response, - write_text_file, multi_key_sort, + filter_response, parse_media_format, write_text_file, ) from ..matching import ( get_best_combined_format, From ae1974b503c734c1b220a4f577f00cc6c101bdec Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 18:52:09 -0400 Subject: [PATCH 090/118] Adjust the `multi_key_sort` import in sync/views.py --- tubesync/sync/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 45236465..493098cd 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -20,13 +20,13 @@ from django.utils._os import safe_join from django.utils import timezone from django.utils.translation import gettext_lazy as _ from common.timestamp import timestamp_to_datetime -from common.utils import append_uri_params, mkdir_p +from common.utils import append_uri_params, mkdir_p, multi_key_sort from background_task.models import Task, CompletedTask from .models import Source, Media, MediaServer from .forms import (ValidateSourceForm, ConfirmDeleteSourceForm, RedownloadMediaForm, SkipMediaForm, EnableMediaForm, ResetTasksForm, ScheduleTaskForm, ConfirmDeleteMediaServerForm, SourceForm) -from .utils import validate_url, delete_file, multi_key_sort +from .utils import delete_file, validate_url from .tasks import (map_task_to_instance, get_error_message, get_source_completed_tasks, get_media_download_task, delete_task_by_media, index_source_task, From 659e46a31f85535f6d199491958ec806e63719e8 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 18:55:36 -0400 Subject: [PATCH 091/118] fixup: remove unused imports --- tubesync/sync/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 7c8947bb..cbd14eab 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -2,7 +2,6 @@ import os import re import math from copy import deepcopy -from operator import attrgetter, itemgetter from pathlib import Path from tempfile import NamedTemporaryFile import requests From 60aafbdab4c7f4b770be3c0b9ce77f8280b02757 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 19:04:40 -0400 Subject: [PATCH 092/118] fixup: accept the existing calls --- tubesync/common/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index 7256b3d8..0c7507e9 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -70,7 +70,7 @@ def glob_quote(filestr, /): return filestr.translate(str.maketrans(_glob_specials)) -def list_of_dictionaries(arg_list, /, *, arg_function=lambda x: x): +def list_of_dictionaries(arg_list, /, arg_function=lambda x: x): assert callable(arg_function) if isinstance(arg_list, list): _map_func = partial(lambda f, d: f(d) if isinstance(d, dict) else d, arg_function) From 54aa509bbff91d61b3c130810ef11c1ebd9f4535 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 2 Jun 2025 20:36:55 -0400 Subject: [PATCH 093/118] Migrate data from indexing to metadata in a task --- tubesync/sync/tasks.py | 51 +++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 289430d8..98312fd1 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -321,6 +321,43 @@ def save_db_batch(qs, objs, fields, /): return num_updated +@background(schedule=dict(priority=20, run_at=60), queue=Val(TaskQueue.DB), remove_existing_tasks=True) +def migrate_to_metadata(media_id): + try: + media = Media.objects.get(pk=media_id) + except Media.DoesNotExist as e: + # Task triggered but the media no longer exists, do nothing + log.error(f'Task migrate_to_metadata(pk={media_id}) called but no ' + f'media exists with ID: {media_id}') + raise InvalidTaskError(_('no such media')) from e + + try: + data = Metadata.objects.get( + media__isnull=True, + source=media.source, + key=media.key, + ) + except Metadata.DoesNotExist as e: + raise InvalidTaskError(_('no indexed data to migrate to metadata')) from e + + video = data.value + fields = lambda f, m: m.get_metadata_field(f) + timestamp = video.get(fields('timestamp', media), None) + for key in ('epoch', 'availability', 'extractor_key',): + field = fields(key, media) + value = video.get(field) + existing_value = media.get_metadata_first_value(key) + if value is None: + if 'epoch' == key: + value = timestamp + elif 'extractor_key' == key: + value = data.site + if value is not None: + if existing_value and ('epoch' == key or value == existing_value): + continue + media.save_to_metadata(field, value) + + @background(schedule=dict(priority=20, run_at=30), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def index_source_task(source_id): ''' @@ -415,19 +452,6 @@ def index_source_task(source_id): 'key', *db_fields_media, ).get_or_create(defaults=media_defaults, source=source, key=key) - for key in ('epoch', 'availability', 'extractor_key',): - field = fields(key, media) - value = video.get(field) - existing_value = media.get_metadata_first_value(key) - if value is None: - if 'epoch' == key: - value = timestamp - elif 'extractor_key' == key: - value = site - if value is not None: - if existing_value and ('epoch' == key or value == existing_value): - continue - media.save_to_metadata(field, value) db_batch_media.append(media) data, new_data = source.videos.defer('value').filter( media__isnull=True, @@ -437,6 +461,7 @@ def index_source_task(source_id): data.retrieved = source.last_crawl data.value = video db_batch_data.append(data) + migrate_to_metadata(str(media.pk)) if not new_media: # update the existing media for key, value in media_defaults.items(): From 3fc2b09a26f6cfa69630863e00b39e262684b75f Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 3 Jun 2025 09:56:15 -0400 Subject: [PATCH 094/118] Remove the `python3-lib` alternative --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7ad61039..ffc08066 100644 --- a/Dockerfile +++ b/Dockerfile @@ -363,8 +363,7 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va xxd \ && \ # Link to the current python3 version - update-alternatives --install /usr/local/lib/python3 python3-lib \ - "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" 100 && \ + ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ # Configure the editor alternatives touch /usr/local/bin/babi /bin/nano /usr/bin/vim.tiny && \ update-alternatives --install /usr/bin/editor editor /usr/local/bin/babi 50 && \ From f74c22c849a216cc807a97e2d3930349e6044133 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 3 Jun 2025 10:32:28 -0400 Subject: [PATCH 095/118] fixup: building from source on arm64 --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index ffc08066..2dee66ac 100644 --- a/Dockerfile +++ b/Dockerfile @@ -423,6 +423,7 @@ RUN --mount=type=tmpfs,target=/cache \ g++ \ gcc \ libjpeg-dev \ + libonig-dev \ libpq-dev \ libwebp-dev \ make \ @@ -466,6 +467,7 @@ RUN --mount=type=tmpfs,target=/cache \ g++ \ gcc \ libjpeg-dev \ + libonig-dev \ libpq-dev \ libwebp-dev \ make \ From a849367ac86dc75955dc1396f6647941fe6c3ad1 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 3 Jun 2025 10:48:17 -0400 Subject: [PATCH 096/118] Add the library `libonig-dev` depends upon also --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 2dee66ac..2a95f304 100644 --- a/Dockerfile +++ b/Dockerfile @@ -347,6 +347,7 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va apt-get -y --no-install-recommends install \ libjpeg62-turbo \ libmariadb3 \ + libonig5 \ libpq5 \ libwebp7 \ pkgconf \ From 8baf50d98e98db7148a34e0f78ff6c3051103602 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 3 Jun 2025 13:09:56 -0400 Subject: [PATCH 097/118] Check for unresolved Python shared libraries --- Dockerfile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index da1e84c5..61324a9d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -459,8 +459,16 @@ RUN --mount=type=tmpfs,target=/cache \ && \ apt-get -y autopurge && \ apt-get -y autoclean && \ + LD_LIBRARY_PATH=/usr/local/lib/python3/dist-packages/pillow.libs:/usr/local/lib/python3/dist-packages/psycopg_binary.libs \ + find /usr/local/lib/python3/dist-packages/ \ + -name '*.so*' -print \ + -exec du -h '{}' ';' \ + -exec ldd '{}' ';' \ + >| /cache/python-shared-objects 2>&1 && \ rm -v -f /var/cache/debconf/*.dat-old && \ - rm -v -rf /tmp/* + rm -v -rf /tmp/* ; \ + grep >/dev/null -Fe ' => not found' /cache/python-shared-objects && \ + cat -v /cache/python-shared-objects || : # Copy root COPY config/root / From 75aa06d495ab6cfb73a28d74a4fdcf609721aab2 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 3 Jun 2025 13:34:25 -0400 Subject: [PATCH 098/118] Stop the build when a shared object is unresolved --- Dockerfile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 61324a9d..353a54ae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -467,8 +467,13 @@ RUN --mount=type=tmpfs,target=/cache \ >| /cache/python-shared-objects 2>&1 && \ rm -v -f /var/cache/debconf/*.dat-old && \ rm -v -rf /tmp/* ; \ - grep >/dev/null -Fe ' => not found' /cache/python-shared-objects && \ - cat -v /cache/python-shared-objects || : + if grep >/dev/null -Fe ' => not found' /cache/python-shared-objects ; \ + then \ + cat -v /cache/python-shared-objects ; \ + printf -- 1>&2 '%s\n' \ + ERROR: ' An unresolved shared object was found.' ; \ + exit 1 ; \ + fi # Copy root COPY config/root / From 77472beb7a53d30823d4085b12b797bac325aed4 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 3 Jun 2025 14:51:05 -0400 Subject: [PATCH 099/118] Remove unnecessary dependencies --- Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index da1e84c5..69dc21fb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -342,10 +342,7 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va # Install dependencies we keep # Install required distro packages apt-get -y --no-install-recommends install \ - libjpeg62-turbo \ libmariadb3 \ - libpq5 \ - libwebp7 \ pkgconf \ python3 \ python3-libsass \ From bfaa76c3ddefccc3320d113f93fd0bab770c6bec Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 4 Jun 2025 04:06:01 -0400 Subject: [PATCH 100/118] Add `TaskQueue.LIMIT` for the new `huey` queue --- tubesync/sync/choices.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/choices.py b/tubesync/sync/choices.py index 92cb6c8f..238aa160 100644 --- a/tubesync/sync/choices.py +++ b/tubesync/sync/choices.py @@ -165,6 +165,7 @@ class TaskQueue(models.TextChoices): DB = 'database', _('Database') FS = 'filesystem', _('Filesystem') NET = 'network', _('Networking') + LIMIT = 'limited', _('Limited Networking') class WeekDay(models.IntegerChoices): From 93adc595ff54ef037ffba9a81cf1ac48141ea5f0 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 4 Jun 2025 15:59:25 -0400 Subject: [PATCH 101/118] Wait on database tasks and add verbose names to the new tasks --- tubesync/sync/tasks.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 98312fd1..90551676 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -57,6 +57,7 @@ def map_task_to_instance(task): because UUID's are incompatible with background_task's "creator" feature. ''' TASK_MAP = { + 'sync.tasks.migrate_to_metadata': Media, 'sync.tasks.index_source_task': Source, 'sync.tasks.check_source_directory_exists': Source, 'sync.tasks.download_media_thumbnail': Media, @@ -358,6 +359,12 @@ def migrate_to_metadata(media_id): media.save_to_metadata(field, value) +@background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.NET), remove_existing_tasks=True) +def wait_for_database_queue(): + while Task.objects.unlocked(timezone.now()).filter(queue=Val(TaskQueue.DB)).count() > 0: + time.sleep(5) + + @background(schedule=dict(priority=20, run_at=30), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def index_source_task(source_id): ''' @@ -391,6 +398,9 @@ def index_source_task(source_id): # Got some media, update the last crawl timestamp source.last_crawl = timezone.now() save_model(source) + wait_for_database_queue( + verbose_name=_('Waiting for database tasks to complete'), + ) delete_task_by_source('sync.tasks.save_all_media_for_source', source.pk) num_videos = len(videos) log.info(f'Found {num_videos} media items for source: {source}') @@ -461,7 +471,11 @@ def index_source_task(source_id): data.retrieved = source.last_crawl data.value = video db_batch_data.append(data) - migrate_to_metadata(str(media.pk)) + vn_fmt = _('Updating metadata from indexing results for: "{}": {}') + migrate_to_metadata( + str(media.pk), + verbose_name=vn_fmt.format(media.key, media.name), + ) if not new_media: # update the existing media for key, value in media_defaults.items(): From 3c85d5531eed9e2189041d07d8f95d2c3bcbc491 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 5 Jun 2025 03:29:57 -0400 Subject: [PATCH 102/118] Re-work the re-use of old metadata after deletion --- tubesync/sync/signals.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index d68a082f..8f705944 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -4,7 +4,7 @@ from tempfile import TemporaryDirectory from django.conf import settings from django.db import IntegrityError from django.db.models.signals import pre_save, post_save, pre_delete, post_delete -from django.db.transaction import on_commit +from django.db.transaction import atomic, on_commit from django.dispatch import receiver from django.utils.translation import gettext_lazy as _ from background_task.signals import task_failed @@ -433,13 +433,16 @@ def media_post_delete(sender, instance, **kwargs): key=skipped_media.key, ) try: - instance_qs.update(media=skipped_media) + if instance_qs.count(): + with atomic(durable=False): + instance_qs.update(media=skipped_media) except IntegrityError: - # Delete the new metadata - Metadata.objects.filter(media=skipped_media).delete() try: - instance_qs.update(media=skipped_media) - except IntegrityError: - # Delete the old metadata if it still failed + with atomic(durable=False): + # Delete the new metadata + Metadata.objects.filter(media=skipped_media).delete() + instance_qs.update(media=skipped_media) + finally: + # Delete the old metadata, if it wasn't used instance_qs.delete() From 587461978d0c6039d83d3babf8ce4eae164aa1f5 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 5 Jun 2025 04:03:01 -0400 Subject: [PATCH 103/118] Delay `save_all_media_for_source` while database tasks run --- tubesync/sync/tasks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 90551676..d7c71a5b 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -401,6 +401,11 @@ def index_source_task(source_id): wait_for_database_queue( verbose_name=_('Waiting for database tasks to complete'), ) + wait_for_database_queue( + priority=29, # the checking task uses 30 + queue=Val(TaskQueue.FS), + verbose_name=_('Delaying checking all media for database tasks'), + ) delete_task_by_source('sync.tasks.save_all_media_for_source', source.pk) num_videos = len(videos) log.info(f'Found {num_videos} media items for source: {source}') From 2dae3b087e830fc16ceb533ef3b46beeb495e653 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 5 Jun 2025 04:15:45 -0400 Subject: [PATCH 104/118] Delay additional indexing tasks while database tasks run --- tubesync/sync/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d7c71a5b..1fb68d50 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -399,6 +399,7 @@ def index_source_task(source_id): source.last_crawl = timezone.now() save_model(source) wait_for_database_queue( + priority=19, # the indexing task uses 20 verbose_name=_('Waiting for database tasks to complete'), ) wait_for_database_queue( From 2fef103ea04a2cf051f8877ed0d1d900efd83315 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 5 Jun 2025 13:16:33 -0400 Subject: [PATCH 105/118] Don't remove based on `task_hash` --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 1fb68d50..e0d7cd50 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -359,7 +359,7 @@ def migrate_to_metadata(media_id): media.save_to_metadata(field, value) -@background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.NET), remove_existing_tasks=True) +@background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.NET), remove_existing_tasks=False) def wait_for_database_queue(): while Task.objects.unlocked(timezone.now()).filter(queue=Val(TaskQueue.DB)).count() > 0: time.sleep(5) From 4cffc94aecc32a1519dce9991ab8c5082988b3d4 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 6 Jun 2025 04:44:30 -0400 Subject: [PATCH 106/118] Retry the task when the queue worker is stopped --- tubesync/sync/tasks.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index e0d7cd50..11ec02a4 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -361,8 +361,11 @@ def migrate_to_metadata(media_id): @background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.NET), remove_existing_tasks=False) def wait_for_database_queue(): + worker_down_path = Path('/run/service/tubesync-db-worker/down') while Task.objects.unlocked(timezone.now()).filter(queue=Val(TaskQueue.DB)).count() > 0: time.sleep(5) + if worker_down_path.exists() and worker_down_path.is_file(): + raise Exception(_('queue worker stopped')) @background(schedule=dict(priority=20, run_at=30), queue=Val(TaskQueue.NET), remove_existing_tasks=True) From 687957f21fbd6ff281b6a26bb88cf88cccc949d4 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 6 Jun 2025 04:50:20 -0400 Subject: [PATCH 107/118] Add `BgTaskWorkerError` exception --- tubesync/common/errors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tubesync/common/errors.py b/tubesync/common/errors.py index 9ff44a48..17ebec40 100644 --- a/tubesync/common/errors.py +++ b/tubesync/common/errors.py @@ -42,3 +42,8 @@ class DatabaseConnectionError(Exception): Raised when parsing or initially connecting to a database. ''' pass + + +class BgTaskWorkerError(Exception): + # Raised when the worker process is not in a normal working state. + pass From 7b96b33ed2068447f70f892cdd5e69ddb5232f4d Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 6 Jun 2025 04:54:13 -0400 Subject: [PATCH 108/118] Use the new `BgTaskWorkerError` exception --- tubesync/sync/tasks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 11ec02a4..46e688ff 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -28,9 +28,9 @@ from background_task import background from background_task.exceptions import InvalidTaskError from background_task.models import Task, CompletedTask from common.logger import log -from common.errors import ( NoFormatException, NoMediaException, - NoThumbnailException, - DownloadFailedException, ) +from common.errors import ( BgTaskWorkerError, DownloadFailedException, + NoFormatException, NoMediaException, + NoThumbnailException, ) from common.utils import ( django_queryset_generator as qs_gen, remove_enclosed, ) from .choices import Val, TaskQueue @@ -365,7 +365,7 @@ def wait_for_database_queue(): while Task.objects.unlocked(timezone.now()).filter(queue=Val(TaskQueue.DB)).count() > 0: time.sleep(5) if worker_down_path.exists() and worker_down_path.is_file(): - raise Exception(_('queue worker stopped')) + raise BgTaskWorkerError(_('queue worker stopped')) @background(schedule=dict(priority=20, run_at=30), queue=Val(TaskQueue.NET), remove_existing_tasks=True) From ce2dd7bbea3cb2fa0ca0b800dc349e7adaf0ce2d Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 7 Jun 2025 02:07:27 -0400 Subject: [PATCH 109/118] Handle multiple matches for metadata when searching by key --- tubesync/sync/signals.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 8f705944..ee34a88f 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -435,14 +435,33 @@ def media_post_delete(sender, instance, **kwargs): try: if instance_qs.count(): with atomic(durable=False): - instance_qs.update(media=skipped_media) + # clear the link to a media instance + Metadata.objects.filter(media=skipped_media).update(media=None) + # choose the oldest metadata for our key + md = instance_qs.filter( + key=skipped_media.key, + ).order_by( + 'key', + 'created', + ).first() + # set the link to a media instance only on our selected metadata + log.info('Reusing old metadata for "{}": {}', skipped_media.key, skipped_media.name) + instance_qs.filter(uuid=md.uuid).update(media=skipped_media) + # delete any metadata that we are no longer using + instance_qs.exclude(uuid=md.uuid).delete() + except IntegrityError: + # this probably won't happen, but try it without a transaction try: - with atomic(durable=False): - # Delete the new metadata - Metadata.objects.filter(media=skipped_media).delete() - instance_qs.update(media=skipped_media) + # clear the link to a media instance + Metadata.objects.filter(media=skipped_media).update(media=None) + # keep one metadata + md = instance_qs.order_by('created').first() + instance_qs.filter(uuid=md.uuid).update(media=skipped_media) + except IntegrityError as e: + log.exception('media_post_delete: could not update selected metadata: {}', e) finally: - # Delete the old metadata, if it wasn't used + log.debug('Deleting metadata for "{}": {}', skipped_media.key, skipped_media.pk) + # delete the old metadata instance_qs.delete() From af4f242b05935a2633a8c2c7ce9b2a5f4fc99368 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 7 Jun 2025 19:28:18 -0400 Subject: [PATCH 110/118] Use `timedelta` to avoid `ValueError` --- tubesync/sync/models/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models/source.py b/tubesync/sync/models/source.py index 06d652a0..ca74e682 100644 --- a/tubesync/sync/models/source.py +++ b/tubesync/sync/models/source.py @@ -404,7 +404,7 @@ class Source(db.models.Model): ) elif Val(IndexSchedule.EVERY_7_DAYS) > self.index_schedule: self.target_schedule = advance_hour( - when.replace(hour=1+when.hour), + when + timezone.timedelta(hours=1), self.target_schedule.hour, ) From 3c5cd48517e8c07ae8b7d76e798fc022d55a200d Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 8 Jun 2025 06:10:25 -0400 Subject: [PATCH 111/118] Add `timedelta` filter --- tubesync/sync/templatetags/filters.py | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tubesync/sync/templatetags/filters.py b/tubesync/sync/templatetags/filters.py index 444969e9..d40adf08 100644 --- a/tubesync/sync/templatetags/filters.py +++ b/tubesync/sync/templatetags/filters.py @@ -1,5 +1,6 @@ from django import template from django.template.defaultfilters import filesizeformat +from math import ceil register = template.Library() @@ -23,3 +24,52 @@ def sub(value, arg): except Exception: return "" + +@register.filter +def timedelta(value, arg=None, /, *, fmt_2=None): + if hasattr(value, 'total_seconds') and callable(value.total_seconds): + seconds_total = value.total_seconds() + elif hasattr(value, 'seconds'): + seconds_total = value.seconds + (value.days * 24 * 60 * 60) + else: + seconds_total = value + + if arg is None: + arg = '{days_total} days, {hours2}:{minutes2}:{seconds2}' + if seconds_total < 1.0: + arg = f'{seconds_total:.6f} seconds' + + if fmt_2 is None: + fmt_2 = '{:02d}' + + seconds_total = ceil(seconds_total) + seconds = seconds_total % 60 + + minutes_total = seconds_total // 60 + minutes = minutes_total % 60 + + hours_total = minutes_total // 60 + hours = hours_total % 24 + + days_total = hours_total // 24 + days = days_total % 365 + + years_total = days_total // 365 + years = years_total + + return arg.format(**{ + 'seconds': seconds, + 'seconds2': fmt_2.format(seconds), + 'minutes': minutes, + 'minutes2': fmt_2.format(minutes), + 'hours': hours, + 'hours2': fmt_2.format(hours), + 'days': days, + 'years': years, + 'seconds_total': seconds_total, + 'minutes_total': minutes_total, + 'hours_total': hours_total, + 'days_total': days_total, + 'years_total': years_total, + }) + From 5838ed52f9f05743838ea4d549c961686deec35d Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 8 Jun 2025 06:25:08 -0400 Subject: [PATCH 112/118] fixup: remove white-space --- tubesync/sync/templatetags/filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/templatetags/filters.py b/tubesync/sync/templatetags/filters.py index d40adf08..73d9d05b 100644 --- a/tubesync/sync/templatetags/filters.py +++ b/tubesync/sync/templatetags/filters.py @@ -41,7 +41,7 @@ def timedelta(value, arg=None, /, *, fmt_2=None): if fmt_2 is None: fmt_2 = '{:02d}' - + seconds_total = ceil(seconds_total) seconds = seconds_total % 60 From 54932f4951d5b770767f1ca3c0776e6df00ec694 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 8 Jun 2025 06:46:59 -0400 Subject: [PATCH 113/118] By default, display years and days only when needed --- tubesync/sync/templatetags/filters.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/templatetags/filters.py b/tubesync/sync/templatetags/filters.py index 73d9d05b..e619b766 100644 --- a/tubesync/sync/templatetags/filters.py +++ b/tubesync/sync/templatetags/filters.py @@ -35,9 +35,10 @@ def timedelta(value, arg=None, /, *, fmt_2=None): seconds_total = value if arg is None: - arg = '{days_total} days, {hours2}:{minutes2}:{seconds2}' if seconds_total < 1.0: - arg = f'{seconds_total:.6f} seconds' + return f'{seconds_total:.6f} seconds' + dynamic_arg = True + arg = '{hours2}:{minutes2}:{seconds2}' if fmt_2 is None: fmt_2 = '{:02d}' @@ -57,6 +58,16 @@ def timedelta(value, arg=None, /, *, fmt_2=None): years_total = days_total // 365 years = years_total + if dynamic_arg: + prefix_years = prefix_days = '' + if years_total > 0: + prefix_years = '{years_total} years, ' + if prefix_years and days_total > 0: + prefix_days = '{days} days, ' + elif days_total > 0: + prefix_days = '{total_days} days, ' + arg = prefix_years + prefix_days + arg + return arg.format(**{ 'seconds': seconds, 'seconds2': fmt_2.format(seconds), From 9575185bbd25bd074c0f982c0afc2520247e259d Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 8 Jun 2025 06:50:47 -0400 Subject: [PATCH 114/118] fixup: set a default value for the local --- tubesync/sync/templatetags/filters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/templatetags/filters.py b/tubesync/sync/templatetags/filters.py index e619b766..46c011c1 100644 --- a/tubesync/sync/templatetags/filters.py +++ b/tubesync/sync/templatetags/filters.py @@ -34,6 +34,7 @@ def timedelta(value, arg=None, /, *, fmt_2=None): else: seconds_total = value + dynamic_arg = False if arg is None: if seconds_total < 1.0: return f'{seconds_total:.6f} seconds' From 4f79e64088bf10110bd2f1e0c264a33d4d3a12a8 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 8 Jun 2025 07:05:21 -0400 Subject: [PATCH 115/118] fixup: use the correct variable --- tubesync/sync/templatetags/filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/templatetags/filters.py b/tubesync/sync/templatetags/filters.py index 46c011c1..ee8f9aa2 100644 --- a/tubesync/sync/templatetags/filters.py +++ b/tubesync/sync/templatetags/filters.py @@ -66,7 +66,7 @@ def timedelta(value, arg=None, /, *, fmt_2=None): if prefix_years and days_total > 0: prefix_days = '{days} days, ' elif days_total > 0: - prefix_days = '{total_days} days, ' + prefix_days = '{days_total} days, ' arg = prefix_years + prefix_days + arg return arg.format(**{ From ac34dabbf4faecf8e324f701c2945e74b8623f88 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 9 Jun 2025 08:19:43 -0400 Subject: [PATCH 116/118] Resolve bundles in restart_services.sh --- tubesync/restart_services.sh | 53 ++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/tubesync/restart_services.sh b/tubesync/restart_services.sh index bb34d809..abb372c6 100755 --- a/tubesync/restart_services.sh +++ b/tubesync/restart_services.sh @@ -1,26 +1,51 @@ #!/usr/bin/env sh -dir='/run/service' +_dir='/run/service' svc_path() ( - cd "${dir}" + cd "${_dir}" && realpath -e -s "$@" ) +_bundles="$( + find '/etc/s6-overlay/s6-rc.d' -mindepth 2 -maxdepth 2 \ + -name 'type' \ + -execdir grep -F -q -e bundle '{}' ';' \ + -printf '%P\n' | \ + sed -e 's,/type$,,' ; +)" +is_a_bundle() { + local bundle + for bundle in ${_bundles} + do + if [ "$1" = "${bundle}" ] + then + return 0 + fi + done + return 1 +} + if [ 0 -eq $# ] then - set -- \ - $( cd "${dir}" && svc_path tubesync*-worker ) \ - "$( svc_path gunicorn )" \ - "$( svc_path nginx )" + set -- $(/command/s6-rc list user | grep -v -e '-init$') fi -for service in $( svc_path "$@" ) +for arg in "$@" do - printf -- 'Restarting %-28s' "${service#${dir}/}..." - _began="$( date '+%s' )" - /command/s6-svc -wr -r "${service}" - _ended="$( date '+%s' )" - printf -- '\tcompleted (in %2.1d seconds).\n' \ - "$( expr "${_ended}" - "${_began}" )" + _svcs="${arg}" + if is_a_bundle "${arg}" + then + _svcs="$(/command/s6-rc list "${arg}" | grep -v -e '-init$')" + fi + for service in $(svc_path ${_svcs}) + do + printf -- 'Restarting %-28s' "${service#${dir}/}..." + _began="$( date '+%s' )" + /command/s6-svc -wr -r "${service}" + _ended="$( date '+%s' )" + printf -- '\tcompleted (in %2.1d seconds).\n' \ + "$( expr "${_ended}" - "${_began}" )" + done done -unset -v _began _ended service +unset -v _began _ended _svcs arg service +unset -v _bundles _dir From 032bd15d79e369add69a18f0d919b6645426b2a2 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 9 Jun 2025 08:28:18 -0400 Subject: [PATCH 117/118] fixup: missed a dir => _dir change --- tubesync/restart_services.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/restart_services.sh b/tubesync/restart_services.sh index abb372c6..3c1d981b 100755 --- a/tubesync/restart_services.sh +++ b/tubesync/restart_services.sh @@ -39,7 +39,7 @@ do fi for service in $(svc_path ${_svcs}) do - printf -- 'Restarting %-28s' "${service#${dir}/}..." + printf -- 'Restarting %-28s' "${service#${_dir}/}..." _began="$( date '+%s' )" /command/s6-svc -wr -r "${service}" _ended="$( date '+%s' )" From 1fd32296a3bb3b9758f7f6956954552b36d41ebf Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 9 Jun 2025 10:45:20 -0400 Subject: [PATCH 118/118] Use f-string for logged message --- tubesync/sync/signals.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index ee34a88f..291eaf89 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -445,7 +445,7 @@ def media_post_delete(sender, instance, **kwargs): 'created', ).first() # set the link to a media instance only on our selected metadata - log.info('Reusing old metadata for "{}": {}', skipped_media.key, skipped_media.name) + log.info(f'Reusing old metadata for "{skipped_media.key}": {skipped_media.name}') instance_qs.filter(uuid=md.uuid).update(media=skipped_media) # delete any metadata that we are no longer using instance_qs.exclude(uuid=md.uuid).delete() @@ -459,9 +459,9 @@ def media_post_delete(sender, instance, **kwargs): md = instance_qs.order_by('created').first() instance_qs.filter(uuid=md.uuid).update(media=skipped_media) except IntegrityError as e: - log.exception('media_post_delete: could not update selected metadata: {}', e) + log.exception(f'media_post_delete: could not update selected metadata: {e}') finally: - log.debug('Deleting metadata for "{}": {}', skipped_media.key, skipped_media.pk) + log.debug(f'Deleting metadata for "{skipped_media.key}": {skipped_media.pk}') # delete the old metadata instance_qs.delete()