From 8a911d0a7cfea51f452caf554a13b8149856e7eb Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 9 Apr 2025 08:25:41 -0400 Subject: [PATCH 01/14] Special case `sqlite` to not use transactions --- tubesync/sync/tasks.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 3b02e029..73789caa 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -16,7 +16,7 @@ from PIL import Image from django.conf import settings from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile -from django.db import DatabaseError, IntegrityError +from django.db import connection, DatabaseError, IntegrityError from django.db.transaction import atomic from django.utils import timezone from django.utils.translation import gettext_lazy as _ @@ -200,6 +200,16 @@ def migrate_queues(): return qs.update(queue=Val(TaskQueue.NET)) +def save_model(instance): + if 'sqlite' == connection.vendor: + # a transaction here causes too many + # database is locked errors + instance.save() + else: + with atomic(): + instance.save() + + def schedule_media_servers_update(): with atomic(): # Schedule a task to update media servers @@ -261,7 +271,7 @@ def index_source_task(source_id): # Reset any errors # TODO: determine if this affects anything source.has_failed = False - source.save() + save_model(source) # Index the source videos = source.index_media() if not videos: @@ -272,7 +282,7 @@ def index_source_task(source_id): f'is reachable') # Got some media, update the last crawl timestamp source.last_crawl = timezone.now() - source.save() + save_model(source) num_videos = len(videos) log.info(f'Found {num_videos} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) @@ -303,7 +313,7 @@ def index_source_task(source_id): if published_dt is not None: media.published = published_dt try: - media.save() + save_model(media) except IntegrityError as e: log.error(f'Index media failed: {source} / {media} with "{e}"') else: @@ -477,7 +487,7 @@ def download_media_metadata(media_id): media.duration = media.metadata_duration # Don't filter media here, the post_save signal will handle that - media.save() + save_model(media) log.info(f'Saved {len(media.metadata)} bytes of metadata for: ' f'{source} / {media}: {media_id}') @@ -606,7 +616,7 @@ def download_media(media_id): media.downloaded_hdr = cformat['is_hdr'] else: media.downloaded_format = 'audio' - media.save() + save_model(media) # If selected, copy the thumbnail over as well if media.source.copy_thumbnails: if not media.thumb_file_exists: @@ -704,8 +714,7 @@ def save_all_media_for_source(source_id): for mn, media in enumerate(mqs, start=1): if media.uuid not in saved_later: update_task_status(task, tvn_format.format(mn)) - with atomic(): - media.save() + save_model(media) # Reset task.verbose_name to the saved value update_task_status(task, None) @@ -722,8 +731,7 @@ def refesh_formats(media_id): log.debug(f'Failed to refresh formats for: {media.source} / {media.key}: {e!s}') pass else: - with atomic(): - media.save() + save_model(media) @background(schedule=dict(priority=20, run_at=60), queue=Val(TaskQueue.FS), remove_existing_tasks=True) @@ -780,17 +788,16 @@ def wait_for_media_premiere(media_id): return now = timezone.now() if media.published < now: + # the download tasks start after the media is saved media.manual_skip = False media.skip = False - # start the download tasks - media.save() else: media.manual_skip = True media.title = _(f'Premieres in {hours(media.published - now)} hours') - media.save() task = get_media_premiere_task(media_id) if task: update_task_status(task, f'available in {hours(media.published - now)} hours') + save_model(media) @background(schedule=dict(priority=1, run_at=300), queue=Val(TaskQueue.FS), remove_existing_tasks=False) def delete_all_media_for_source(source_id, source_name): From 57b7502a7bf517ad50cadf55ead9894a76ffb6a4 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 15 Apr 2025 17:47:52 -0400 Subject: [PATCH 02/14] Update README.md Link to the environment variable anchor. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3eefaef8..1bc07e07 100644 --- a/README.md +++ b/README.md @@ -263,7 +263,7 @@ and less common features: > Enabling this feature by default is planned in an upcoming release, after `2025-006-01`. > > To prevent your installation from scheduling media file renaming tasks, -> you must set `TUBESYNC_RENAME_ALL_SOURCES=False` in the environment variables. +> you must set [`TUBESYNC_RENAME_ALL_SOURCES=False`](#advanced-configuration) in the environment variables. ### 2. Index frequency From 7fd87fbb8903c63781161f7ff767e07404668640 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 15 Apr 2025 17:51:53 -0400 Subject: [PATCH 03/14] Mention `settings.py` setting --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1bc07e07..f506d76e 100644 --- a/README.md +++ b/README.md @@ -263,7 +263,7 @@ and less common features: > Enabling this feature by default is planned in an upcoming release, after `2025-006-01`. > > To prevent your installation from scheduling media file renaming tasks, -> you must set [`TUBESYNC_RENAME_ALL_SOURCES=False`](#advanced-configuration) in the environment variables. +> you must set [`TUBESYNC_RENAME_ALL_SOURCES=False`](#advanced-configuration) in the environment variables or `RENAME_ALL_SOURCES = False` in `settings.py`. ### 2. Index frequency From cf51901bee5b9a12d1ca96b3a177d0f483339904 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 15 Apr 2025 17:55:45 -0400 Subject: [PATCH 04/14] Link to `settings.py` --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f506d76e..1f3f4408 100644 --- a/README.md +++ b/README.md @@ -263,7 +263,7 @@ and less common features: > Enabling this feature by default is planned in an upcoming release, after `2025-006-01`. > > To prevent your installation from scheduling media file renaming tasks, -> you must set [`TUBESYNC_RENAME_ALL_SOURCES=False`](#advanced-configuration) in the environment variables or `RENAME_ALL_SOURCES = False` in `settings.py`. +> you must set [`TUBESYNC_RENAME_ALL_SOURCES=False`](#advanced-configuration) in the environment variables or `RENAME_ALL_SOURCES = False` in [`settings.py`](blob/1fc0462c11741621350053144ab19cba5f266cb2/tubesync/tubesync/settings.py#L183). ### 2. Index frequency From 2a5bf30ab6b594dab3adb4dcd8248bb8afc857eb Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 16 Apr 2025 02:36:43 -0400 Subject: [PATCH 05/14] Adjust the relative link in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f3f4408..e7568535 100644 --- a/README.md +++ b/README.md @@ -263,7 +263,7 @@ and less common features: > Enabling this feature by default is planned in an upcoming release, after `2025-006-01`. > > To prevent your installation from scheduling media file renaming tasks, -> you must set [`TUBESYNC_RENAME_ALL_SOURCES=False`](#advanced-configuration) in the environment variables or `RENAME_ALL_SOURCES = False` in [`settings.py`](blob/1fc0462c11741621350053144ab19cba5f266cb2/tubesync/tubesync/settings.py#L183). +> you must set [`TUBESYNC_RENAME_ALL_SOURCES=False`](#advanced-configuration) in the environment variables or `RENAME_ALL_SOURCES = False` in [`settings.py`](../1fc0462c11741621350053144ab19cba5f266cb2/tubesync/tubesync/settings.py#L183). ### 2. Index frequency From d60a159af7405548249ebf856d5d3c882291b7d2 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 17 Apr 2025 02:48:46 -0400 Subject: [PATCH 06/14] Add delay for SQLite to allow for more interleaving --- tubesync/sync/tasks.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index cb2b0a70..2c654749 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -7,6 +7,8 @@ import os import json import math +import random +import time import uuid from io import BytesIO from hashlib import sha1 @@ -17,7 +19,8 @@ from PIL import Image from django.conf import settings from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile -from django.db import connection, reset_queries, DatabaseError, IntegrityError +from django.db import reset_queries, DatabaseError, IntegrityError +from django.db.connection import vendor as db_vendor from django.db.transaction import atomic from django.utils import timezone from django.utils.translation import gettext_lazy as _ @@ -203,10 +206,12 @@ def migrate_queues(): def save_model(instance): - if 'sqlite' == connection.vendor: + if 'sqlite' == db_vendor: # a transaction here causes too many # database is locked errors + # with atomic(): instance.save() + time.sleep(random.expovariate(1.5)) else: with atomic(): instance.save() @@ -752,6 +757,14 @@ def save_all_media_for_source(source_id): ) saved_later.add(media.uuid) + # Keep out of the way of the index task! + # SQLite will be locked for a while if we start + # a large source, which reschedules a more costly task. + if 'sqlite' == db_vendor: + index_task = get_source_index_task(source_id) + if index_task and index_task.locked_by_pid_running(): + raise Exception(_('Indexing not completed')) + # Trigger the post_save signal for each media item linked to this source as various # flags may need to be recalculated tvn_format = '2/{:,}' + f'/{save_qs.count():,}' From 09969afa6e42c5ee37c9cad46cac8e3b443e48e7 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 17 Apr 2025 02:53:46 -0400 Subject: [PATCH 07/14] Import and set the variable --- tubesync/sync/tasks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 2c654749..3c461e38 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -16,11 +16,11 @@ from pathlib import Path from datetime import datetime, timedelta from shutil import copyfile, rmtree from PIL import Image +from django import db from django.conf import settings from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile from django.db import reset_queries, DatabaseError, IntegrityError -from django.db.connection import vendor as db_vendor from django.db.transaction import atomic from django.utils import timezone from django.utils.translation import gettext_lazy as _ @@ -38,6 +38,8 @@ from .utils import ( get_remote_image, resize_image_to_height, delete_file, write_text_file, filter_response, ) from .youtube import YouTubeError +db_vendor = db.connection.vendor + def get_hash(task_name, pk): ''' From 4bf975bf92d1c95dde265a66b7492c415326e03b Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 17 Apr 2025 03:03:00 -0400 Subject: [PATCH 08/14] Move the SQLite case to the end of the function --- tubesync/sync/tasks.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 3c461e38..17696295 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -208,15 +208,16 @@ def migrate_queues(): def save_model(instance): - if 'sqlite' == db_vendor: - # a transaction here causes too many - # database is locked errors - # with atomic(): - instance.save() - time.sleep(random.expovariate(1.5)) - else: + if 'sqlite' != db_vendor: with atomic(): instance.save() + return + + # work around for SQLite and its many + # "database is locked" errors + with atomic(): + instance.save() + time.sleep(random.expovariate(1.5)) @atomic(durable=False) From 3b5c7766347c64e761551ba5238cea73ea11aa41 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 17 Apr 2025 03:06:17 -0400 Subject: [PATCH 09/14] Do not interfere with the exception handling --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 17696295..d19b9f70 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -332,7 +332,7 @@ def index_source_task(source_id): if published_dt is not None: media.published = published_dt try: - save_model(media) + media.save() except IntegrityError as e: log.error(f'Index media failed: {source} / {media} with "{e}"') else: From b1e7f0eed2cffbad6bea28b2d7183071ca67879f Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 17 Apr 2025 03:11:14 -0400 Subject: [PATCH 10/14] Be explicit about not using durable with atomic --- tubesync/sync/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index d19b9f70..34a723d2 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -209,13 +209,13 @@ def migrate_queues(): def save_model(instance): if 'sqlite' != db_vendor: - with atomic(): + with atomic(durable=False): instance.save() return # work around for SQLite and its many # "database is locked" errors - with atomic(): + with atomic(durable=False): instance.save() time.sleep(random.expovariate(1.5)) From 035eeea54cfc97613ff8e8e97b55d6f0dc57dde4 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 17 Apr 2025 03:24:05 -0400 Subject: [PATCH 11/14] Add `SQLITE_DELAY_FLOAT` setting --- tubesync/tubesync/local_settings.py.container | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index 4f386b66..bcd70330 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -59,6 +59,12 @@ else: } DATABASE_CONNECTION_STR = f'sqlite at "{DATABASES["default"]["NAME"]}"' + # the argument to random.expovariate(), + # a larger value means less delay + # with too little delay, you may see + # more "database is locked" errors + SQLITE_DELAY_FLOAT = 1.5 + DEFAULT_THREADS = 1 BACKGROUND_TASK_ASYNC_THREADS = getenv('TUBESYNC_WORKERS', DEFAULT_THREADS, integer=True) From 134231fda7d5033614b12f5cd0bfe260e4482816 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 17 Apr 2025 03:27:17 -0400 Subject: [PATCH 12/14] Use the new setting --- tubesync/sync/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 34a723d2..87cf15a7 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -217,7 +217,8 @@ def save_model(instance): # "database is locked" errors with atomic(durable=False): instance.save() - time.sleep(random.expovariate(1.5)) + arg = getattr(settings, 'SQLITE_DELAY_FLOAT', 1.5) + time.sleep(random.expovariate(arg)) @atomic(durable=False) From 142e93500f8f3ad1dea3909b56eafed6b5ae1740 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 17 Apr 2025 03:31:10 -0400 Subject: [PATCH 13/14] Use the db import for reset_queries --- tubesync/sync/tasks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 87cf15a7..2b055ce5 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -20,7 +20,7 @@ from django import db from django.conf import settings from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile -from django.db import reset_queries, DatabaseError, IntegrityError +from django.db import DatabaseError, IntegrityError from django.db.transaction import atomic from django.utils import timezone from django.utils.translation import gettext_lazy as _ @@ -276,7 +276,7 @@ def index_source_task(source_id): ''' Indexes media available from a Source object. ''' - reset_queries() + db.reset_queries() cleanup_completed_tasks() # deleting expired media should happen any time an index task is requested cleanup_old_media() @@ -716,7 +716,7 @@ def save_all_media_for_source(source_id): source has its parameters changed and all media needs to be checked to see if its download status has changed. ''' - reset_queries() + db.reset_queries() try: source = Source.objects.get(pk=source_id) except Source.DoesNotExist as e: From a1495d27f0d211406cd045adfe8822bede0dd336 Mon Sep 17 00:00:00 2001 From: tcely Date: Thu, 17 Apr 2025 12:45:49 -0400 Subject: [PATCH 14/14] Update local_settings.py.container --- tubesync/tubesync/local_settings.py.container | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index bcd70330..8b61692b 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -63,7 +63,7 @@ else: # a larger value means less delay # with too little delay, you may see # more "database is locked" errors - SQLITE_DELAY_FLOAT = 1.5 + SQLITE_DELAY_FLOAT = 5 DEFAULT_THREADS = 1