From 6ae2676b8ff149fdd45ea2027d8eb41ab1361d47 Mon Sep 17 00:00:00 2001 From: Richard Hyde Date: Thu, 19 Dec 2024 18:18:33 +0000 Subject: [PATCH 01/42] Ignore case when checking the filter --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 2f116356..19fe409a 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -597,7 +597,7 @@ class Source(models.Model): def is_regex_match(self, media_item_title): if not self.filter_text: return True - return bool(re.search(self.filter_text, media_item_title)) + return bool(re.search(self.filter_text, media_item_title, re.I)) def get_index(self, type): indexer = self.INDEXERS.get(self.source_type, None) From 435885ff61d72479b15d6e4079b4289e00bb69a1 Mon Sep 17 00:00:00 2001 From: Richard Hyde Date: Thu, 19 Dec 2024 18:39:19 +0000 Subject: [PATCH 02/42] added sync.tasks.download_media_metadata to the task map --- tubesync/sync/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 37983932..767820ea 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -49,6 +49,7 @@ def map_task_to_instance(task): 'sync.tasks.check_source_directory_exists': Source, 'sync.tasks.download_media_thumbnail': Media, 'sync.tasks.download_media': Media, + 'sync.tasks.download_media_metadata': Media, 'sync.tasks.save_all_media_for_source': Source, } MODEL_URL_MAP = { From ca61df2e0bb4d07bc518d8f70dd4521ccdd1fe8c Mon Sep 17 00:00:00 2001 From: Richard Hyde Date: Thu, 19 Dec 2024 18:42:14 +0000 Subject: [PATCH 03/42] don't add the sync.tasks.download_media_metadata task if the video is skipped or there's already a task running --- tubesync/sync/signals.py | 4 ++-- tubesync/sync/tasks.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 9c541e0a..aba445d0 100644 --- a/tubesync/sync/signals.py +++ 
b/tubesync/sync/signals.py @@ -12,7 +12,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta download_media_thumbnail, download_media_metadata, map_task_to_instance, check_source_directory_exists, download_media, rescan_media_server, download_source_images, - save_all_media_for_source) + save_all_media_for_source, get_media_metadata_task) from .utils import delete_file from .filtering import filter_media @@ -133,7 +133,7 @@ def media_post_save(sender, instance, created, **kwargs): instance.save() post_save.connect(media_post_save, sender=Media) # If the media is missing metadata schedule it to be downloaded - if not instance.metadata: + if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk): log.info(f'Scheduling task to download metadata for: {instance.url}') verbose_name = _('Downloading metadata for "{}"') download_media_metadata( diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 767820ea..9f3eea57 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -118,6 +118,12 @@ def get_media_download_task(media_id): except IndexError: return False +def get_media_metadata_task(media_id): + try: + return Task.objects.get_task('sync.tasks.download_media_metadata', + args=(str(media_id),))[0] + except IndexError: + return False def delete_task_by_source(task_name, source_id): return Task.objects.filter(task_name=task_name, queue=str(source_id)).delete() From 919d933d4d18041741f8d434b0d6e614ea750fc8 Mon Sep 17 00:00:00 2001 From: Richard Hyde Date: Thu, 19 Dec 2024 18:44:35 +0000 Subject: [PATCH 04/42] mark the video as skipped if the sync.tasks.download_media_metadata task fails --- tubesync/sync/signals.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index aba445d0..cd8cf621 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -102,6 +102,10 @@ def task_task_failed(sender, task_id, 
completed_task, **kwargs): obj.has_failed = True obj.save() + if isinstance(obj, Media) and completed_task.task_name == "sync.tasks.download_media_metadata": + log.error(f'Permanent failure for media: {obj} task: {completed_task}') + obj.skip = True + obj.save() @receiver(post_save, sender=Media) def media_post_save(sender, instance, created, **kwargs): From 394f937fcbd6008a7466c7645fd72ab691cfa7ee Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 20 Dec 2024 10:07:24 -0500 Subject: [PATCH 05/42] Handle raise_no_formats exception Catching this and checking the message may be the best way to skip members only videos. --- tubesync/sync/youtube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index d51c7b45..33cdb795 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -83,6 +83,7 @@ def get_media_info(url): ''' opts = get_yt_opts() opts.update({ + 'ignore_no_formats_error': False, 'skip_download': True, 'forcejson': True, 'simulate': True, @@ -93,6 +94,11 @@ def get_media_info(url): with yt_dlp.YoutubeDL(opts) as y: try: response = y.extract_info(url, download=False) + except yt_dlp.utils.ExtractorError as e: + if not e.expected: + raise e + log.warn(e.msg) + pass except yt_dlp.utils.DownloadError as e: raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e if not response: From 16e86ba79a90dcdfce1b4adeb5251e9de606045e Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 21 Dec 2024 05:55:48 -0500 Subject: [PATCH 06/42] Accept limited metadata for subscriber_only videos --- tubesync/sync/youtube.py | 45 +++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 33cdb795..6911eabd 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -5,6 +5,7 @@ import os +import json from pathlib import Path from django.conf import settings from copy import copy @@ -74,6 +75,28 
@@ def get_channel_image_info(url): raise YouTubeError(f'Failed to extract channel info for "{url}": {e}') from e +def _subscriber_only(msg='', response=None): + if response is None: + # process msg only + msg = str(msg) + if 'access to members-only content' in msg: + return True + if ': Join this channel' in msg: + return True + else: + # ignore msg entirely + try: + data = json.loads(response) + except (TypeError, ValueError, AttributeError): + return False + + if 'availability' not in data.keys(): + return False + + # check for the specific expected value + return 'subscriber_only' == data.get('availability') + return False + def get_media_info(url): ''' @@ -83,7 +106,8 @@ def get_media_info(url): ''' opts = get_yt_opts() opts.update({ - 'ignore_no_formats_error': False, + 'ignoreerrors': False, # explicitly set this to catch exceptions + 'ignore_no_formats_error': False, # we must fail first to try again with this enabled 'skip_download': True, 'forcejson': True, 'simulate': True, @@ -94,13 +118,20 @@ def get_media_info(url): with yt_dlp.YoutubeDL(opts) as y: try: response = y.extract_info(url, download=False) - except yt_dlp.utils.ExtractorError as e: - if not e.expected: - raise e - log.warn(e.msg) - pass except yt_dlp.utils.DownloadError as e: - raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e + if not _subscriber_only(msg=e.msg): + raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e + # adjust options and try again + opts.update({'ignore_no_formats_error': True,}) + with yt_dlp.YoutubeDL(opts) as yy: + try: + response = yy.extract_info(url, download=False) + except yt_dlp.utils.DownloadError as ee: + raise YouTubeError(f'Failed (again) to extract_info for "{url}": {ee}') from ee + # validate the response is what we expected + if not _subscriber_only(response=response): + response = {} + if not response: raise YouTubeError(f'Failed to extract_info for "{url}": No metadata was ' f'returned by youtube-dl, check for error 
messages in the ' From 54f2663f820a7826ba3254ab3cc1b8efaf17f55e Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 21 Dec 2024 06:50:24 -0500 Subject: [PATCH 07/42] json.loads() expects a string --- tubesync/sync/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 6911eabd..2a8de68f 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -86,7 +86,7 @@ def _subscriber_only(msg='', response=None): else: # ignore msg entirely try: - data = json.loads(response) + data = json.loads(str(response)) except (TypeError, ValueError, AttributeError): return False From cf951a820aefcafb51e3e655576a4aa4c29f9e1c Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 21 Dec 2024 07:53:23 -0500 Subject: [PATCH 08/42] response is a dict just now I don't know what I was thinking. Add some type checking while I am at it. --- tubesync/sync/youtube.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 2a8de68f..4de2759f 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -5,7 +5,6 @@ import os -import json from pathlib import Path from django.conf import settings from copy import copy @@ -85,16 +84,14 @@ def _subscriber_only(msg='', response=None): return True else: # ignore msg entirely - try: - data = json.loads(str(response)) - except (TypeError, ValueError, AttributeError): - return False + if not isinstance(response, dict): + raise TypeError(f'response must be a dict, got "{type(response)}" instead') - if 'availability' not in data.keys(): + if 'availability' not in response.keys(): return False # check for the specific expected value - return 'subscriber_only' == data.get('availability') + return 'subscriber_only' == response.get('availability') return False From 8f0e6c381f3d95d203e129b3cfab09ccf51b7206 Mon Sep 17 00:00:00 2001 From: Richard Hyde Date: Sat, 21 Dec 2024 14:16:16 +0000 
Subject: [PATCH 09/42] Removed filter ignore case change for pull request --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 19fe409a..2f116356 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -597,7 +597,7 @@ class Source(models.Model): def is_regex_match(self, media_item_title): if not self.filter_text: return True - return bool(re.search(self.filter_text, media_item_title, re.I)) + return bool(re.search(self.filter_text, media_item_title)) def get_index(self, type): indexer = self.INDEXERS.get(self.source_type, None) From 6c757185db6ace30a9ad347941275abbc67eb5bb Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 21 Dec 2024 16:19:26 -0500 Subject: [PATCH 10/42] Do not try to skip already skipped media Logically these functions can only mark media instances as skipped, so running them for media instances that are already marked that way is a waste of resources. --- tubesync/sync/filtering.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py index 45710fe9..b051b0a3 100644 --- a/tubesync/sync/filtering.py +++ b/tubesync/sync/filtering.py @@ -15,27 +15,27 @@ def filter_media(instance: Media): skip = False # Check if it's published - if filter_published(instance): + if not instance.skip and filter_published(instance): skip = True # Check if older than max_cap_age, skip - if filter_max_cap(instance): + if not instance.skip and filter_max_cap(instance): skip = True # Check if older than source_cutoff - if filter_source_cutoff(instance): + if not instance.skip and filter_source_cutoff(instance): skip = True # Check if we have filter_text and filter text matches - if filter_filter_text(instance): + if not instance.skip and filter_filter_text(instance): skip = True # Check if the video is longer than the max, or shorter than the min - if filter_duration(instance): + if not 
instance.skip and filter_duration(instance): skip = True # If we aren't already skipping the file, call our custom function that can be overridden - if not skip and filter_custom(instance): + if not skip and not instance.skip and filter_custom(instance): log.info(f"Media: {instance.source} / {instance} has been skipped by Custom Filter") skip = True From f88df0d4e601c78b20cac9c9adb75348bc36eb6c Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 21 Dec 2024 16:46:17 -0500 Subject: [PATCH 11/42] Manually skipped items do not need additional filtering --- tubesync/sync/filtering.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py index b051b0a3..338b0a42 100644 --- a/tubesync/sync/filtering.py +++ b/tubesync/sync/filtering.py @@ -14,28 +14,32 @@ def filter_media(instance: Media): # Assume we aren't skipping it, if any of these conditions are true, we skip it skip = False + # Check if it has already been marked as skipped + if instance.manual_skip: + skip = True + # Check if it's published - if not instance.skip and filter_published(instance): + if not skip and filter_published(instance): skip = True # Check if older than max_cap_age, skip - if not instance.skip and filter_max_cap(instance): + if not skip and filter_max_cap(instance): skip = True # Check if older than source_cutoff - if not instance.skip and filter_source_cutoff(instance): + if not skip and filter_source_cutoff(instance): skip = True # Check if we have filter_text and filter text matches - if not instance.skip and filter_filter_text(instance): + if not skip and filter_filter_text(instance): skip = True # Check if the video is longer than the max, or shorter than the min - if not instance.skip and filter_duration(instance): + if not skip and filter_duration(instance): skip = True # If we aren't already skipping the file, call our custom function that can be overridden - if not skip and not instance.skip and 
filter_custom(instance): + if not skip and filter_custom(instance): log.info(f"Media: {instance.source} / {instance} has been skipped by Custom Filter") skip = True From abfa8ca042491bdef9ac72592e48fe29f540c573 Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 21 Dec 2024 17:01:13 -0500 Subject: [PATCH 12/42] Do not log when the skip state remains unchanged --- tubesync/sync/filtering.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py index 338b0a42..4ca8a382 100644 --- a/tubesync/sync/filtering.py +++ b/tubesync/sync/filtering.py @@ -122,10 +122,12 @@ def filter_max_cap(instance: Media): return False if instance.published <= max_cap_age: - log.info( - f"Media: {instance.source} / {instance} is too old for " - f"the download cap date, marking to be skipped" - ) + # log new media instances, not every media instance every time + if not instance.skip: + log.info( + f"Media: {instance.source} / {instance} is too old for " + f"the download cap date, marking to be skipped" + ) return True return False From f4c070ada721f35aeb76f9fc662e069d2ecec3bd Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 21 Dec 2024 20:08:36 -0500 Subject: [PATCH 13/42] Reduce image size and keep pycache in a volume Not only are volumes typically faster to access than the image overlay, the cache can be reused between containers.
--- Dockerfile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4941821b..a472f37a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -138,6 +138,9 @@ COPY pip.conf /etc/pip.conf # Add Pipfile COPY Pipfile /app/Pipfile +# Do not include compiled byte-code +ENV PIP_ROOT_USER_ACTION='ignore' PIP_NO_COMPILE=1 + # Switch workdir to the the app WORKDIR /app @@ -163,7 +166,7 @@ RUN set -x && \ useradd -M -d /app -s /bin/false -g app app && \ # Install non-distro packages cp -at /tmp/ "${HOME}" && \ - PIPENV_VERBOSITY=64 HOME="/tmp/${HOME#/}" pipenv install --system --skip-lock && \ + PIPENV_VERBOSITY=64 HOME="/tmp/${HOME#/}" pipenv install --system --skip-lock --no-cache-dir && \ # Clean up rm /app/Pipfile && \ pipenv --clear && \ @@ -184,7 +187,7 @@ RUN set -x && \ apt-get -y autoclean && \ rm -rf /var/lib/apt/lists/* && \ rm -rf /var/cache/apt/* && \ - rm -rf /tmp/* + rm -rfv /tmp/* # Copy app @@ -196,11 +199,12 @@ RUN set -x && \ # Make absolutely sure we didn't accidentally bundle a SQLite dev database rm -rf /app/db.sqlite3 && \ # Run any required app commands - /usr/bin/python3 /app/manage.py compilescss && \ - /usr/bin/python3 /app/manage.py collectstatic --no-input --link && \ + /usr/bin/python3 -B /app/manage.py compilescss && \ + /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \ # Create config, downloads and run dirs mkdir -v -p /run/app && \ mkdir -v -p /config/media && \ + mkdir -v -p /config/pycache && \ mkdir -v -p /downloads/audio && \ mkdir -v -p /downloads/video @@ -219,7 +223,7 @@ COPY config/root / HEALTHCHECK --interval=1m --timeout=10s CMD /app/healthcheck.py http://127.0.0.1:8080/healthcheck # ENVS and ports -ENV PYTHONPATH="/app" +ENV PYTHONPATH="/app" PYTHONPYCACHEPREFIX="/config/pycache" EXPOSE 4848 # Volumes From a2c97d27912e5118797637c2776f9a9317609f9a Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 21 Dec 2024 20:22:08 -0500 Subject: [PATCH 14/42] pipenv did not run 
with that option --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a472f37a..05fd7686 100644 --- a/Dockerfile +++ b/Dockerfile @@ -139,7 +139,9 @@ COPY pip.conf /etc/pip.conf COPY Pipfile /app/Pipfile # Do not include compiled byte-code -ENV PIP_ROOT_USER_ACTION='ignore' PIP_NO_COMPILE=1 +ENV PIP_NO_COMPILE=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_ROOT_USER_ACTION='ignore' # Switch workdir to the the app WORKDIR /app From 2c26936035d798dcfd598ccd2d50fe39b9e41c5b Mon Sep 17 00:00:00 2001 From: tcely Date: Sat, 21 Dec 2024 20:23:47 -0500 Subject: [PATCH 15/42] fixup: remove the --no-cache-dir option --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 05fd7686..0434d08b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -168,7 +168,7 @@ RUN set -x && \ useradd -M -d /app -s /bin/false -g app app && \ # Install non-distro packages cp -at /tmp/ "${HOME}" && \ - PIPENV_VERBOSITY=64 HOME="/tmp/${HOME#/}" pipenv install --system --skip-lock --no-cache-dir && \ + PIPENV_VERBOSITY=64 HOME="/tmp/${HOME#/}" pipenv install --system --skip-lock && \ # Clean up rm /app/Pipfile && \ pipenv --clear && \ From b9eb28fd36eaf2b1383651bab4fd544e3b2072f1 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 01:45:16 -0500 Subject: [PATCH 16/42] There is a /config/cache directory we can safely use --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0434d08b..b89a48b6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -206,7 +206,7 @@ RUN set -x && \ # Create config, downloads and run dirs mkdir -v -p /run/app && \ mkdir -v -p /config/media && \ - mkdir -v -p /config/pycache && \ + mkdir -v -p /config/cache/pycache && \ mkdir -v -p /downloads/audio && \ mkdir -v -p /downloads/video @@ -225,7 +225,7 @@ COPY config/root / HEALTHCHECK --interval=1m --timeout=10s CMD /app/healthcheck.py http://127.0.0.1:8080/healthcheck # ENVS 
and ports -ENV PYTHONPATH="/app" PYTHONPYCACHEPREFIX="/config/pycache" +ENV PYTHONPATH="/app" PYTHONPYCACHEPREFIX="/config/cache/pycache" EXPOSE 4848 # Volumes From 2f89c333a49339b9cc17de882ed58a3f0c8aa64e Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 01:52:15 -0500 Subject: [PATCH 17/42] yt_dlp controls this directory It's probably safe, and `yt_dlp` plays nicely when cleaning up, but this is more correct. --- tubesync/tubesync/local_settings.py.container | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index a0426a4c..84e29c5f 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -60,7 +60,7 @@ if BACKGROUND_TASK_ASYNC_THREADS > MAX_BACKGROUND_TASK_ASYNC_THREADS: MEDIA_ROOT = CONFIG_BASE_DIR / 'media' DOWNLOAD_ROOT = DOWNLOADS_BASE_DIR -YOUTUBE_DL_CACHEDIR = CONFIG_BASE_DIR / 'cache' +YOUTUBE_DL_CACHEDIR = CONFIG_BASE_DIR / 'cache/youtube' YOUTUBE_DL_TEMPDIR = DOWNLOAD_ROOT / 'cache' COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt' From 42a1033e7eeef1dd3e9a17cbda7ff60d5c971fc4 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 02:42:30 -0500 Subject: [PATCH 18/42] Rename old_youtube_cache_dirs It makes sense to do this here because anyone who had the previous setting was, more than likely, using this file. 
--- tubesync/tubesync/local_settings.py.container | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index 84e29c5f..e75778b8 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -88,3 +88,13 @@ SOURCE_DOWNLOAD_DIRECTORY_PREFIX = True if SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR VIDEO_HEIGHT_CUTOFF = int(os.getenv("TUBESYNC_VIDEO_HEIGHT_CUTOFF", "240")) + + +# ensure that the current directory exists +if not YOUTUBE_DL_CACHEDIR.is_dir(): + YOUTUBE_DL_CACHEDIR.mkdir(parents=True) +# rename any old yt_dlp cache directories to the current directory +old_youtube_cache_dirs = list(YOUTUBE_DL_CACHEDIR.parent.glob('youtube-*')) +for cache_dir in old_youtube_cache_dirs: + cache_dir.rename(YOUTUBE_DL_CACHEDIR / cache_dir.name) + From 6cce0520cbb7b3a5e25be8c9a01f6893a4279c52 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 04:28:43 -0500 Subject: [PATCH 19/42] Update ci.yaml * Use the current major version tag for actions. * Add current python versions. * Remove Python 3.7 as it isn't being setup. * Don't fail fast, so that one old version won't prevent testing other versions. 
--- .github/workflows/ci.yaml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a2b1225b..8dcefc78 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,12 +13,13 @@ jobs: test: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9] + python-version: [3.8, 3.9, 3.10, 3.11, 3.12, 3.13] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -34,18 +35,18 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Log into GitHub Container Registry run: echo "${{ secrets.REGISTRY_ACCESS_TOKEN }}" | docker login https://ghcr.io -u ${{ github.actor }} --password-stdin - name: Lowercase github username for ghcr id: string - uses: ASzc/change-string-case-action@v1 + uses: ASzc/change-string-case-action@v6 with: string: ${{ github.actor }} - name: Build and push - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v6 with: platforms: linux/amd64,linux/arm64 push: true From 7c77ed1d789c1152e03b9f045819e3736e4d67f4 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 04:48:53 -0500 Subject: [PATCH 20/42] Update ci.yaml We can keep testing with Python `3.7` by using `ubuntu-22.04` instead of `ubuntu-latest` according to the `setup-python` issue. 
More info: https://github.com/actions/setup-python/issues/962 --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8dcefc78..6f9ad6c8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -11,11 +11,11 @@ on: jobs: test: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: fail-fast: false matrix: - python-version: [3.8, 3.9, 3.10, 3.11, 3.12, 3.13] + python-version: [3.7, 3.8, 3.9, 3.10, 3.11, 3.12, 3.13] steps: - uses: actions/checkout@v4 - name: Install Python ${{ matrix.python-version }} From 256f3254d9c38ed07a1faf1c1f1774ea2068f779 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 05:21:28 -0500 Subject: [PATCH 21/42] Quote Python versions in ci.yaml --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6f9ad6c8..8d612e02 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, 3.10, 3.11, 3.12, 3.13] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 - name: Install Python ${{ matrix.python-version }} From a1248be4ae1a6cbcbf29e28900a868898df1e2f5 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 05:33:08 -0500 Subject: [PATCH 22/42] Update ci.yaml Python `3.13` is not expected to work until Django `5.1` and we are currently using `3.2`, so that's not worth testing. 
More info: https://code.djangoproject.com/ticket/34900 --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8d612e02..faf25319 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 - name: Install Python ${{ matrix.python-version }} From 4bbdfd2f24de4a55c0328efb9cdb8ae612e08c8a Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 09:44:27 -0500 Subject: [PATCH 23/42] Fix paths for when filename includes sub-directories --- tubesync/sync/models.py | 78 ++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 455a38a5..b969ec4e 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1261,45 +1261,6 @@ class Media(models.Model): media_details = self.format_dict return media_format.format(**media_details) - @property - def thumbname(self): - if self.downloaded and self.media_file: - filename = os.path.basename(self.media_file.path) - else: - filename = self.filename - prefix, ext = os.path.splitext(filename) - return f'{prefix}.jpg' - - @property - def thumbpath(self): - return self.source.directory_path / self.thumbname - - @property - def nfoname(self): - if self.downloaded and self.media_file: - filename = os.path.basename(self.media_file.path) - else: - filename = self.filename - prefix, ext = os.path.splitext(filename) - return f'{prefix}.nfo' - - @property - def nfopath(self): - return self.source.directory_path / self.nfoname - - @property - def jsonname(self): - if self.downloaded and self.media_file: - filename = os.path.basename(self.media_file.path) - else: - filename = self.filename - 
prefix, ext = os.path.splitext(filename) - return f'{prefix}.info.json' - - @property - def jsonpath(self): - return self.source.directory_path / self.jsonname - @property def directory_path(self): dirname = self.source.directory_path / self.filename @@ -1309,6 +1270,45 @@ class Media(models.Model): def filepath(self): return self.source.directory_path / self.filename + @property + def thumbname(self): + if self.downloaded and self.media_file: + filename = self.media_file.path + else: + filename = self.filename + prefix, ext = os.path.splitext(os.path.basename(filename)) + return f'{prefix}.jpg' + + @property + def thumbpath(self): + return self.directory_path / self.thumbname + + @property + def nfoname(self): + if self.downloaded and self.media_file: + filename = self.media_file.path + else: + filename = self.filename + prefix, ext = os.path.splitext(os.path.basename(filename)) + return f'{prefix}.nfo' + + @property + def nfopath(self): + return self.directory_path / self.nfoname + + @property + def jsonname(self): + if self.downloaded and self.media_file: + filename = self.media_file.path + else: + filename = self.filename + prefix, ext = os.path.splitext(os.path.basename(filename)) + return f'{prefix}.info.json' + + @property + def jsonpath(self): + return self.directory_path / self.jsonname + @property def thumb_file_exists(self): if not self.thumb: From 8ebb333ea35d557f93d71cda5922110090aa4924 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 10:09:16 -0500 Subject: [PATCH 24/42] directory_path should be a Path, not str --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index b969ec4e..e0c23bf8 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1264,7 +1264,7 @@ class Media(models.Model): @property def directory_path(self): dirname = self.source.directory_path / self.filename - return os.path.dirname(str(dirname)) + return dirname.parent 
@property def filepath(self): From 20527aa3c2cc5f67da7e53999d5a28c1055793bc Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 10:26:41 -0500 Subject: [PATCH 25/42] Update tests to not expect strings --- tubesync/sync/tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index 6aa0ccb6..8f0de6ef 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -597,11 +597,11 @@ class FilepathTestCase(TestCase): # Check child directories work self.source.media_format = '{yyyy}/{key}.{ext}' self.assertEqual(self.media.directory_path, - str(self.source.directory_path / '2017')) + self.source.directory_path / '2017') self.assertEqual(self.media.filename, '2017/mediakey.mkv') self.source.media_format = '{yyyy}/{yyyy_mm_dd}/{key}.{ext}' self.assertEqual(self.media.directory_path, - str(self.source.directory_path / '2017/2017-09-11')) + self.source.directory_path / '2017/2017-09-11') self.assertEqual(self.media.filename, '2017/2017-09-11/mediakey.mkv') # Check media specific media format keys work test_media = Media.objects.create( From 495f62f25a3efaf668b9758c1cb8ef02476bbbf0 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 11:14:32 -0500 Subject: [PATCH 26/42] Display only the name part `media.filename` is a `str` `media.filepath` is a `Path` The second has a simple way to remove directories, so use that instead of any custom string manipulation. --- tubesync/sync/templates/sync/media-item.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html index 0c78f9b4..d6a74d9f 100644 --- a/tubesync/sync/templates/sync/media-item.html +++ b/tubesync/sync/templates/sync/media-item.html @@ -98,7 +98,7 @@ {% if media.downloaded %} Filename - Filename
{{ media.filename }} + Filename
{{ media.filepath.name }} Directory From 8a04139da9f300d6560b2aefebf4f9a7d92a118e Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 11:46:29 -0500 Subject: [PATCH 27/42] Add data items to make comparison easier --- tubesync/sync/views.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 94e91432..52090042 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -582,6 +582,8 @@ class MediaItemView(DetailView): data['video_exact'] = video_exact data['video_format'] = video_format data['youtube_dl_format'] = self.object.get_format_str() + data['filename_path'] = pathlib.Path(self.object.filename) + data['media_file_path'] = pathlib.Path(self.object.media_file.path) if self.object.media_file else None return data From d1c6236b9d7ba8848f61be1dae81b59ed7241c9c Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 11:54:56 -0500 Subject: [PATCH 28/42] Display the location the media was actually saved to --- tubesync/sync/templates/sync/media-item.html | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html index d6a74d9f..a326b09b 100644 --- a/tubesync/sync/templates/sync/media-item.html +++ b/tubesync/sync/templates/sync/media-item.html @@ -100,10 +100,17 @@ Filename Filename
{{ media.filepath.name }} - + Directory Directory
{{ media.directory_path }} + + Database Filepath + DB Filepath
{{ media_file_path }} + {% if media_file_path == media.filepath %} +  (matched) + {% endif %} + File size File size
{{ media.downloaded_filesize|filesizeformat }} From 67b4995448256dff1bc11c8f35339b3329c654b4 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 22 Dec 2024 16:16:06 -0500 Subject: [PATCH 29/42] Remove manual_skip check --- tubesync/sync/filtering.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py index 4ca8a382..ff356edb 100644 --- a/tubesync/sync/filtering.py +++ b/tubesync/sync/filtering.py @@ -14,10 +14,6 @@ def filter_media(instance: Media): # Assume we aren't skipping it, if any of these conditions are true, we skip it skip = False - # Check if it has already been marked as skipped - if instance.manual_skip: - skip = True - # Check if it's published if not skip and filter_published(instance): skip = True From b19614cc9f95adda1e0247d5f51b7a68db016d6d Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 23 Dec 2024 01:28:46 -0500 Subject: [PATCH 30/42] Do not log download progress lines by default When you set debugging, the default progress output can be viewed and/or logged, depending on the other options. --- tubesync/sync/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index cb612c69..446e456d 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -151,6 +151,7 @@ def download_media(url, media_format, extension, output_file, info_json, 'outtmpl': os.path.basename(output_file), 'quiet': False if settings.DEBUG else True, 'verbose': True if settings.DEBUG else False, + 'noprogress': None if settings.DEBUG else True, 'progress_hooks': [hook], 'writeinfojson': info_json, 'postprocessors': [], From 2799d95119fe29dc3eb8d2892f753591c852d70e Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 23 Dec 2024 10:17:58 -0500 Subject: [PATCH 31/42] Only log new media Channels with thousands of videos, that won't be downloaded, create large blocks in the logs without this. 
--- tubesync/sync/tasks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 37983932..8fcc6630 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -191,7 +191,10 @@ def index_source_task(source_id): media.source = source try: media.save() - log.info(f'Indexed media: {source} / {media}') + log.debug(f'Indexed media: {source} / {media}') + # log the new media instances + if media.created >= source.last_crawl: + log.info(f'Indexed new media: {source} / {media}') except IntegrityError as e: log.error(f'Index media failed: {source} / {media} with "{e}"') # Tack on a cleanup of old completed tasks From 3b41c8df8160197d9fb03b66fad9ef23e82c644d Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 23 Dec 2024 16:55:26 -0500 Subject: [PATCH 32/42] Remove duplicated ' to:' --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 37983932..10350562 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -430,7 +430,7 @@ def download_media(media_id): copyfile(media.thumb.path, media.thumbpath) # If selected, write an NFO file if media.source.write_nfo: - log.info(f'Writing media NFO file to: to: {media.nfopath}') + log.info(f'Writing media NFO file to: {media.nfopath}') write_text_file(media.nfopath, media.nfoxml) # Schedule a task to update media servers for mediaserver in MediaServer.objects.all(): From 99899aac9c611c548473ba30ee4d2f85aae45d4e Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 23 Dec 2024 20:13:56 -0500 Subject: [PATCH 33/42] Use title from DB We stored a stripped title string in the database, just use that instead of `.name` and calling `.strip()` again. 
--- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 455a38a5..d558d4ec 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1353,7 +1353,7 @@ class Media(models.Model): nfo.text = '\n ' # title = media metadata title title = nfo.makeelement('title', {}) - title.text = clean_emoji(str(self.name).strip()) + title.text = clean_emoji(self.title) title.tail = '\n ' nfo.append(title) # showtitle = source name From 01f0d827884899d2ac9a4a0a276766a72c8906a6 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 25 Dec 2024 00:07:57 -0500 Subject: [PATCH 34/42] Cleanup removed media that has not been downloaded too --- tubesync/sync/tasks.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 37983932..fe692830 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -146,12 +146,12 @@ def cleanup_old_media(): def cleanup_removed_media(source, videos): - media_objects = Media.objects.filter(source=source, downloaded=True) - for item in media_objects: - matching_source_item = [video['id'] for video in videos if video['id'] == item.key] + media_objects = Media.objects.filter(source=source) + for media in media_objects: + matching_source_item = [video['id'] for video in videos if video['id'] == media.key] if not matching_source_item: - log.info(f'{item.title} is no longer in source, removing') - item.delete() + log.info(f'{media.name} is no longer in source, removing') + media.delete() @background(schedule=0) From 6c4d31765ae36565f8db37c982cf72c4f33cd1d9 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 25 Dec 2024 00:16:18 -0500 Subject: [PATCH 35/42] Made messages more human friendly --- tubesync/sync/tasks.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 37983932..176b9186 100644 --- 
a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -199,7 +199,7 @@ def index_source_task(source_id): # Tack on a cleanup of old media cleanup_old_media() if source.delete_removed_media: - log.info(f'Cleaning up media no longer in source {source}') + log.info(f'Cleaning up media no longer in source: {source}') cleanup_removed_media(source, videos) @@ -236,7 +236,7 @@ def download_source_images(source_id): f'source exists with ID: {source_id}') return avatar, banner = source.get_image_url - log.info(f'Thumbnail URL for source with ID: {source_id} ' + log.info(f'Thumbnail URL for source with ID: {source_id} / {source} ' f'Avatar: {avatar} ' f'Banner: {banner}') if banner != None: @@ -269,7 +269,7 @@ def download_source_images(source_id): with open(file_path, 'wb') as f: f.write(django_file.read()) - log.info(f'Thumbnail downloaded for source with ID: {source_id}') + log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}') @background(schedule=0) @@ -285,7 +285,7 @@ def download_media_metadata(media_id): f'media exists with ID: {media_id}') return if media.manual_skip: - log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.') + log.info(f'Task for ID: {media_id} / {media} skipped, due to task being manually skipped.') return source = media.source metadata = media.index_metadata() @@ -306,7 +306,7 @@ def download_media_metadata(media_id): # Don't filter media here, the post_save signal will handle that media.save() log.info(f'Saved {len(media.metadata)} bytes of metadata for: ' - f'{source} / {media_id}') + f'{source} / {media}: {media_id}') @background(schedule=0) From 0795eb951d7918c3ee3333494f1952014068cbd5 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 25 Dec 2024 00:21:58 -0500 Subject: [PATCH 36/42] Consistency fixes The `filepath` variable is defined before the `if` and used throughout the first block, so we should use it in the `else` block too. 
Otherwise, a change to that variable would change the first block but not the second block. --- tubesync/sync/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 10350562..f58fce36 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -359,7 +359,7 @@ def download_media(media_id): return if media.skip: # Media was toggled to be skipped after the task was scheduled - log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' + log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' f'it is now marked to be skipped, not downloading') return if media.downloaded and media.media_file: @@ -446,7 +446,7 @@ def download_media(media_id): else: # Expected file doesn't exist on disk err = (f'Failed to download media: {media} (UUID: {media.pk}) to disk, ' - f'expected outfile does not exist: {media.filepath}') + f'expected outfile does not exist: {filepath}') log.error(err) # Raising an error here triggers the task to be re-attempted (or fail) raise DownloadFailedException(err) From 7343e39f35bc466ca6efdea387787ee7083ec7f9 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 25 Dec 2024 00:32:03 -0500 Subject: [PATCH 37/42] Verbose output is no longer needed I confirmed that the changes reduced the amount of files being removed. --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index b89a48b6..b33af383 100644 --- a/Dockerfile +++ b/Dockerfile @@ -189,7 +189,7 @@ RUN set -x && \ apt-get -y autoclean && \ rm -rf /var/lib/apt/lists/* && \ rm -rf /var/cache/apt/* && \ - rm -rfv /tmp/* + rm -rf /tmp/* # Copy app From 8def84c7b4b4ee510eee8ae449ef031bb311bff3 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 25 Dec 2024 01:17:04 -0500 Subject: [PATCH 38/42] Use filename_path It's more directly what we want here. 
If `filepath` ever changed to not use `filename` the value would be incorrect, so this is the cleaner way to display this value. --- tubesync/sync/templates/sync/media-item.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html index a326b09b..6f751be6 100644 --- a/tubesync/sync/templates/sync/media-item.html +++ b/tubesync/sync/templates/sync/media-item.html @@ -98,7 +98,7 @@ {% if media.downloaded %} Filename - Filename
{{ media.filepath.name }} + Filename
{{ filename_path.name }} Directory From 3cea9270a0a14fbf245d7f334212d65096a3944f Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 25 Dec 2024 05:09:30 -0500 Subject: [PATCH 39/42] Bump ffmpeg --- Dockerfile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4941821b..120a8424 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,10 +8,10 @@ ARG SHA256_S6_AMD64="59289456ab1761e277bd456a95e737c06b03ede99158beb24f12b165a90 ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f66f785" ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae" -ARG FFMPEG_DATE="autobuild-2024-12-09-14-16" -ARG FFMPEG_VERSION="118034-gd21134313f" -ARG SHA256_FFMPEG_AMD64="cd50122fb0939e913585282347a8f95074c2d5477ceb059cd90aca551f14e9ea" -ARG SHA256_FFMPEG_ARM64="33b4edebf9c23701473ba8db696b26072bb9b9c05fc4a156e115f94e44d361e0" +ARG FFMPEG_DATE="autobuild-2024-12-24-14-15" +ARG FFMPEG_VERSION="n7.1-62-gb168ed9b14" +ARG SHA256_FFMPEG_AMD64="56f7ae56ee3cf5906006fb5845d963cae3580513a22d84236e82bc307c3d6fd5" +ARG SHA256_FFMPEG_ARM64="2872f0ecfe791c9d9837b2563af4e77dc862c766abef5108c84e082cab5fad1f" ENV S6_VERSION="${S6_VERSION}" \ FFMPEG_DATE="${FFMPEG_DATE}" \ @@ -49,12 +49,16 @@ RUN decide_arch() { \ decide_url() { \ case "${1}" in \ (ffmpeg) printf -- \ - 'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-N-%s-linux%s-gpl.tar.xz' \ + 'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-%s-linux%s-gpl%s.tar.xz' \ "${FFMPEG_DATE}" \ "${FFMPEG_VERSION}" \ "$(case "${2}" in \ (amd64) printf -- '64' ;; \ (*) printf -- '%s' "${2}" ;; \ + esac)" \ + "$(case "${FFMPEG_VERSION%%-*}" in \ + (n*) printf -- '-%s\n' "${FFMPEG_VERSION#n}" | cut -d '-' -f 1,2 ;; \ + (*) printf -- '' ;; \ esac)" ;; \ (s6) printf -- \ 'https://github.com/just-containers/s6-overlay/releases/download/v%s/s6-overlay-%s.tar.xz' \ From 568698351dd6977879eaa6777561e41ba1b69a80 Mon Sep 17 
00:00:00 2001 From: tcely Date: Wed, 25 Dec 2024 05:23:53 -0500 Subject: [PATCH 40/42] Use the N version of ffmpeg --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 120a8424..ad0faec2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,9 +9,9 @@ ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f6 ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae" ARG FFMPEG_DATE="autobuild-2024-12-24-14-15" -ARG FFMPEG_VERSION="n7.1-62-gb168ed9b14" -ARG SHA256_FFMPEG_AMD64="56f7ae56ee3cf5906006fb5845d963cae3580513a22d84236e82bc307c3d6fd5" -ARG SHA256_FFMPEG_ARM64="2872f0ecfe791c9d9837b2563af4e77dc862c766abef5108c84e082cab5fad1f" +ARG FFMPEG_VERSION="N-118163-g954d55c2a4" +ARG SHA256_FFMPEG_AMD64="798a7e5a0724139e6bb70df8921522b23be27028f9f551dfa83c305ec4ffaf3a" +ARG SHA256_FFMPEG_ARM64="c3e6cc0fec42cc7e3804014fbb02c1384a1a31ef13f6f9a36121f2e1216240c0" ENV S6_VERSION="${S6_VERSION}" \ FFMPEG_DATE="${FFMPEG_DATE}" \ From 942452c6a28fd3d64fdcc6af6c212a112b0bb124 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 25 Dec 2024 10:53:25 -0500 Subject: [PATCH 41/42] Skip media without any formats --- tubesync/sync/models.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 455a38a5..15daf5ba 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1499,7 +1499,16 @@ class Media(models.Model): if not callable(indexer): raise Exception(f'Media with source type f"{self.source.source_type}" ' f'has no indexer') - return indexer(self.url) + response = indexer(self.url) + no_formats_available = ( + not response or + "formats" not in response.keys() or + 0 == len(response["formats"]) + ) + if no_formats_available: + self.can_download = False + self.skip = True + return response def calculate_episode_number(self): if self.source.source_type == 
Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: From d74e6bf2cadaed4ac1c11800770367737e1f1a7b Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 25 Dec 2024 20:16:07 -0500 Subject: [PATCH 42/42] Avoid the unlikely possibility of None comparison --- tubesync/sync/tasks.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 8fcc6630..0dcacbbc 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -193,7 +193,12 @@ def index_source_task(source_id): media.save() log.debug(f'Indexed media: {source} / {media}') # log the new media instances - if media.created >= source.last_crawl: + new_media_instance = ( + media.created and + source.last_crawl and + media.created >= source.last_crawl + ) + if new_media_instance: log.info(f'Indexed new media: {source} / {media}') except IntegrityError as e: log.error(f'Index media failed: {source} / {media} with "{e}"')