From 6ae2676b8ff149fdd45ea2027d8eb41ab1361d47 Mon Sep 17 00:00:00 2001
From: Richard Hyde <email@richardhyde.net>
Date: Thu, 19 Dec 2024 18:18:33 +0000
Subject: [PATCH 01/42] Ignore case when checking the filter

---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 2f116356..19fe409a 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -597,7 +597,7 @@ class Source(models.Model):
     def is_regex_match(self, media_item_title):
         if not self.filter_text:
             return True
-        return bool(re.search(self.filter_text, media_item_title))
+        return bool(re.search(self.filter_text, media_item_title, re.I))
 
     def get_index(self, type):
         indexer = self.INDEXERS.get(self.source_type, None)

From 435885ff61d72479b15d6e4079b4289e00bb69a1 Mon Sep 17 00:00:00 2001
From: Richard Hyde <email@richardhyde.net>
Date: Thu, 19 Dec 2024 18:39:19 +0000
Subject: [PATCH 02/42] added sync.tasks.download_media_metadata to the task
 map

---
 tubesync/sync/tasks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 37983932..767820ea 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -49,6 +49,7 @@ def map_task_to_instance(task):
         'sync.tasks.check_source_directory_exists': Source,
         'sync.tasks.download_media_thumbnail': Media,
         'sync.tasks.download_media': Media,
+        'sync.tasks.download_media_metadata': Media,
         'sync.tasks.save_all_media_for_source': Source,
     }
     MODEL_URL_MAP = {

From ca61df2e0bb4d07bc518d8f70dd4521ccdd1fe8c Mon Sep 17 00:00:00 2001
From: Richard Hyde <email@richardhyde.net>
Date: Thu, 19 Dec 2024 18:42:14 +0000
Subject: [PATCH 03/42] don't add the sync.tasks.download_media_metadata task
 if the video is skipped or there's already a task running

---
 tubesync/sync/signals.py | 4 ++--
 tubesync/sync/tasks.py   | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index 9c541e0a..aba445d0 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -12,7 +12,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
                     download_media_thumbnail, download_media_metadata,
                     map_task_to_instance, check_source_directory_exists,
                     download_media, rescan_media_server, download_source_images,
-                    save_all_media_for_source)
+                    save_all_media_for_source, get_media_metadata_task)
 from .utils import delete_file
 from .filtering import filter_media
 
@@ -133,7 +133,7 @@ def media_post_save(sender, instance, created, **kwargs):
         instance.save()
         post_save.connect(media_post_save, sender=Media)
     # If the media is missing metadata schedule it to be downloaded
-    if not instance.metadata:
+    if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
         log.info(f'Scheduling task to download metadata for: {instance.url}')
         verbose_name = _('Downloading metadata for "{}"')
         download_media_metadata(
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 767820ea..9f3eea57 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -118,6 +118,12 @@ def get_media_download_task(media_id):
     except IndexError:
         return False
 
+def get_media_metadata_task(media_id):
+    try:
+        return Task.objects.get_task('sync.tasks.download_media_metadata',
+                                     args=(str(media_id),))[0]
+    except IndexError:
+        return False
 
 def delete_task_by_source(task_name, source_id):
     return Task.objects.filter(task_name=task_name, queue=str(source_id)).delete()

From 919d933d4d18041741f8d434b0d6e614ea750fc8 Mon Sep 17 00:00:00 2001
From: Richard Hyde <email@richardhyde.net>
Date: Thu, 19 Dec 2024 18:44:35 +0000
Subject: [PATCH 04/42] mark the video as skipped if the
 sync.tasks.download_media_metadata task fails

---
 tubesync/sync/signals.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index aba445d0..cd8cf621 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -102,6 +102,10 @@ def task_task_failed(sender, task_id, completed_task, **kwargs):
         obj.has_failed = True
         obj.save()
 
+    if isinstance(obj, Media) and completed_task.task_name == "sync.tasks.download_media_metadata":
+        log.error(f'Permanent failure for media: {obj} task: {completed_task}')
+        obj.skip = True
+        obj.save()
 
 @receiver(post_save, sender=Media)
 def media_post_save(sender, instance, created, **kwargs):

From 394f937fcbd6008a7466c7645fd72ab691cfa7ee Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Fri, 20 Dec 2024 10:07:24 -0500
Subject: [PATCH 05/42] Handle raise_no_formats exception

Catching this and checking the message may be the best way to skip members-only videos.
---
 tubesync/sync/youtube.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index d51c7b45..33cdb795 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -83,6 +83,7 @@ def get_media_info(url):
     '''
     opts = get_yt_opts()
     opts.update({
+        'ignore_no_formats_error': False,
         'skip_download': True,
         'forcejson': True,
         'simulate': True,
@@ -93,6 +94,11 @@ def get_media_info(url):
     with yt_dlp.YoutubeDL(opts) as y:
         try:
             response = y.extract_info(url, download=False)
+        except yt_dlp.utils.ExtractorError as e:
+            if not e.expected:
+                raise e
+            log.warn(e.msg)
+            pass
         except yt_dlp.utils.DownloadError as e:
             raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e
     if not response:

From 16e86ba79a90dcdfce1b4adeb5251e9de606045e Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sat, 21 Dec 2024 05:55:48 -0500
Subject: [PATCH 06/42] Accept limited metadata for subscriber_only videos

---
 tubesync/sync/youtube.py | 45 +++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index 33cdb795..6911eabd 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -5,6 +5,7 @@
 
 
 import os
+import json
 from pathlib import Path
 from django.conf import settings
 from copy import copy
@@ -74,6 +75,28 @@ def get_channel_image_info(url):
             raise YouTubeError(f'Failed to extract channel info for "{url}": {e}') from e
 
 
+def _subscriber_only(msg='', response=None):
+    if response is None:
+        # process msg only
+        msg = str(msg)
+        if 'access to members-only content' in msg:
+            return True
+        if ': Join this channel' in msg:
+            return True
+    else:
+        # ignore msg entirely
+        try:
+            data = json.loads(response)
+        except (TypeError, ValueError, AttributeError):
+            return False
+
+        if 'availability' not in data.keys():
+            return False
+
+        # check for the specific expected value
+        return 'subscriber_only' == data.get('availability')
+    return False
+
 
 def get_media_info(url):
     '''
@@ -83,7 +106,8 @@ def get_media_info(url):
     '''
     opts = get_yt_opts()
     opts.update({
-        'ignore_no_formats_error': False,
+        'ignoreerrors': False, # explicitly set this to catch exceptions
+        'ignore_no_formats_error': False, # we must fail first to try again with this enabled
         'skip_download': True,
         'forcejson': True,
         'simulate': True,
@@ -94,13 +118,20 @@ def get_media_info(url):
     with yt_dlp.YoutubeDL(opts) as y:
         try:
             response = y.extract_info(url, download=False)
-        except yt_dlp.utils.ExtractorError as e:
-            if not e.expected:
-                raise e
-            log.warn(e.msg)
-            pass
         except yt_dlp.utils.DownloadError as e:
-            raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e
+            if not _subscriber_only(msg=e.msg):
+                raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e
+            # adjust options and try again
+            opts.update({'ignore_no_formats_error': True,})
+            with yt_dlp.YoutubeDL(opts) as yy:
+                try:
+                    response = yy.extract_info(url, download=False)
+                except yt_dlp.utils.DownloadError as ee:
+                    raise YouTubeError(f'Failed (again) to extract_info for "{url}": {ee}') from ee
+                # validate the response is what we expected
+                if not _subscriber_only(response=response):
+                    response = {}
+
     if not response:
         raise YouTubeError(f'Failed to extract_info for "{url}": No metadata was '
                            f'returned by youtube-dl, check for error messages in the '

From 54f2663f820a7826ba3254ab3cc1b8efaf17f55e Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sat, 21 Dec 2024 06:50:24 -0500
Subject: [PATCH 07/42] json.loads() expects a string

---
 tubesync/sync/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index 6911eabd..2a8de68f 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -86,7 +86,7 @@ def _subscriber_only(msg='', response=None):
     else:
         # ignore msg entirely
         try:
-            data = json.loads(response)
+            data = json.loads(str(response))
         except (TypeError, ValueError, AttributeError):
             return False
 

From cf951a820aefcafb51e3e655576a4aa4c29f9e1c Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sat, 21 Dec 2024 07:53:23 -0500
Subject: [PATCH 08/42] response is a dict just now

I don't know what I was thinking.
Add some type checking while I am at it.
---
 tubesync/sync/youtube.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index 2a8de68f..4de2759f 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -5,7 +5,6 @@
 
 
 import os
-import json
 from pathlib import Path
 from django.conf import settings
 from copy import copy
@@ -85,16 +84,14 @@ def _subscriber_only(msg='', response=None):
             return True
     else:
         # ignore msg entirely
-        try:
-            data = json.loads(str(response))
-        except (TypeError, ValueError, AttributeError):
-            return False
+        if not isinstance(response, dict):
+            raise TypeError(f'response must be a dict, got "{type(response)}" instead')
 
-        if 'availability' not in data.keys():
+        if 'availability' not in response.keys():
             return False
 
         # check for the specific expected value
-        return 'subscriber_only' == data.get('availability')
+        return 'subscriber_only' == response.get('availability')
     return False
 
 

From 8f0e6c381f3d95d203e129b3cfab09ccf51b7206 Mon Sep 17 00:00:00 2001
From: Richard Hyde <email@richardhyde.net>
Date: Sat, 21 Dec 2024 14:16:16 +0000
Subject: [PATCH 09/42] Removed filter ignore case change for pull request

---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 19fe409a..2f116356 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -597,7 +597,7 @@ class Source(models.Model):
     def is_regex_match(self, media_item_title):
         if not self.filter_text:
             return True
-        return bool(re.search(self.filter_text, media_item_title, re.I))
+        return bool(re.search(self.filter_text, media_item_title))
 
     def get_index(self, type):
         indexer = self.INDEXERS.get(self.source_type, None)

From 6c757185db6ace30a9ad347941275abbc67eb5bb Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sat, 21 Dec 2024 16:19:26 -0500
Subject: [PATCH 10/42] Do not try to skip already skipped media

Logically these functions can only mark media instances as skipped, so running them for media instances that are already marked that way is a waste of resources.
---
 tubesync/sync/filtering.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py
index 45710fe9..b051b0a3 100644
--- a/tubesync/sync/filtering.py
+++ b/tubesync/sync/filtering.py
@@ -15,27 +15,27 @@ def filter_media(instance: Media):
     skip = False
 
     # Check if it's published
-    if filter_published(instance):
+    if not instance.skip and filter_published(instance):
         skip = True
 
     # Check if older than max_cap_age, skip
-    if filter_max_cap(instance):
+    if not instance.skip and filter_max_cap(instance):
         skip = True
 
     # Check if older than source_cutoff
-    if filter_source_cutoff(instance):
+    if not instance.skip and filter_source_cutoff(instance):
         skip = True
 
     # Check if we have filter_text and filter text matches
-    if filter_filter_text(instance):
+    if not instance.skip and filter_filter_text(instance):
         skip = True
 
     # Check if the video is longer than the max, or shorter than the min
-    if filter_duration(instance):
+    if not instance.skip and filter_duration(instance):
         skip = True
 
     # If we aren't already skipping the file, call our custom function that can be overridden
-    if not skip and filter_custom(instance):
+    if not skip and not instance.skip and filter_custom(instance):
         log.info(f"Media: {instance.source} / {instance} has been skipped by Custom Filter")
         skip = True
 

From f88df0d4e601c78b20cac9c9adb75348bc36eb6c Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sat, 21 Dec 2024 16:46:17 -0500
Subject: [PATCH 11/42] Manually skipped items do not need additional filtering

---
 tubesync/sync/filtering.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py
index b051b0a3..338b0a42 100644
--- a/tubesync/sync/filtering.py
+++ b/tubesync/sync/filtering.py
@@ -14,28 +14,32 @@ def filter_media(instance: Media):
     # Assume we aren't skipping it, if any of these conditions are true, we skip it
     skip = False
 
+    # Check if it has already been marked as skipped
+    if instance.manual_skip:
+        skip = True
+
     # Check if it's published
-    if not instance.skip and filter_published(instance):
+    if not skip and filter_published(instance):
         skip = True
 
     # Check if older than max_cap_age, skip
-    if not instance.skip and filter_max_cap(instance):
+    if not skip and filter_max_cap(instance):
         skip = True
 
     # Check if older than source_cutoff
-    if not instance.skip and filter_source_cutoff(instance):
+    if not skip and filter_source_cutoff(instance):
         skip = True
 
     # Check if we have filter_text and filter text matches
-    if not instance.skip and filter_filter_text(instance):
+    if not skip and filter_filter_text(instance):
         skip = True
 
     # Check if the video is longer than the max, or shorter than the min
-    if not instance.skip and filter_duration(instance):
+    if not skip and filter_duration(instance):
         skip = True
 
     # If we aren't already skipping the file, call our custom function that can be overridden
-    if not skip and not instance.skip and filter_custom(instance):
+    if not skip and filter_custom(instance):
         log.info(f"Media: {instance.source} / {instance} has been skipped by Custom Filter")
         skip = True
 

From abfa8ca042491bdef9ac72592e48fe29f540c573 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sat, 21 Dec 2024 17:01:13 -0500
Subject: [PATCH 12/42] Do not log when the skip state remains unchanged

---
 tubesync/sync/filtering.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py
index 338b0a42..4ca8a382 100644
--- a/tubesync/sync/filtering.py
+++ b/tubesync/sync/filtering.py
@@ -122,10 +122,12 @@ def filter_max_cap(instance: Media):
         return False
 
     if instance.published <= max_cap_age:
-        log.info(
-            f"Media: {instance.source} / {instance} is too old for "
-            f"the download cap date, marking to be skipped"
-        )
+        # log new media instances, not every media instance every time
+        if not instance.skip:
+            log.info(
+                f"Media: {instance.source} / {instance} is too old for "
+                f"the download cap date, marking to be skipped"
+            )
         return True
 
     return False

From f4c070ada721f35aeb76f9fc662e069d2ecec3bd Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sat, 21 Dec 2024 20:08:36 -0500
Subject: [PATCH 13/42] Reduce image size and keep pycache in a volume

Not only are volumes typically faster to access than the image overlay, but the cache can also be reused between containers.
---
 Dockerfile | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 4941821b..a472f37a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -138,6 +138,9 @@ COPY pip.conf /etc/pip.conf
 # Add Pipfile
 COPY Pipfile /app/Pipfile
 
+# Do not include compiled byte-code
+ENV PIP_ROOT_USER_ACTION='ignore' PIP_NO_COMPILE=1
+
 # Switch workdir to the the app
 WORKDIR /app
 
@@ -163,7 +166,7 @@ RUN set -x && \
   useradd -M -d /app -s /bin/false -g app app && \
   # Install non-distro packages
   cp -at /tmp/ "${HOME}" && \
-  PIPENV_VERBOSITY=64 HOME="/tmp/${HOME#/}" pipenv install --system --skip-lock && \
+  PIPENV_VERBOSITY=64 HOME="/tmp/${HOME#/}" pipenv install --system --skip-lock --no-cache-dir && \
   # Clean up
   rm /app/Pipfile && \
   pipenv --clear && \
@@ -184,7 +187,7 @@ RUN set -x && \
   apt-get -y autoclean && \
   rm -rf /var/lib/apt/lists/* && \
   rm -rf /var/cache/apt/* && \
-  rm -rf /tmp/*
+  rm -rfv /tmp/*
 
 
 # Copy app
@@ -196,11 +199,12 @@ RUN set -x && \
   # Make absolutely sure we didn't accidentally bundle a SQLite dev database
   rm -rf /app/db.sqlite3 && \
   # Run any required app commands
-  /usr/bin/python3 /app/manage.py compilescss && \
-  /usr/bin/python3 /app/manage.py collectstatic --no-input --link && \
+  /usr/bin/python3 -B /app/manage.py compilescss && \
+  /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \
   # Create config, downloads and run dirs
   mkdir -v -p /run/app && \
   mkdir -v -p /config/media && \
+  mkdir -v -p /config/pycache && \
   mkdir -v -p /downloads/audio && \
   mkdir -v -p /downloads/video
 
@@ -219,7 +223,7 @@ COPY config/root /
 HEALTHCHECK --interval=1m --timeout=10s CMD /app/healthcheck.py http://127.0.0.1:8080/healthcheck
 
 # ENVS and ports
-ENV PYTHONPATH="/app"
+ENV PYTHONPATH="/app" PYTHONPYCACHEPREFIX="/config/pycache"
 EXPOSE 4848
 
 # Volumes

From a2c97d27912e5118797637c2776f9a9317609f9a Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sat, 21 Dec 2024 20:22:08 -0500
Subject: [PATCH 14/42] pipenv did not run with that option

---
 Dockerfile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index a472f37a..05fd7686 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -139,7 +139,9 @@ COPY pip.conf /etc/pip.conf
 COPY Pipfile /app/Pipfile
 
 # Do not include compiled byte-code
-ENV PIP_ROOT_USER_ACTION='ignore' PIP_NO_COMPILE=1
+ENV PIP_NO_COMPILE=1 \
+  PIP_NO_CACHE_DIR=1 \
+  PIP_ROOT_USER_ACTION='ignore'
 
 # Switch workdir to the the app
 WORKDIR /app

From 2c26936035d798dcfd598ccd2d50fe39b9e41c5b Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sat, 21 Dec 2024 20:23:47 -0500
Subject: [PATCH 15/42] fixup: remove the --no-cache-dir option

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 05fd7686..0434d08b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -168,7 +168,7 @@ RUN set -x && \
   useradd -M -d /app -s /bin/false -g app app && \
   # Install non-distro packages
   cp -at /tmp/ "${HOME}" && \
-  PIPENV_VERBOSITY=64 HOME="/tmp/${HOME#/}" pipenv install --system --skip-lock --no-cache-dir && \
+  PIPENV_VERBOSITY=64 HOME="/tmp/${HOME#/}" pipenv install --system --skip-lock && \
   # Clean up
   rm /app/Pipfile && \
   pipenv --clear && \

From b9eb28fd36eaf2b1383651bab4fd544e3b2072f1 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 01:45:16 -0500
Subject: [PATCH 16/42] There is a /config/cache directory we can safely use

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0434d08b..b89a48b6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -206,7 +206,7 @@ RUN set -x && \
   # Create config, downloads and run dirs
   mkdir -v -p /run/app && \
   mkdir -v -p /config/media && \
-  mkdir -v -p /config/pycache && \
+  mkdir -v -p /config/cache/pycache && \
   mkdir -v -p /downloads/audio && \
   mkdir -v -p /downloads/video
 
@@ -225,7 +225,7 @@ COPY config/root /
 HEALTHCHECK --interval=1m --timeout=10s CMD /app/healthcheck.py http://127.0.0.1:8080/healthcheck
 
 # ENVS and ports
-ENV PYTHONPATH="/app" PYTHONPYCACHEPREFIX="/config/pycache"
+ENV PYTHONPATH="/app" PYTHONPYCACHEPREFIX="/config/cache/pycache"
 EXPOSE 4848
 
 # Volumes

From 2f89c333a49339b9cc17de882ed58a3f0c8aa64e Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 01:52:15 -0500
Subject: [PATCH 17/42] yt_dlp controls this directory

It's probably safe, and `yt_dlp` plays nicely when cleaning up, but this is more correct.
---
 tubesync/tubesync/local_settings.py.container | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container
index a0426a4c..84e29c5f 100644
--- a/tubesync/tubesync/local_settings.py.container
+++ b/tubesync/tubesync/local_settings.py.container
@@ -60,7 +60,7 @@ if BACKGROUND_TASK_ASYNC_THREADS > MAX_BACKGROUND_TASK_ASYNC_THREADS:
 
 MEDIA_ROOT = CONFIG_BASE_DIR / 'media'
 DOWNLOAD_ROOT = DOWNLOADS_BASE_DIR
-YOUTUBE_DL_CACHEDIR = CONFIG_BASE_DIR / 'cache'
+YOUTUBE_DL_CACHEDIR = CONFIG_BASE_DIR / 'cache/youtube'
 YOUTUBE_DL_TEMPDIR = DOWNLOAD_ROOT / 'cache'
 COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt'
 

From 42a1033e7eeef1dd3e9a17cbda7ff60d5c971fc4 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 02:42:30 -0500
Subject: [PATCH 18/42] Rename old_youtube_cache_dirs

It makes sense to do this here because anyone who had the previous setting was, more than likely, using this file.
---
 tubesync/tubesync/local_settings.py.container | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container
index 84e29c5f..e75778b8 100644
--- a/tubesync/tubesync/local_settings.py.container
+++ b/tubesync/tubesync/local_settings.py.container
@@ -88,3 +88,13 @@ SOURCE_DOWNLOAD_DIRECTORY_PREFIX = True if SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR
 
 
 VIDEO_HEIGHT_CUTOFF = int(os.getenv("TUBESYNC_VIDEO_HEIGHT_CUTOFF", "240"))
+
+
+# ensure that the current directory exists
+if not YOUTUBE_DL_CACHEDIR.is_dir():
+    YOUTUBE_DL_CACHEDIR.mkdir(parents=True)
+# rename any old yt_dlp cache directories to the current directory
+old_youtube_cache_dirs = list(YOUTUBE_DL_CACHEDIR.parent.glob('youtube-*'))
+for cache_dir in old_youtube_cache_dirs:
+    cache_dir.rename(YOUTUBE_DL_CACHEDIR / cache_dir.name)
+

From 6cce0520cbb7b3a5e25be8c9a01f6893a4279c52 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 04:28:43 -0500
Subject: [PATCH 19/42] Update ci.yaml

* Use the current major version tag for actions.
* Add current python versions.
* Remove Python 3.7 as it isn't being set up.
* Don't fail fast, so that one old version won't prevent testing other versions.
---
 .github/workflows/ci.yaml | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index a2b1225b..8dcefc78 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -13,12 +13,13 @@ jobs:
   test:
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: [3.8, 3.9, 3.10, 3.11, 3.12, 3.13]
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Install Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
@@ -34,18 +35,18 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v1
+        uses: docker/setup-qemu-action@v3
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
+        uses: docker/setup-buildx-action@v3
       - name: Log into GitHub Container Registry
         run: echo "${{ secrets.REGISTRY_ACCESS_TOKEN }}" | docker login https://ghcr.io -u ${{ github.actor }} --password-stdin
       - name: Lowercase github username for ghcr
         id: string
-        uses: ASzc/change-string-case-action@v1
+        uses: ASzc/change-string-case-action@v6
         with:
           string: ${{ github.actor }}
       - name: Build and push
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v6
         with:
           platforms: linux/amd64,linux/arm64
           push: true

From 7c77ed1d789c1152e03b9f045819e3736e4d67f4 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 04:48:53 -0500
Subject: [PATCH 20/42] Update ci.yaml

We can keep testing with Python `3.7` by using `ubuntu-22.04` instead of `ubuntu-latest` according to the `setup-python` issue.

More info: https://github.com/actions/setup-python/issues/962
---
 .github/workflows/ci.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 8dcefc78..6f9ad6c8 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -11,11 +11,11 @@ on:
 
 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8, 3.9, 3.10, 3.11, 3.12, 3.13]
+        python-version: [3.7, 3.8, 3.9, 3.10, 3.11, 3.12, 3.13]
     steps:
       - uses: actions/checkout@v4
       - name: Install Python ${{ matrix.python-version }}

From 256f3254d9c38ed07a1faf1c1f1774ea2068f779 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 05:21:28 -0500
Subject: [PATCH 21/42] Quote Python versions in ci.yaml

---
 .github/workflows/ci.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 6f9ad6c8..8d612e02 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.7, 3.8, 3.9, 3.10, 3.11, 3.12, 3.13]
+        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
     steps:
       - uses: actions/checkout@v4
       - name: Install Python ${{ matrix.python-version }}

From a1248be4ae1a6cbcbf29e28900a868898df1e2f5 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 05:33:08 -0500
Subject: [PATCH 22/42] Update ci.yaml

Python `3.13` is not expected to work until Django `5.1` and we are currently using `3.2`, so that's not worth testing.

More info: https://code.djangoproject.com/ticket/34900
---
 .github/workflows/ci.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 8d612e02..faf25319 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
     steps:
       - uses: actions/checkout@v4
       - name: Install Python ${{ matrix.python-version }}

From 4bbdfd2f24de4a55c0328efb9cdb8ae612e08c8a Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 09:44:27 -0500
Subject: [PATCH 23/42] Fix paths for when filename includes sub-directories

---
 tubesync/sync/models.py | 78 ++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 455a38a5..b969ec4e 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1261,45 +1261,6 @@ class Media(models.Model):
         media_details = self.format_dict
         return media_format.format(**media_details)
 
-    @property
-    def thumbname(self):
-        if self.downloaded and self.media_file:
-            filename = os.path.basename(self.media_file.path)
-        else:
-            filename = self.filename
-        prefix, ext = os.path.splitext(filename)
-        return f'{prefix}.jpg'
-
-    @property
-    def thumbpath(self):
-        return self.source.directory_path / self.thumbname
-
-    @property
-    def nfoname(self):
-        if self.downloaded and self.media_file:
-            filename = os.path.basename(self.media_file.path)
-        else:
-            filename = self.filename
-        prefix, ext = os.path.splitext(filename)
-        return f'{prefix}.nfo'
-    
-    @property
-    def nfopath(self):
-        return self.source.directory_path / self.nfoname
-
-    @property
-    def jsonname(self):
-        if self.downloaded and self.media_file:
-            filename = os.path.basename(self.media_file.path)
-        else:
-            filename = self.filename
-        prefix, ext = os.path.splitext(filename)
-        return f'{prefix}.info.json'
-    
-    @property
-    def jsonpath(self):
-        return self.source.directory_path / self.jsonname
-
     @property
     def directory_path(self):
         dirname = self.source.directory_path / self.filename
@@ -1309,6 +1270,45 @@ class Media(models.Model):
     def filepath(self):
         return self.source.directory_path / self.filename
 
+    @property
+    def thumbname(self):
+        if self.downloaded and self.media_file:
+            filename = self.media_file.path
+        else:
+            filename = self.filename
+        prefix, ext = os.path.splitext(os.path.basename(filename))
+        return f'{prefix}.jpg'
+
+    @property
+    def thumbpath(self):
+        return self.directory_path / self.thumbname
+
+    @property
+    def nfoname(self):
+        if self.downloaded and self.media_file:
+            filename = self.media_file.path
+        else:
+            filename = self.filename
+        prefix, ext = os.path.splitext(os.path.basename(filename))
+        return f'{prefix}.nfo'
+    
+    @property
+    def nfopath(self):
+        return self.directory_path / self.nfoname
+
+    @property
+    def jsonname(self):
+        if self.downloaded and self.media_file:
+            filename = self.media_file.path
+        else:
+            filename = self.filename
+        prefix, ext = os.path.splitext(os.path.basename(filename))
+        return f'{prefix}.info.json'
+    
+    @property
+    def jsonpath(self):
+        return self.directory_path / self.jsonname
+
     @property
     def thumb_file_exists(self):
         if not self.thumb:

From 8ebb333ea35d557f93d71cda5922110090aa4924 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 10:09:16 -0500
Subject: [PATCH 24/42] directory_path should be a Path, not str

---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index b969ec4e..e0c23bf8 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1264,7 +1264,7 @@ class Media(models.Model):
     @property
     def directory_path(self):
         dirname = self.source.directory_path / self.filename
-        return os.path.dirname(str(dirname))
+        return dirname.parent
 
     @property
     def filepath(self):

From 20527aa3c2cc5f67da7e53999d5a28c1055793bc Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 10:26:41 -0500
Subject: [PATCH 25/42] Update tests to not expect strings

---
 tubesync/sync/tests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py
index 6aa0ccb6..8f0de6ef 100644
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -597,11 +597,11 @@ class FilepathTestCase(TestCase):
         # Check child directories work
         self.source.media_format = '{yyyy}/{key}.{ext}'
         self.assertEqual(self.media.directory_path,
-                         str(self.source.directory_path / '2017'))
+                         self.source.directory_path / '2017')
         self.assertEqual(self.media.filename, '2017/mediakey.mkv')
         self.source.media_format = '{yyyy}/{yyyy_mm_dd}/{key}.{ext}'
         self.assertEqual(self.media.directory_path,
-                         str(self.source.directory_path / '2017/2017-09-11'))
+                         self.source.directory_path / '2017/2017-09-11')
         self.assertEqual(self.media.filename, '2017/2017-09-11/mediakey.mkv')
         # Check media specific media format keys work
         test_media = Media.objects.create(

From 495f62f25a3efaf668b9758c1cb8ef02476bbbf0 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 11:14:32 -0500
Subject: [PATCH 26/42] Display only the name part

`media.filename` is a `str`
`media.filepath` is a `Path`

The second has a simple way to remove directories, so use that instead of any custom string manipulation.
---
 tubesync/sync/templates/sync/media-item.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html
index 0c78f9b4..d6a74d9f 100644
--- a/tubesync/sync/templates/sync/media-item.html
+++ b/tubesync/sync/templates/sync/media-item.html
@@ -98,7 +98,7 @@
       {% if media.downloaded %}
       <tr title="The filename the media will be downloaded as">
         <td class="hide-on-small-only">Filename</td>
-        <td><span class="hide-on-med-and-up">Filename<br></span><strong>{{ media.filename }}</strong></td>
+        <td><span class="hide-on-med-and-up">Filename<br></span><strong>{{ media.filepath.name }}</strong></td>
       </tr>
       <tr title="The filename the media will be downloaded as">
         <td class="hide-on-small-only">Directory</td>

From 8a04139da9f300d6560b2aefebf4f9a7d92a118e Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 11:46:29 -0500
Subject: [PATCH 27/42] Add data items to make comparison easier

---
 tubesync/sync/views.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py
index 94e91432..52090042 100644
--- a/tubesync/sync/views.py
+++ b/tubesync/sync/views.py
@@ -582,6 +582,8 @@ class MediaItemView(DetailView):
         data['video_exact'] = video_exact
         data['video_format'] = video_format
         data['youtube_dl_format'] = self.object.get_format_str()
+        data['filename_path'] = pathlib.Path(self.object.filename)
+        data['media_file_path'] = pathlib.Path(self.object.media_file.path) if self.object.media_file else None
         return data
 
 

From d1c6236b9d7ba8848f61be1dae81b59ed7241c9c Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 11:54:56 -0500
Subject: [PATCH 28/42] Display the location the media was actually saved to

---
 tubesync/sync/templates/sync/media-item.html | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html
index d6a74d9f..a326b09b 100644
--- a/tubesync/sync/templates/sync/media-item.html
+++ b/tubesync/sync/templates/sync/media-item.html
@@ -100,10 +100,17 @@
         <td class="hide-on-small-only">Filename</td>
         <td><span class="hide-on-med-and-up">Filename<br></span><strong>{{ media.filepath.name }}</strong></td>
       </tr>
-      <tr title="The filename the media will be downloaded as">
+      <tr title="The directory the media will be downloaded to">
         <td class="hide-on-small-only">Directory</td>
         <td><span class="hide-on-med-and-up">Directory<br></span><strong>{{ media.directory_path }}</strong></td>
       </tr>
+      <tr title="The filepath the media was saved to">
+        <td class="hide-on-small-only">Database&nbsp;Filepath</td>
+        <td><span class="hide-on-med-and-up">DB&nbsp;Filepath<br></span><strong>{{ media_file_path }}</strong>
+          {% if media_file_path == media.filepath %}
+          <span class="green-text">&nbsp;(matched)</span>
+          {% endif %}
+        </td>                                                                   </tr>
       <tr title="Size of the file on disk">
         <td class="hide-on-small-only">File size</td>
         <td><span class="hide-on-med-and-up">File size<br></span><strong>{{ media.downloaded_filesize|filesizeformat }}</strong></td>

From 67b4995448256dff1bc11c8f35339b3329c654b4 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Sun, 22 Dec 2024 16:16:06 -0500
Subject: [PATCH 29/42] Remove manual_skip check

---
 tubesync/sync/filtering.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py
index 4ca8a382..ff356edb 100644
--- a/tubesync/sync/filtering.py
+++ b/tubesync/sync/filtering.py
@@ -14,10 +14,6 @@ def filter_media(instance: Media):
     # Assume we aren't skipping it, if any of these conditions are true, we skip it
     skip = False
 
-    # Check if it has already been marked as skipped
-    if instance.manual_skip:
-        skip = True
-
     # Check if it's published
     if not skip and filter_published(instance):
         skip = True

From b19614cc9f95adda1e0247d5f51b7a68db016d6d Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Mon, 23 Dec 2024 01:28:46 -0500
Subject: [PATCH 30/42] Do not log download progress lines by default

When debugging is enabled, the default progress output can still be viewed and/or logged, depending on the other options.
---
 tubesync/sync/youtube.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index cb612c69..446e456d 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -151,6 +151,7 @@ def download_media(url, media_format, extension, output_file, info_json,
         'outtmpl': os.path.basename(output_file),
         'quiet': False if settings.DEBUG else True,
         'verbose': True if settings.DEBUG else False,
+        'noprogress': None if settings.DEBUG else True,
         'progress_hooks': [hook],
         'writeinfojson': info_json,
         'postprocessors': [],

From 2799d95119fe29dc3eb8d2892f753591c852d70e Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Mon, 23 Dec 2024 10:17:58 -0500
Subject: [PATCH 31/42] Only log new media

Without this, channels with thousands of videos that won't be downloaded create large blocks in the logs.
---
 tubesync/sync/tasks.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 37983932..8fcc6630 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -191,7 +191,10 @@ def index_source_task(source_id):
         media.source = source
         try:
             media.save()
-            log.info(f'Indexed media: {source} / {media}')
+            log.debug(f'Indexed media: {source} / {media}')
+            # log the new media instances
+            if media.created >= source.last_crawl:
+                log.info(f'Indexed new media: {source} / {media}')
         except IntegrityError as e:
             log.error(f'Index media failed: {source} / {media} with "{e}"')
     # Tack on a cleanup of old completed tasks

From 3b41c8df8160197d9fb03b66fad9ef23e82c644d Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Mon, 23 Dec 2024 16:55:26 -0500
Subject: [PATCH 32/42] Remove duplicated ' to:'

---
 tubesync/sync/tasks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 37983932..10350562 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -430,7 +430,7 @@ def download_media(media_id):
             copyfile(media.thumb.path, media.thumbpath)
         # If selected, write an NFO file
         if media.source.write_nfo:
-            log.info(f'Writing media NFO file to: to: {media.nfopath}')
+            log.info(f'Writing media NFO file to: {media.nfopath}')
             write_text_file(media.nfopath, media.nfoxml)
         # Schedule a task to update media servers
         for mediaserver in MediaServer.objects.all():

From 99899aac9c611c548473ba30ee4d2f85aae45d4e Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Mon, 23 Dec 2024 20:13:56 -0500
Subject: [PATCH 33/42] Use title from DB

We stored a stripped title string in the database, so just use that instead of using `.name` and calling `.strip()` again.
---
 tubesync/sync/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 455a38a5..d558d4ec 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1353,7 +1353,7 @@ class Media(models.Model):
         nfo.text = '\n  '
         # title = media metadata title
         title = nfo.makeelement('title', {})
-        title.text = clean_emoji(str(self.name).strip())
+        title.text = clean_emoji(self.title)
         title.tail = '\n  '
         nfo.append(title)
         # showtitle = source name

From 01f0d827884899d2ac9a4a0a276766a72c8906a6 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Wed, 25 Dec 2024 00:07:57 -0500
Subject: [PATCH 34/42] Cleanup removed media that has not been downloaded too

---
 tubesync/sync/tasks.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 37983932..fe692830 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -146,12 +146,12 @@ def cleanup_old_media():
 
 
 def cleanup_removed_media(source, videos):
-    media_objects = Media.objects.filter(source=source, downloaded=True)
-    for item in media_objects:
-        matching_source_item = [video['id'] for video in videos if video['id'] == item.key]
+    media_objects = Media.objects.filter(source=source)
+    for media in media_objects:
+        matching_source_item = [video['id'] for video in videos if video['id'] == media.key]
         if not matching_source_item:
-            log.info(f'{item.title} is no longer in source, removing')
-            item.delete()
+            log.info(f'{media.name} is no longer in source, removing')
+            media.delete()
 
 
 @background(schedule=0)

From 6c4d31765ae36565f8db37c982cf72c4f33cd1d9 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Wed, 25 Dec 2024 00:16:18 -0500
Subject: [PATCH 35/42] Made messages more human friendly

---
 tubesync/sync/tasks.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 37983932..176b9186 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -199,7 +199,7 @@ def index_source_task(source_id):
     # Tack on a cleanup of old media
     cleanup_old_media()
     if source.delete_removed_media:
-        log.info(f'Cleaning up media no longer in source {source}')
+        log.info(f'Cleaning up media no longer in source: {source}')
         cleanup_removed_media(source, videos)
 
 
@@ -236,7 +236,7 @@ def download_source_images(source_id):
                   f'source exists with ID: {source_id}')
         return
     avatar, banner = source.get_image_url
-    log.info(f'Thumbnail URL for source with ID: {source_id} '
+    log.info(f'Thumbnail URL for source with ID: {source_id} / {source} '
         f'Avatar: {avatar} '
         f'Banner: {banner}')
     if banner != None:
@@ -269,7 +269,7 @@ def download_source_images(source_id):
             with open(file_path, 'wb') as f:
                 f.write(django_file.read())
 
-    log.info(f'Thumbnail downloaded for source with ID: {source_id}')
+    log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}')
 
 
 @background(schedule=0)
@@ -285,7 +285,7 @@ def download_media_metadata(media_id):
                   f'media exists with ID: {media_id}')
         return
     if media.manual_skip:
-        log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.')
+        log.info(f'Task for ID: {media_id} / {media} skipped, due to task being manually skipped.')
         return
     source = media.source
     metadata = media.index_metadata()
@@ -306,7 +306,7 @@ def download_media_metadata(media_id):
     # Don't filter media here, the post_save signal will handle that
     media.save()
     log.info(f'Saved {len(media.metadata)} bytes of metadata for: '
-             f'{source} / {media_id}')
+             f'{source} / {media}: {media_id}')
 
 
 @background(schedule=0)

From 0795eb951d7918c3ee3333494f1952014068cbd5 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Wed, 25 Dec 2024 00:21:58 -0500
Subject: [PATCH 36/42] Consistency fixes

The `filepath` variable is defined before the `if` and used throughout the first block, so we should use it in the `else` block too.

Otherwise, a change to that variable would change the first block but not the second block.
---
 tubesync/sync/tasks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 10350562..f58fce36 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -359,7 +359,7 @@ def download_media(media_id):
         return
     if media.skip:
         # Media was toggled to be skipped after the task was scheduled
-        log.warn(f'Download task triggered for  media: {media} (UUID: {media.pk}) but '
+        log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but '
                  f'it is now marked to be skipped, not downloading')
         return
     if media.downloaded and media.media_file:
@@ -446,7 +446,7 @@ def download_media(media_id):
     else:
         # Expected file doesn't exist on disk
         err = (f'Failed to download media: {media} (UUID: {media.pk}) to disk, '
-               f'expected outfile does not exist: {media.filepath}')
+               f'expected outfile does not exist: {filepath}')
         log.error(err)
         # Raising an error here triggers the task to be re-attempted (or fail)
         raise DownloadFailedException(err)

From 7343e39f35bc466ca6efdea387787ee7083ec7f9 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Wed, 25 Dec 2024 00:32:03 -0500
Subject: [PATCH 37/42] Verbose output is no longer needed

I confirmed that the changes reduced the number of files being removed.
---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index b89a48b6..b33af383 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -189,7 +189,7 @@ RUN set -x && \
   apt-get -y autoclean && \
   rm -rf /var/lib/apt/lists/* && \
   rm -rf /var/cache/apt/* && \
-  rm -rfv /tmp/*
+  rm -rf /tmp/*
 
 
 # Copy app

From 8def84c7b4b4ee510eee8ae449ef031bb311bff3 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Wed, 25 Dec 2024 01:17:04 -0500
Subject: [PATCH 38/42] Use filename_path

It's more directly what we want here.

If `filepath` ever changed to not use `filename`, the displayed value would be incorrect, so this is the cleaner way to display it.
---
 tubesync/sync/templates/sync/media-item.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html
index a326b09b..6f751be6 100644
--- a/tubesync/sync/templates/sync/media-item.html
+++ b/tubesync/sync/templates/sync/media-item.html
@@ -98,7 +98,7 @@
       {% if media.downloaded %}
       <tr title="The filename the media will be downloaded as">
         <td class="hide-on-small-only">Filename</td>
-        <td><span class="hide-on-med-and-up">Filename<br></span><strong>{{ media.filepath.name }}</strong></td>
+        <td><span class="hide-on-med-and-up">Filename<br></span><strong>{{ filename_path.name }}</strong></td>
       </tr>
       <tr title="The directory the media will be downloaded to">
         <td class="hide-on-small-only">Directory</td>

From 3cea9270a0a14fbf245d7f334212d65096a3944f Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Wed, 25 Dec 2024 05:09:30 -0500
Subject: [PATCH 39/42] Bump ffmpeg

---
 Dockerfile | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 4941821b..120a8424 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,10 +8,10 @@ ARG SHA256_S6_AMD64="59289456ab1761e277bd456a95e737c06b03ede99158beb24f12b165a90
 ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f66f785"
 ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae"
 
-ARG FFMPEG_DATE="autobuild-2024-12-09-14-16"
-ARG FFMPEG_VERSION="118034-gd21134313f"
-ARG SHA256_FFMPEG_AMD64="cd50122fb0939e913585282347a8f95074c2d5477ceb059cd90aca551f14e9ea"
-ARG SHA256_FFMPEG_ARM64="33b4edebf9c23701473ba8db696b26072bb9b9c05fc4a156e115f94e44d361e0"
+ARG FFMPEG_DATE="autobuild-2024-12-24-14-15"
+ARG FFMPEG_VERSION="n7.1-62-gb168ed9b14"
+ARG SHA256_FFMPEG_AMD64="56f7ae56ee3cf5906006fb5845d963cae3580513a22d84236e82bc307c3d6fd5"
+ARG SHA256_FFMPEG_ARM64="2872f0ecfe791c9d9837b2563af4e77dc862c766abef5108c84e082cab5fad1f"
 
 ENV S6_VERSION="${S6_VERSION}" \
   FFMPEG_DATE="${FFMPEG_DATE}" \
@@ -49,12 +49,16 @@ RUN decide_arch() { \
     decide_url() { \
       case "${1}" in \
         (ffmpeg) printf -- \
-          'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-N-%s-linux%s-gpl.tar.xz' \
+          'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-%s-linux%s-gpl%s.tar.xz' \
           "${FFMPEG_DATE}" \
           "${FFMPEG_VERSION}" \
           "$(case "${2}" in \
             (amd64) printf -- '64' ;; \
             (*) printf -- '%s' "${2}" ;; \
+          esac)" \
+          "$(case "${FFMPEG_VERSION%%-*}" in \
+            (n*) printf -- '-%s\n' "${FFMPEG_VERSION#n}" | cut -d '-' -f 1,2 ;; \
+            (*) printf -- '' ;; \
           esac)" ;; \
         (s6) printf -- \
           'https://github.com/just-containers/s6-overlay/releases/download/v%s/s6-overlay-%s.tar.xz' \

From 568698351dd6977879eaa6777561e41ba1b69a80 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Wed, 25 Dec 2024 05:23:53 -0500
Subject: [PATCH 40/42] Use the N version of ffmpeg

---
 Dockerfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 120a8424..ad0faec2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,9 +9,9 @@ ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f6
 ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae"
 
 ARG FFMPEG_DATE="autobuild-2024-12-24-14-15"
-ARG FFMPEG_VERSION="n7.1-62-gb168ed9b14"
-ARG SHA256_FFMPEG_AMD64="56f7ae56ee3cf5906006fb5845d963cae3580513a22d84236e82bc307c3d6fd5"
-ARG SHA256_FFMPEG_ARM64="2872f0ecfe791c9d9837b2563af4e77dc862c766abef5108c84e082cab5fad1f"
+ARG FFMPEG_VERSION="N-118163-g954d55c2a4"
+ARG SHA256_FFMPEG_AMD64="798a7e5a0724139e6bb70df8921522b23be27028f9f551dfa83c305ec4ffaf3a"
+ARG SHA256_FFMPEG_ARM64="c3e6cc0fec42cc7e3804014fbb02c1384a1a31ef13f6f9a36121f2e1216240c0"
 
 ENV S6_VERSION="${S6_VERSION}" \
   FFMPEG_DATE="${FFMPEG_DATE}" \

From 942452c6a28fd3d64fdcc6af6c212a112b0bb124 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Wed, 25 Dec 2024 10:53:25 -0500
Subject: [PATCH 41/42] Skip media without any formats

---
 tubesync/sync/models.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 455a38a5..15daf5ba 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1499,7 +1499,16 @@ class Media(models.Model):
         if not callable(indexer):
             raise Exception(f'Media with source type f"{self.source.source_type}" '
                             f'has no indexer')
-        return indexer(self.url)
+        response = indexer(self.url)
+        no_formats_available = (
+            not response or
+            "formats" not in response.keys() or
+            0 == len(response["formats"])
+        )
+        if no_formats_available:
+            self.can_download = False
+            self.skip = True
+        return response
 
     def calculate_episode_number(self):
         if self.source.source_type == Source.SOURCE_TYPE_YOUTUBE_PLAYLIST:

From d74e6bf2cadaed4ac1c11800770367737e1f1a7b Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Wed, 25 Dec 2024 20:16:07 -0500
Subject: [PATCH 42/42] Avoid the unlikely possibility of None comparison

---
 tubesync/sync/tasks.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 8fcc6630..0dcacbbc 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -193,7 +193,12 @@ def index_source_task(source_id):
             media.save()
             log.debug(f'Indexed media: {source} / {media}')
             # log the new media instances
-            if media.created >= source.last_crawl:
+            new_media_instance = (
+                media.created and
+                source.last_crawl and
+                media.created >= source.last_crawl
+            )
+            if new_media_instance:
                 log.info(f'Indexed new media: {source} / {media}')
         except IntegrityError as e:
             log.error(f'Index media failed: {source} / {media} with "{e}"')