From ea8223d86b89e25dce656fb613d35112ab27e159 Mon Sep 17 00:00:00 2001
From: Tim
Date: Thu, 11 Jul 2024 16:17:29 +0800
Subject: [PATCH] move filtering to own module. Add filtering for days_to_keep

---
Review notes (this section is ignored by git am):
 - filter_max_cap now skips media whose published value is not a datetime
   instead of comparing it against the download cap date.
 - filtering.py now ends with a newline.
 - The blob id on the filtering.py index line predates these edits.
 - Whitespace on context lines was reconstructed; retry with
   "git apply --ignore-whitespace" if a hunk is rejected.

 tubesync/sync/filtering.py | 104 ++++++++++++++++++++++++++++++++++++++
 tubesync/sync/signals.py   |  69 +--------------------------
 tubesync/sync/tests.py     |  14 +++---
 3 files changed, 113 insertions(+), 74 deletions(-)
 create mode 100644 tubesync/sync/filtering.py

diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py
new file mode 100644
index 00000000..591b4e57
--- /dev/null
+++ b/tubesync/sync/filtering.py
@@ -0,0 +1,104 @@
+'''
+    All the logic for filtering media from channels to work out if we should skip downloading it or not
+'''
+
+from common.logger import log
+from .models import Source, Media, MediaServer
+from datetime import datetime, timedelta
+from django.utils import timezone
+
+# Check the filter conditions for instance, returns True if the skip property has changed so we can do other things
+def filter_media(instance: Media):
+    # Assume we aren't skipping it, if any of these conditions are true, we skip it
+    skip = False
+
+    # Check if it's published
+    if filter_published(instance):
+        skip = True
+
+    # Check if older than max_cap_age, skip
+    if filter_max_cap(instance):
+        skip = True
+
+    # Check if older than source_cutoff
+    if filter_source_cutoff(instance):
+        skip = True
+
+    # Check if we have filter_text and the title doesn't match it, skip
+    if filter_filter_text(instance):
+        skip = True
+
+    # Check if skipping
+    if instance.skip != skip:
+        instance.skip = skip
+        log.warn(f'Media: {instance.source} / {instance} has changed skip setting to {skip}')
+        return True
+
+    return False
+
+
+def filter_published(instance: Media):
+    # Check if the instance is not published, we have to skip then
+    if not instance.published:
+        log.warn(f'Media: {instance.source} / {instance} has no published date '
+                 f'set, marking to be skipped')
+        return True
+    return False
+
+
+# Return True if we are to skip downloading it based on video title not matching the filter text
+def filter_filter_text(instance: Media):
+    filter_text = instance.source.filter_text.strip()
+
+    if not filter_text:
+        return False
+
+    # We match the filter text, so don't skip downloading this
+    if instance.source.is_regex_match(instance.title):
+        log.info(f'Media: {instance.source} / {instance} has a valid '
+                 f'title filter, marking to be unskipped')
+        return False
+
+    log.info(f'Media: {instance.source} / {instance} doesn\'t match '
+             f'title filter, marking to be skipped')
+
+    return True
+
+
+# Return True if we are to skip downloading it because it was published on or before the download cap date
+def filter_max_cap(instance: Media):
+    max_cap_age = instance.source.download_cap_date
+    if not max_cap_age:
+        log.debug(f'Media: {instance.source} / {instance} has not max_cap_age '
+                  f'so not skipping based on max_cap_age')
+        return False
+
+    if not isinstance(instance.published, datetime):
+        # No usable published date, comparing it to max_cap_age would raise a TypeError
+        log.warn(f'Media: {instance.source} / {instance} has no published date, skipping')
+        return True
+
+    if instance.published <= max_cap_age:
+        log.info(f'Media: {instance.source} / {instance} is too old for '
+                 f'the download cap date, marking to be skipped')
+        return True
+
+    return False
+
+
+# If the source has a cut-off, check the upload date is within the allowed delta
+def filter_source_cutoff(instance: Media):
+    if instance.source.delete_old_media and instance.source.days_to_keep > 0:
+        if not isinstance(instance.published, datetime):
+            # Media has no known published date or incomplete metadata
+            log.warn(f'Media: {instance.source} / {instance} has no published date, skipping')
+            return True
+
+        cutoff = timezone.now() - timedelta(days=instance.source.days_to_keep)
+        if instance.published < cutoff:
+            # Media was published before the cutoff date, skip it
+            log.warn(f'Media: {instance.source} / {instance} is older than '
+                     f'{instance.source.days_to_keep} days, skipping')
+            return True
+
+    return False
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index df9e7224..5d436517 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -13,6 +13,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
                     map_task_to_instance, check_source_directory_exists,
                     download_media, rescan_media_server, download_source_images)
 from .utils import delete_file
+from .filtering import filter_media
 
 
 @receiver(pre_save, sender=Source)
@@ -110,7 +111,7 @@ def media_post_save(sender, instance, created, **kwargs):
     # Reset the skip flag if the download cap has changed if the media has not
     # already been downloaded
     if not instance.downloaded and instance.metadata:
-        skip_changed = filter_instance(instance)
+        skip_changed = filter_media(instance)
 
         # Recalculate the "can_download" flag, this may
         # need to change if the source specifications have been changed
@@ -172,73 +173,7 @@ def media_post_save(sender, instance, created, **kwargs):
             )
 
 
-# Check the filter conditions for instance, return is if the Skip property has changed so we can do other things
-def filter_instance(instance):
-    # Assume we aren't skipping it, if any of these conditions are true, we skip it
-    skip = False
 
-    # Check if it's published
-    if filter_instance_published(instance):
-        skip = True
-
-    # Check if older than max_cap_age, skip
-    if filter_instance_max_cap(instance):
-        skip = True
-
-    # Check if we have filter_text and filter text matches, set unskip
-    if filter_instance_filter_text(instance):
-        skip = True
-
-    # Check if skipping
-    if instance.skip != skip:
-        instance.skip = skip
-        log.warn(f'Media: {instance.source} / {instance} has changed skip setting to {skip}')
-        return True
-
-    return False
-
-
-def filter_instance_published(instance):
-    # Check if the instance is not published, we have to skip then
-    if not instance.published:
-        log.warn(f'Media: {instance.source} / {instance} has no published date '
-                 f'set, marking to be skipped')
-        return True
-    return False
-
-
-# Return True if we are to skip downloading it based on filter text not matching
-def filter_instance_filter_text(instance):
-    filter_text = instance.source.filter_text.strip()
-
-    if not filter_text:
-        return False
-
-    # We match the filter text, so don't skip downloading this
-    if instance.source.is_regex_match(instance.title):
-        log.info(f'Media: {instance.source} / {instance} has a valid '
-                 f'title filter, marking to be unskipped')
-        return False
-
-    log.info(f'Media: {instance.source} / {instance} doesn\'t match '
-             f'title filter, marking to be skipped')
-
-    return True
-
-
-def filter_instance_max_cap(instance):
-    max_cap_age = instance.source.download_cap_date
-    if not max_cap_age:
-        log.debug(f'Media: {instance.source} / {instance} has not max_cap_age '
-                  f'so not skipping based on max_cap_age')
-        return False
-
-    if instance.published <= max_cap_age:
-        log.info(f'Media: {instance.source} / {instance} is too old for '
-                 f'the download cap date, marking to be skipped')
-        return True
-
-    return False
 
 @receiver(pre_delete, sender=Media)
 def media_pre_delete(sender, instance, **kwargs):
diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py
index e1ea7494..e9d8d986 100644
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -15,7 +15,7 @@ from django.utils import timezone
 from background_task.models import Task
 from .models import Source, Media
 from .tasks import cleanup_old_media
-from .signals import filter_instance
+from .filtering import filter_media
 
 
 class FrontEndTestCase(TestCase):
@@ -735,7 +735,7 @@ class MediaFilterTestCase(TestCase):
         # Check if unpublished that we skip download it
         self.media.skip = False
         self.media.published = False
-        changed = filter_instance(self.media)
+        changed = filter_media(self.media)
         self.assertTrue(changed)
         self.assertTrue(self.media.skip)
 
@@ -744,7 +744,7 @@ class MediaFilterTestCase(TestCase):
         self.media.skip = True
         self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
                                                             minute=1, second=1))
-        changed = filter_instance(self.media)
+        changed = filter_media(self.media)
         self.assertTrue(changed)
         self.assertFalse(self.media.skip)
 
@@ -754,7 +754,7 @@ class MediaFilterTestCase(TestCase):
         self.media.skip = False
         self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
                                                             minute=1, second=1))
-        changed = filter_instance(self.media)
+        changed = filter_media(self.media)
         self.assertTrue(changed)
         self.assertTrue(self.media.skip)
 
@@ -764,7 +764,7 @@ class MediaFilterTestCase(TestCase):
         self.media.skip = True
         self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
                                                             minute=1, second=1))
-        changed = filter_instance(self.media)
+        changed = filter_media(self.media)
         self.assertTrue(changed)
         self.assertFalse(self.media.skip)
 
@@ -774,7 +774,7 @@ class MediaFilterTestCase(TestCase):
         self.media.skip = False
         self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
                                                             minute=1, second=1))
-        changed = filter_instance(self.media)
+        changed = filter_media(self.media)
         self.assertTrue(changed)
         self.assertTrue(self.media.skip)
 
@@ -783,7 +783,7 @@ class MediaFilterTestCase(TestCase):
         self.media.source.download_cap = 3600
         self.media.skip = True
         self.media.published = timezone.now()
-        changed = filter_instance(self.media)
+        changed = filter_media(self.media)
         self.assertTrue(changed)
         self.assertFalse(self.media.skip)
 