move filtering to own module. Add filtering for days_to_keep

This commit is contained in:
Tim 2024-07-11 16:17:29 +08:00
parent 6e8c680770
commit ea8223d86b
3 changed files with 107 additions and 74 deletions

View File

@ -0,0 +1,98 @@
'''
All the logic for filtering media from channels to work out if we should skip downloading it or not
'''
from common.logger import log
from .models import Source, Media, MediaServer
from datetime import datetime, timedelta
from django.utils import timezone
def filter_media(instance: Media):
    '''
    Decide whether ``instance`` should be skipped and store the result on
    ``instance.skip``.

    The media is skipped as soon as at least one of the individual filters
    asks for it. All filters are always evaluated, in a fixed order, so each
    of them can log its own reason.

    Returns True if ``instance.skip`` was changed by this call (so the caller
    can react to the change), otherwise False. The instance is only modified
    in memory; nothing is saved here.
    '''
    # A list rather than a generator: every filter must run, no short-circuit
    verdicts = [
        filter_published(instance),      # no published date
        filter_max_cap(instance),        # older than the download cap date
        filter_source_cutoff(instance),  # older than the days_to_keep cutoff
        filter_filter_text(instance),    # title does not match the filter text
    ]
    skip = any(verdicts)
    if instance.skip == skip:
        # Nothing changed, leave the instance alone
        return False
    instance.skip = skip
    log.warn(f'Media: {instance.source} / {instance} has changed skip setting to {skip}')
    return True
def filter_published(instance: Media):
    '''
    Return True (skip) when ``instance.published`` is falsy, i.e. the media
    has no published date set, otherwise False.
    '''
    if instance.published:
        return False
    # Media without a published date must be skipped
    log.warn(f'Media: {instance.source} / {instance} has no published date '
             f'set, marking to be skipped')
    return True
def filter_filter_text(instance: Media):
    '''
    Return True (skip) when the source has a filter text and the media title
    does not match it.

    Returns False when the source's filter text is empty after stripping
    whitespace, or when ``instance.source.is_regex_match(instance.title)``
    reports a match.
    '''
    if not instance.source.filter_text.strip():
        # No filter configured, nothing to skip on
        return False
    title_matches = instance.source.is_regex_match(instance.title)
    if not title_matches:
        log.info(f'Media: {instance.source} / {instance} doesn\'t match '
                 f'title filter, marking to be skipped')
        return True
    # The title matches the filter text, so this filter does not skip it
    log.info(f'Media: {instance.source} / {instance} has a valid '
             f'title filter, marking to be unskipped')
    return False
def filter_max_cap(instance: Media):
    '''
    Return True (skip) when the media is too old for the source's download
    cap date.

    Returns False when the source has no download cap date
    (``instance.source.download_cap_date`` is falsy) or when the media was
    published after that date. Media published on or before the cap date is
    skipped.

    Media whose ``published`` value is not a datetime (for example None or
    False) is skipped as well: its age cannot be checked, and comparing such
    a value with the cap date would raise TypeError.
    '''
    max_cap_age = instance.source.download_cap_date
    if not max_cap_age:
        log.debug(f'Media: {instance.source} / {instance} has not max_cap_age '
                  f'so not skipping based on max_cap_age')
        return False
    published = instance.published
    if not isinstance(published, datetime):
        # filter_media() calls this filter even when filter_published() has
        # already flagged the media, so a missing date must not crash here.
        # Skipping matches what filter_source_cutoff() does in this case.
        log.debug(f'Media: {instance.source} / {instance} has no published date '
                  f'to compare with the download cap date, marking to be skipped')
        return True
    if published <= max_cap_age:
        log.info(f'Media: {instance.source} / {instance} is too old for '
                 f'the download cap date, marking to be skipped')
        return True
    return False
def filter_source_cutoff(instance: Media):
    '''
    Return True (skip) when the source deletes old media and this media is
    older than the source's ``days_to_keep`` window.

    The check only applies when ``instance.source.delete_old_media`` is truthy
    and ``instance.source.days_to_keep`` is greater than zero; otherwise False
    is returned. When it applies, media whose ``published`` value is not a
    datetime is skipped, as is media published before
    ``now - days_to_keep days``.
    '''
    source = instance.source
    cutoff_enabled = source.delete_old_media and source.days_to_keep > 0
    if not cutoff_enabled:
        return False
    published = instance.published
    if not isinstance(published, datetime):
        # Media has no known published date or incomplete metadata
        log.warn(f'Media: {instance.source} / {instance} has no published date, skipping')
        return True
    # Oldest published date that is still inside the keep window
    oldest_allowed = timezone.now() - timedelta(days=source.days_to_keep)
    too_old = published < oldest_allowed
    if too_old:
        # Media was published before the cutoff date, skip it
        log.warn(f'Media: {instance.source} / {instance} is older than '
                 f'{instance.source.days_to_keep} days, skipping')
        return True
    return False

View File

@ -13,6 +13,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
map_task_to_instance, check_source_directory_exists, map_task_to_instance, check_source_directory_exists,
download_media, rescan_media_server, download_source_images) download_media, rescan_media_server, download_source_images)
from .utils import delete_file from .utils import delete_file
from .filtering import filter_media
@receiver(pre_save, sender=Source) @receiver(pre_save, sender=Source)
@ -110,7 +111,7 @@ def media_post_save(sender, instance, created, **kwargs):
# Reset the skip flag if the download cap has changed if the media has not # Reset the skip flag if the download cap has changed if the media has not
# already been downloaded # already been downloaded
if not instance.downloaded and instance.metadata: if not instance.downloaded and instance.metadata:
skip_changed = filter_instance(instance) skip_changed = filter_media(instance)
# Recalculate the "can_download" flag, this may # Recalculate the "can_download" flag, this may
# need to change if the source specifications have been changed # need to change if the source specifications have been changed
@ -172,73 +173,7 @@ def media_post_save(sender, instance, created, **kwargs):
) )
def filter_instance(instance):
    '''
    Decide whether ``instance`` should be skipped and store the result on
    ``instance.skip``.

    The media is skipped when at least one of the individual filters asks for
    it. All filters are always evaluated, in a fixed order, so each of them
    can log its own reason.

    Returns True if ``instance.skip`` was changed by this call (so the caller
    can react to the change), otherwise False. The instance is only modified
    in memory; nothing is saved here.
    '''
    # A list rather than a generator: every filter must run, no short-circuit
    verdicts = [
        filter_instance_published(instance),    # no published date
        filter_instance_max_cap(instance),      # older than the download cap date
        filter_instance_filter_text(instance),  # title does not match the filter text
    ]
    skip = any(verdicts)
    if instance.skip == skip:
        # Nothing changed, leave the instance alone
        return False
    instance.skip = skip
    log.warn(f'Media: {instance.source} / {instance} has changed skip setting to {skip}')
    return True
def filter_instance_published(instance):
    '''
    Return True (skip) when ``instance.published`` is falsy, i.e. the media
    has no published date set, otherwise False.
    '''
    if instance.published:
        return False
    # Media without a published date must be skipped
    log.warn(f'Media: {instance.source} / {instance} has no published date '
             f'set, marking to be skipped')
    return True
def filter_instance_filter_text(instance):
    '''
    Return True (skip) when the source has a filter text and the media title
    does not match it.

    Returns False when the source's filter text is empty after stripping
    whitespace, or when ``instance.source.is_regex_match(instance.title)``
    reports a match.
    '''
    if not instance.source.filter_text.strip():
        # No filter configured, nothing to skip on
        return False
    title_matches = instance.source.is_regex_match(instance.title)
    if not title_matches:
        log.info(f'Media: {instance.source} / {instance} doesn\'t match '
                 f'title filter, marking to be skipped')
        return True
    # The title matches the filter text, so this filter does not skip it
    log.info(f'Media: {instance.source} / {instance} has a valid '
             f'title filter, marking to be unskipped')
    return False
def filter_instance_max_cap(instance):
    '''
    Return True (skip) when the media is too old for the source's download
    cap date.

    Returns False when the source has no download cap date
    (``instance.source.download_cap_date`` is falsy) or when the media was
    published after that date. Media published on or before the cap date is
    skipped.

    Media without a published date is skipped as well: its age cannot be
    checked, and comparing a missing date with the cap date would raise
    TypeError.
    '''
    max_cap_age = instance.source.download_cap_date
    if not max_cap_age:
        log.debug(f'Media: {instance.source} / {instance} has not max_cap_age '
                  f'so not skipping based on max_cap_age')
        return False
    published = instance.published
    if not published:
        # filter_instance() calls this filter even when
        # filter_instance_published() has already flagged the media, so a
        # missing date must not crash here.
        log.debug(f'Media: {instance.source} / {instance} has no published date '
                  f'to compare with the download cap date, marking to be skipped')
        return True
    if published <= max_cap_age:
        log.info(f'Media: {instance.source} / {instance} is too old for '
                 f'the download cap date, marking to be skipped')
        return True
    return False
@receiver(pre_delete, sender=Media) @receiver(pre_delete, sender=Media)
def media_pre_delete(sender, instance, **kwargs): def media_pre_delete(sender, instance, **kwargs):

View File

@ -15,7 +15,7 @@ from django.utils import timezone
from background_task.models import Task from background_task.models import Task
from .models import Source, Media from .models import Source, Media
from .tasks import cleanup_old_media from .tasks import cleanup_old_media
from .signals import filter_instance from .filtering import filter_media
class FrontEndTestCase(TestCase): class FrontEndTestCase(TestCase):
@ -735,7 +735,7 @@ class MediaFilterTestCase(TestCase):
# Check if unpublished that we skip download it # Check if unpublished that we skip download it
self.media.skip = False self.media.skip = False
self.media.published = False self.media.published = False
changed = filter_instance(self.media) changed = filter_media(self.media)
self.assertTrue(changed) self.assertTrue(changed)
self.assertTrue(self.media.skip) self.assertTrue(self.media.skip)
@ -744,7 +744,7 @@ class MediaFilterTestCase(TestCase):
self.media.skip = True self.media.skip = True
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1, self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
minute=1, second=1)) minute=1, second=1))
changed = filter_instance(self.media) changed = filter_media(self.media)
self.assertTrue(changed) self.assertTrue(changed)
self.assertFalse(self.media.skip) self.assertFalse(self.media.skip)
@ -754,7 +754,7 @@ class MediaFilterTestCase(TestCase):
self.media.skip = False self.media.skip = False
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1, self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
minute=1, second=1)) minute=1, second=1))
changed = filter_instance(self.media) changed = filter_media(self.media)
self.assertTrue(changed) self.assertTrue(changed)
self.assertTrue(self.media.skip) self.assertTrue(self.media.skip)
@ -764,7 +764,7 @@ class MediaFilterTestCase(TestCase):
self.media.skip = True self.media.skip = True
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1, self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
minute=1, second=1)) minute=1, second=1))
changed = filter_instance(self.media) changed = filter_media(self.media)
self.assertTrue(changed) self.assertTrue(changed)
self.assertFalse(self.media.skip) self.assertFalse(self.media.skip)
@ -774,7 +774,7 @@ class MediaFilterTestCase(TestCase):
self.media.skip = False self.media.skip = False
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1, self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
minute=1, second=1)) minute=1, second=1))
changed = filter_instance(self.media) changed = filter_media(self.media)
self.assertTrue(changed) self.assertTrue(changed)
self.assertTrue(self.media.skip) self.assertTrue(self.media.skip)
@ -783,7 +783,7 @@ class MediaFilterTestCase(TestCase):
self.media.source.download_cap = 3600 self.media.source.download_cap = 3600
self.media.skip = True self.media.skip = True
self.media.published = timezone.now() self.media.published = timezone.now()
changed = filter_instance(self.media) changed = filter_media(self.media)
self.assertTrue(changed) self.assertTrue(changed)
self.assertFalse(self.media.skip) self.assertFalse(self.media.skip)