mirror of
https://github.com/meeb/tubesync.git
synced 2025-06-24 14:06:36 +00:00
move filtering to own module. Add filtering for days_to_keep
This commit is contained in:
parent
6e8c680770
commit
ea8223d86b
98
tubesync/sync/filtering.py
Normal file
98
tubesync/sync/filtering.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
'''
|
||||||
|
All the logic for filtering media from channels to work out if we should skip downloading it or not
|
||||||
|
'''
|
||||||
|
|
||||||
|
from common.logger import log
|
||||||
|
from .models import Source, Media, MediaServer
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
# Check the filter conditions for the instance; returns whether the skip property has changed so the caller can react to it
|
||||||
|
def filter_media(instance: Media):
    '''
    Re-evaluate every skip filter for a media item and update its skip flag.

    Each filter returns True when it wants the media skipped; the media is
    skipped if any of them does. ``instance.skip`` is only written (and a
    warning logged) when the computed value differs from the current one.
    The instance is not saved here.

    Returns True if ``instance.skip`` was changed, False otherwise.
    '''
    # Built as a list rather than a generator so that every filter runs (and
    # logs) even after an earlier one has already asked for a skip.
    verdicts = [
        check(instance)
        for check in (
            filter_published,      # no published date
            filter_max_cap,        # at or before the download cap date
            filter_source_cutoff,  # older than the source's days_to_keep
            filter_filter_text,    # title does not match the filter text
        )
    ]
    skip = any(verdicts)

    changed = instance.skip != skip
    if changed:
        instance.skip = skip
        log.warn(f'Media: {instance.source} / {instance} has changed skip setting to {skip}')
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def filter_published(instance: Media):
    '''
    Return True if the media has no published date and so has to be skipped.

    A truthy ``instance.published`` means the media passes this filter.
    '''
    if instance.published:
        return False

    log.warn(f'Media: {instance.source} / {instance} has no published date '
             f'set, marking to be skipped')
    return True
|
||||||
|
|
||||||
|
|
||||||
|
# Return True if we are to skip downloading it based on video title not matching the filter text
|
||||||
|
def filter_filter_text(instance: Media):
    '''
    Return True if the media should be skipped because its title does not
    match the source's filter text.

    A source whose filter text is empty (after stripping whitespace) never
    causes a skip. Otherwise the decision is delegated to
    ``instance.source.is_regex_match`` on the media title.
    '''
    source = instance.source
    if not source.filter_text.strip():
        # No filter configured, nothing to check
        return False

    matched = bool(source.is_regex_match(instance.title))
    if matched:
        log.info(f'Media: {instance.source} / {instance} has a valid '
                 f'title filter, marking to be unskipped')
    else:
        log.info(f'Media: {instance.source} / {instance} doesn\'t match '
                 f'title filter, marking to be skipped')
    return not matched
|
||||||
|
|
||||||
|
|
||||||
|
def filter_max_cap(instance: Media):
    '''
    Return True if the media should be skipped because of the source's
    download cap date.

    Returns False when the source has no download cap date. When a cap is
    set, media published at or before that date is skipped, and so is media
    with no published date at all.
    '''
    max_cap_age = instance.source.download_cap_date
    if not max_cap_age:
        log.debug(f'Media: {instance.source} / {instance} has not max_cap_age '
                  f'so not skipping based on max_cap_age')
        return False

    published = instance.published
    if not published:
        # Ordering None/False against a datetime raises TypeError, and
        # filter_media() calls this filter even when the published date is
        # missing. Without a date the media cannot be shown to be inside the
        # cap, so skip it; filter_published() already logs the missing date.
        return True

    if published <= max_cap_age:
        log.info(f'Media: {instance.source} / {instance} is too old for '
                 f'the download cap date, marking to be skipped')
        return True

    return False
|
||||||
|
|
||||||
|
|
||||||
|
# If the source has a cut-off, check the upload date is within the allowed delta
|
||||||
|
def filter_source_cutoff(instance: Media):
    '''
    Return True if the media should be skipped because it is older than the
    source's retention window.

    The check only applies when the source has ``delete_old_media`` enabled
    and ``days_to_keep`` greater than zero; otherwise this returns False.
    Media whose ``published`` value is not a datetime is skipped.
    '''
    source = instance.source
    if not (source.delete_old_media and source.days_to_keep > 0):
        # Source has no cut-off configured
        return False

    published = instance.published
    if not isinstance(published, datetime):
        # Media has no known published date or incomplete metadata
        log.warn(f'Media: {instance.source} / {instance} has no published date, skipping')
        return True

    # Oldest publication time that is still inside the retention window
    cutoff = timezone.now() - timedelta(days=source.days_to_keep)
    if published < cutoff:
        # Media was published before the cut-off date, skip it
        log.warn(f'Media: {instance.source} / {instance} is older than '
                 f'{instance.source.days_to_keep} days, skipping')
        return True

    return False
|
@ -13,6 +13,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
|
|||||||
map_task_to_instance, check_source_directory_exists,
|
map_task_to_instance, check_source_directory_exists,
|
||||||
download_media, rescan_media_server, download_source_images)
|
download_media, rescan_media_server, download_source_images)
|
||||||
from .utils import delete_file
|
from .utils import delete_file
|
||||||
|
from .filtering import filter_media
|
||||||
|
|
||||||
|
|
||||||
@receiver(pre_save, sender=Source)
|
@receiver(pre_save, sender=Source)
|
||||||
@ -110,7 +111,7 @@ def media_post_save(sender, instance, created, **kwargs):
|
|||||||
# Reset the skip flag if the download cap has changed if the media has not
|
# Reset the skip flag if the download cap has changed if the media has not
|
||||||
# already been downloaded
|
# already been downloaded
|
||||||
if not instance.downloaded and instance.metadata:
|
if not instance.downloaded and instance.metadata:
|
||||||
skip_changed = filter_instance(instance)
|
skip_changed = filter_media(instance)
|
||||||
|
|
||||||
# Recalculate the "can_download" flag, this may
|
# Recalculate the "can_download" flag, this may
|
||||||
# need to change if the source specifications have been changed
|
# need to change if the source specifications have been changed
|
||||||
@ -172,73 +173,7 @@ def media_post_save(sender, instance, created, **kwargs):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Check the filter conditions for the instance; returns whether the skip property has changed so the caller can react to it
|
|
||||||
def filter_instance(instance):
    '''
    Re-evaluate every skip filter for a media item and update its skip flag.

    Each filter returns True when it wants the media skipped; the media is
    skipped if any of them does. ``instance.skip`` is only written (and a
    warning logged) when the computed value differs from the current one.
    The instance is not saved here.

    Returns True if ``instance.skip`` was changed, False otherwise.
    '''
    # Built as a list rather than a generator so that every filter runs (and
    # logs) even after an earlier one has already asked for a skip.
    verdicts = [
        check(instance)
        for check in (
            filter_instance_published,    # no published date
            filter_instance_max_cap,      # at or before the download cap date
            filter_instance_filter_text,  # title does not match the filter text
        )
    ]
    skip = any(verdicts)

    changed = instance.skip != skip
    if changed:
        instance.skip = skip
        log.warn(f'Media: {instance.source} / {instance} has changed skip setting to {skip}')
    return changed
|
|
||||||
|
|
||||||
|
|
||||||
def filter_instance_published(instance):
    '''
    Return True if the media has no published date and so has to be skipped.

    A truthy ``instance.published`` means the media passes this filter.
    '''
    if instance.published:
        return False

    log.warn(f'Media: {instance.source} / {instance} has no published date '
             f'set, marking to be skipped')
    return True
|
|
||||||
|
|
||||||
|
|
||||||
# Return True if we are to skip downloading it based on filter text not matching
|
|
||||||
def filter_instance_filter_text(instance):
    '''
    Return True if the media should be skipped because its title does not
    match the source's filter text.

    A source whose filter text is empty (after stripping whitespace) never
    causes a skip. Otherwise the decision is delegated to
    ``instance.source.is_regex_match`` on the media title.
    '''
    source = instance.source
    if not source.filter_text.strip():
        # No filter configured, nothing to check
        return False

    matched = bool(source.is_regex_match(instance.title))
    if matched:
        log.info(f'Media: {instance.source} / {instance} has a valid '
                 f'title filter, marking to be unskipped')
    else:
        log.info(f'Media: {instance.source} / {instance} doesn\'t match '
                 f'title filter, marking to be skipped')
    return not matched
|
|
||||||
|
|
||||||
|
|
||||||
def filter_instance_max_cap(instance):
    '''
    Return True if the media should be skipped because of the source's
    download cap date.

    Returns False when the source has no download cap date. When a cap is
    set, media published at or before that date is skipped, and so is media
    with no published date at all.
    '''
    max_cap_age = instance.source.download_cap_date
    if not max_cap_age:
        log.debug(f'Media: {instance.source} / {instance} has not max_cap_age '
                  f'so not skipping based on max_cap_age')
        return False

    published = instance.published
    if not published:
        # Ordering None/False against a datetime raises TypeError, and
        # filter_instance() calls this filter even when the published date is
        # missing. Without a date the media cannot be shown to be inside the
        # cap, so skip it; filter_instance_published() already logs the
        # missing date.
        return True

    if published <= max_cap_age:
        log.info(f'Media: {instance.source} / {instance} is too old for '
                 f'the download cap date, marking to be skipped')
        return True

    return False
|
|
||||||
|
|
||||||
@receiver(pre_delete, sender=Media)
|
@receiver(pre_delete, sender=Media)
|
||||||
def media_pre_delete(sender, instance, **kwargs):
|
def media_pre_delete(sender, instance, **kwargs):
|
||||||
|
@ -15,7 +15,7 @@ from django.utils import timezone
|
|||||||
from background_task.models import Task
|
from background_task.models import Task
|
||||||
from .models import Source, Media
|
from .models import Source, Media
|
||||||
from .tasks import cleanup_old_media
|
from .tasks import cleanup_old_media
|
||||||
from .signals import filter_instance
|
from .filtering import filter_media
|
||||||
|
|
||||||
|
|
||||||
class FrontEndTestCase(TestCase):
|
class FrontEndTestCase(TestCase):
|
||||||
@ -735,7 +735,7 @@ class MediaFilterTestCase(TestCase):
|
|||||||
# Check if unpublished that we skip download it
|
# Check if unpublished that we skip download it
|
||||||
self.media.skip = False
|
self.media.skip = False
|
||||||
self.media.published = False
|
self.media.published = False
|
||||||
changed = filter_instance(self.media)
|
changed = filter_media(self.media)
|
||||||
self.assertTrue(changed)
|
self.assertTrue(changed)
|
||||||
self.assertTrue(self.media.skip)
|
self.assertTrue(self.media.skip)
|
||||||
|
|
||||||
@ -744,7 +744,7 @@ class MediaFilterTestCase(TestCase):
|
|||||||
self.media.skip = True
|
self.media.skip = True
|
||||||
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
|
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
|
||||||
minute=1, second=1))
|
minute=1, second=1))
|
||||||
changed = filter_instance(self.media)
|
changed = filter_media(self.media)
|
||||||
self.assertTrue(changed)
|
self.assertTrue(changed)
|
||||||
self.assertFalse(self.media.skip)
|
self.assertFalse(self.media.skip)
|
||||||
|
|
||||||
@ -754,7 +754,7 @@ class MediaFilterTestCase(TestCase):
|
|||||||
self.media.skip = False
|
self.media.skip = False
|
||||||
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
|
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
|
||||||
minute=1, second=1))
|
minute=1, second=1))
|
||||||
changed = filter_instance(self.media)
|
changed = filter_media(self.media)
|
||||||
self.assertTrue(changed)
|
self.assertTrue(changed)
|
||||||
self.assertTrue(self.media.skip)
|
self.assertTrue(self.media.skip)
|
||||||
|
|
||||||
@ -764,7 +764,7 @@ class MediaFilterTestCase(TestCase):
|
|||||||
self.media.skip = True
|
self.media.skip = True
|
||||||
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
|
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
|
||||||
minute=1, second=1))
|
minute=1, second=1))
|
||||||
changed = filter_instance(self.media)
|
changed = filter_media(self.media)
|
||||||
self.assertTrue(changed)
|
self.assertTrue(changed)
|
||||||
self.assertFalse(self.media.skip)
|
self.assertFalse(self.media.skip)
|
||||||
|
|
||||||
@ -774,7 +774,7 @@ class MediaFilterTestCase(TestCase):
|
|||||||
self.media.skip = False
|
self.media.skip = False
|
||||||
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
|
self.media.published = timezone.make_aware(datetime(year=2020, month=1, day=1, hour=1,
|
||||||
minute=1, second=1))
|
minute=1, second=1))
|
||||||
changed = filter_instance(self.media)
|
changed = filter_media(self.media)
|
||||||
self.assertTrue(changed)
|
self.assertTrue(changed)
|
||||||
self.assertTrue(self.media.skip)
|
self.assertTrue(self.media.skip)
|
||||||
|
|
||||||
@ -783,7 +783,7 @@ class MediaFilterTestCase(TestCase):
|
|||||||
self.media.source.download_cap = 3600
|
self.media.source.download_cap = 3600
|
||||||
self.media.skip = True
|
self.media.skip = True
|
||||||
self.media.published = timezone.now()
|
self.media.published = timezone.now()
|
||||||
changed = filter_instance(self.media)
|
changed = filter_media(self.media)
|
||||||
self.assertTrue(changed)
|
self.assertTrue(changed)
|
||||||
self.assertFalse(self.media.skip)
|
self.assertFalse(self.media.skip)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user