From 195a1bef4d284eb2c24c922060ddc536f0010ac2 Mon Sep 17 00:00:00 2001 From: meeb Date: Tue, 8 Dec 2020 16:56:43 +1100 Subject: [PATCH] refactoring --- app/sync/matching.py | 245 ++++++++++++++++++++++ app/sync/models.py | 268 +++--------------------- app/sync/signals.py | 6 +- app/sync/tasks.py | 21 +- app/sync/templates/sync/media-item.html | 8 + app/sync/utils.py | 5 + app/sync/views.py | 9 +- 7 files changed, 315 insertions(+), 247 deletions(-) create mode 100644 app/sync/matching.py diff --git a/app/sync/matching.py b/app/sync/matching.py new file mode 100644 index 00000000..0ef10c14 --- /dev/null +++ b/app/sync/matching.py @@ -0,0 +1,245 @@ +''' + Match functions take a single Media object instance as their only argument and return + a pair of values. The first value is True if the match was exact and False if it was a + "best fit" fallback, the second value is the ID of the format that was matched. +''' + + +from django.conf import settings + + +min_height = getattr(settings, 'VIDEO_HEIGHT_CUTOFF', 360) +fallback_hd_cutoff = getattr(settings, 'VIDEO_HEIGHT_IS_HD', 500) + + +def get_best_combined_format(media): + ''' + Attempts to see if there is a single, combined audio and video format that + exactly matches the source requirements. This is used over separate audio + and video formats if possible. Combined formats are the easiest to check + for as they must exactly match the source profile to be valid. 
+ ''' + for fmt in media.iter_formats(): + # Check height matches + if media.source.source_resolution.strip().upper() != fmt['format']: + continue + # Check the video codec matches + if media.source.source_vcodec != fmt['vcodec']: + continue + # Check the audio codec matches + if media.source.source_acodec != fmt['acodec']: + continue + # if the source prefers 60fps, check for it + if media.source.prefer_60fps: + if not fmt['is_60fps']: + continue + # If the source prefers HDR, check for it + if media.source.prefer_hdr: + if not fmt['is_hdr']: + continue + # If we reach here, we have a combined match! + return True, fmt['id'] + return False, False + + +def get_best_audio_format(media): + ''' + Finds the best match for the source required audio format. If the source + has a 'fallback' of fail this can return no match. + ''' + # Order all audio-only formats by bitrate + audio_formats = [] + for fmt in media.iter_formats(): + # If the format has a video stream, skip it + if fmt['vcodec']: + continue + audio_formats.append(fmt) + audio_formats = list(reversed(sorted(audio_formats, key=lambda k: k['abr']))) + if not audio_formats: + # Media has no audio formats at all + return False, False + # Find the highest bitrate audio format with a matching codec + for fmt in audio_formats: + if media.source.source_acodec == fmt['acodec']: + # Matched! + return True, fmt['id'] + # No codecs matched + if media.source.can_fallback: + # Can fallback, find the next highest bitrate non-matching codec + return False, audio_formats[0] + else: + # Can't fallback + return False, False + + +def get_best_video_format(media): + ''' + Finds the best match for the source required video format. If the source + has a 'fallback' of fail this can return no match. Resolution is treated + as the most important factor to match. This is pretty verbose due to the + 'soft' matching requirements for prefer_hdr and prefer_60fps. 
+ ''' + # Check if the source wants audio only, fast path to return + if media.source.is_audio: + return False, False + # Filter video-only formats by resolution that matches the source + video_formats = [] + for fmt in media.iter_formats(): + # If the format has an audio stream, skip it + if fmt['acodec']: + continue + if media.source.source_resolution.strip().upper() == fmt['format']: + video_formats.append(fmt) + # Check we matched some streams + if not video_formats: + # No streams match the requested resolution, see if we can fallback + if media.source.can_fallback: + # Find the next-best format matches by height + for fmt in media.iter_formats(): + # If the format has an audio stream, skip it + if fmt['acodec']: + continue + if (fmt['height'] <= media.source.source_resolution_height and + fmt['height'] >= min_height): + video_formats.append(fmt) + else: + # Can't fallback + return False, False + video_formats = list(reversed(sorted(video_formats, key=lambda k: k['height']))) + print('height', media.source.source_resolution_height) + print('video_formats', video_formats) + if not video_formats: + # Still no matches + return False, False + exact_match, best_match = None, None + # Of our filtered video formats, check for resolution + codec + hdr + fps match + if media.source.prefer_60fps and media.source.prefer_hdr: + for fmt in video_formats: + # Check for an exact match + if (media.source.source_resolution.strip().upper() == fmt['format'] and + media.source.source_vcodec == fmt['vcodec'] and + fmt['is_hdr'] and + fmt['is_60fps']): + # Exact match + exact_match, best_match = True, fmt + break + if media.source.can_fallback: + if not best_match: + for fmt in video_formats: + # Check for a codec, hdr and fps match but drop the resolution + if (media.source.source_vcodec == fmt['vcodec'] and + fmt['is_hdr'] and fmt['is_60fps']): + # Close match + exact_match, best_match = False, fmt + break + if not best_match: + for fmt in video_formats: + # Check for hdr and fps 
match but drop the resolution and codec + if fmt['is_hdr'] and fmt['is_60fps']: + exact_match, best_match = False, fmt + break + if not best_match: + for fmt in video_formats: + # Check for fps match but drop the resolution and codec and hdr + if fmt['is_hdr'] and fmt['is_60fps']: + exact_match, best_match = False, fmt + break + if not best_match: + # Match the highest resolution + exact_match, best_match = False, video_formats[0] + # Check for resolution + codec + fps match + if media.source.prefer_60fps and not media.source.prefer_hdr: + for fmt in video_formats: + # Check for an exact match + if (media.source.source_resolution.strip().upper() == fmt['format'] and + media.source.source_vcodec == fmt['vcodec'] and + fmt['is_60fps']): + # Exact match + exact_match, best_match = True, fmt + break + if media.source.can_fallback: + if not best_match: + for fmt in video_formats: + # Check for a codec and fps match but drop the resolution + if (media.source.source_vcodec == fmt['vcodec'] and + fmt['is_60fps']): + exact_match, best_match = False, fmt + break + if not best_match: + for fmt in video_formats: + # Check for an fps match but drop the resolution and codec + if fmt['is_60fps']: + exact_match, best_match = False, fmt + break + if not best_match: + # Match the highest resolution + exact_match, best_match = False, video_formats[0] + # Check for resolution + codec + hdr + if media.source.prefer_hdr and not media.source.prefer_60fps: + for fmt in video_formats: + # Check for an exact match + if (media.source.source_resolution.strip().upper() == fmt['format'] and + media.source.source_vcodec == fmt['vcodec'] and + fmt['is_hdr']): + # Exact match + exact_match, best_match = True, fmt + break + if media.source.can_fallback: + if not best_match: + for fmt in video_formats: + # Check for a codec and hdr match but drop the resolution + if (media.source.source_vcodec == fmt['vcodec'] and + fmt['is_hdr']): + exact_match, best_match = True, fmt + break + if not best_match: + 
for fmt in video_formats: + # Check for an hdr match but drop the resolution and codec + if fmt['is_hdr']: + exact_match, best_match = False, fmt + break + if not best_match: + # Match the highest resolution + exact_match, best_match = False, video_formats[0] + # check for resolution + codec + if not media.source.prefer_hdr and not media.source.prefer_60fps: + for fmt in video_formats: + # Check for an exact match + if (media.source.source_resolution.strip().upper() == fmt['format'] and + media.source.source_vcodec == fmt['vcodec'] and + not fmt['is_60fps']): + # Exact match + exact_match, best_match = True, fmt + break + if media.source.can_fallback: + if not best_match: + for fmt in video_formats: + # Check for a codec match without 60fps and drop the resolution + if (media.source.source_vcodec == fmt['vcodec'] and + not fmt['is_60fps']): + exact_match, best_match = False, fmt + break + if not best_match: + for fmt in video_formats: + # Check for a codec match but drop the resolution + if media.source.source_vcodec == fmt['vcodec']: + # Close match + exact_match, best_match = False, fmt + break + if not best_match: + # Match the highest resolution + exact_match, best_match = False, video_formats[0] + # See if we found a match + if best_match: + # Final check to see if the match we found was good enough + if exact_match: + return True, best_match['id'] + elif media.source.can_fallback: + # Allow the fallback if it meets requirements + if (media.source.fallback == media.source.FALLBACK_NEXT_BEST_HD and + best_match['height'] >= fallback_hd_cutoff): + return False, best_match['id'] + elif media.source.fallback == media.source.FALLBACK_NEXT_BEST: + return False, best_match['id'] + # Nope, failed to find match + return False, False diff --git a/app/sync/models.py b/app/sync/models.py index f1286357..49dfa4e7 100644 --- a/app/sync/models.py +++ b/app/sync/models.py @@ -8,6 +8,8 @@ from django.utils.text import slugify from django.utils.translation import gettext_lazy 
as _ from .youtube import get_media_info as get_youtube_media_info from .utils import seconds_to_timestr, parse_media_format +from .matching import (get_best_combined_format, get_best_audio_format, + get_best_video_format) class Source(models.Model): @@ -259,7 +261,7 @@ class Source(models.Model): depending on audio codec. ''' if self.is_audio: - if self.source_acodec == self.SOURCE_ACODEC_M4A: + if self.source_acodec == self.SOURCE_ACODEC_MP4A: return 'm4a' elif self.source_acodec == self.SOURCE_ACODEC_OPUS: return 'ogg' @@ -284,9 +286,9 @@ class Source(models.Model): else: vc = self.source_vcodec ac = self.source_acodec - f = '60FPS' if self.prefer_60fps else '' - h = 'HDR' if self.prefer_hdr else '' - return f'{self.source_resolution} (video:{vc}, audio:{ac}) {f} {h}'.strip() + f = ' 60FPS' if self.is_video and self.prefer_60fps else '' + h = ' HDR' if self.is_video and self.prefer_hdr else '' + return f'{self.source_resolution} (video:{vc}, audio:{ac}){f}{h}'.strip() @property def directory_path(self): @@ -506,251 +508,47 @@ class Media(models.Model): yield parse_media_format(fmt) def get_best_combined_format(self): - ''' - Attempts to see if there is a single, combined audio and video format that - exactly matches the source requirements. This is used over separate audio - and video formats if possible. Combined formats are the easiest to check - for as they must exactly match the source profile be be valid. 
- ''' - for fmt in self.iter_formats(): - # Check height matches - if self.source.source_resolution.strip().upper() != fmt['format']: - continue - # Check the video codec matches - if self.source.source_vcodec != fmt['vcodec']: - continue - # Check the audio codec matches - if self.source.source_acodec != fmt['acodec']: - continue - # if the source prefers 60fps, check for it - if self.source.prefer_60fps: - if not fmt['is_60fps']: - continue - # If the source prefers HDR, check for it - if self.source.prefer_hdr: - if not fmt['is_hdr']: - continue - # If we reach here, we have a combined match! - return True, fmt['id'] - return False, False + return get_best_combined_format(self) def get_best_audio_format(self): - ''' - Finds the best match for the source required audio format. If the source - has a 'fallback' of fail this can return no match. - ''' - # Order all audio-only formats by bitrate - audio_formats = [] - for fmt in self.iter_formats(): - # If the format has a video stream, skip it - if fmt['vcodec']: - continue - audio_formats.append(fmt) - audio_formats = list(reversed(sorted(audio_formats, key=lambda k: k['abr']))) - if not audio_formats: - # Media has no audio formats at all - return False, False - # Find the highest bitrate audio format with a matching codec - for fmt in audio_formats: - if self.source.source_acodec == fmt['acodec']: - # Matched! - return True, fmt['id'] - # No codecs matched - if self.source.can_fallback: - # Can fallback, find the next highest bitrate non-matching codec - return False, audio_formats[0] - else: - # Can't fallback - return False, False - + return get_best_audio_format(self) def get_best_video_format(self): - ''' - Finds the best match for the source required video format. If the source - has a 'fallback' of fail this can return no match. Resolution is treated - as the most important factor to match. 
- ''' - min_height = getattr(settings, 'VIDEO_HEIGHT_CUTOFF', 360) - fallback_hd_cutoff = getattr(settings, 'VIDEO_HEIGHT_IS_HD', 500) - # Filter video-only formats by resolution that matches the source - video_formats = [] - for fmt in self.iter_formats(): - # If the format has an audio stream, skip it - if fmt['acodec']: - continue - if self.source.source_resolution.strip().upper() == fmt['format']: - video_formats.append(fmt) - # Check we matched some streams - if not video_formats: - # No streams match the requested resolution, see if we can fallback - if self.source.can_fallback: - # Find the next-best format matches by height - for fmt in self.iter_formats(): - # If the format has an audio stream, skip it - if fmt['acodec']: - continue - if (fmt['height'] <= self.source.source_resolution_height and - fmt['height'] >= min_height): - video_formats.append(fmt) - else: - # Can't fallback - return False, False - video_formats = list(reversed(sorted(video_formats, key=lambda k: k['height']))) - if not video_formats: - # Still no matches - return False, False - exact_match, best_match = None, None - # Of our filtered video formats, check for resolution + codec + hdr + fps match - if self.source.prefer_60fps and self.source.prefer_hdr: - for fmt in video_formats: - # Check for an exact match - if (self.source.source_resolution.strip().upper() == fmt['format'] and - self.source.source_vcodec == fmt['vcodec'] and - fmt['is_hdr'] and - fmt['is_60fps']): - # Exact match - exact_match, best_match = True, fmt - break - if self.source.can_fallback: - if not best_match: - for fmt in video_formats: - # Check for a codec, hdr and fps match but drop the resolution - if (self.source.source_vcodec == fmt['vcodec'] and - fmt['is_hdr'] and fmt['is_60fps']): - # Close match - exact_match, best_match = False, fmt - break - if not best_match: - for fmt in video_formats: - # Check for hdr and fps match but drop the resolution and codec - if fmt['is_hdr'] and fmt['is_60fps']: - 
exact_match, best_match = False, fmt - break - if not best_match: - for fmt in video_formats: - # Check for fps match but drop the resolution and codec and hdr - if fmt['is_hdr'] and fmt['is_60fps']: - exact_match, best_match = False, fmt - break - if not best_match: - # Match the highest resolution - exact_match, best_match = False, video_formats[0] - # Check for resolution + codec + fps match - if self.source.prefer_60fps and not self.source.prefer_hdr: - for fmt in video_formats: - # Check for an exact match - if (self.source.source_resolution.strip().upper() == fmt['format'] and - self.source.source_vcodec == fmt['vcodec'] and - fmt['is_60fps']): - # Exact match - exact_match, best_match = True, fmt - break - if self.source.can_fallback: - if not best_match: - for fmt in video_formats: - # Check for a codec and fps match but drop the resolution - if (self.source.source_vcodec == fmt['vcodec'] and - fmt['is_60fps']): - exact_match, best_match = False, fmt - break - if not best_match: - for fmt in video_formats: - # Check for an fps match but drop the resolution and codec - if fmt['is_60fps']: - exact_match, best_match = False, fmt - break - if not best_match: - # Match the highest resolution - exact_match, best_match = False, video_formats[0] - # Check for resolution + codec + hdr - if self.source.prefer_hdr and not self.source.prefer_60fps: - for fmt in video_formats: - # Check for an exact match - if (self.source.source_resolution.strip().upper() == fmt['format'] and - self.source.source_vcodec == fmt['vcodec'] and - fmt['is_hdr']): - # Exact match - exact_match, best_match = True, fmt - break - if self.source.can_fallback: - if not best_match: - for fmt in video_formats: - # Check for a codec and hdr match but drop the resolution - if (self.source.source_vcodec == fmt['vcodec'] and - fmt['is_hdr']): - exact_match, best_match = True, fmt - break - if not best_match: - for fmt in video_formats: - # Check for an hdr match but drop the resolution and codec - if 
fmt['is_hdr']: - exact_match, best_match = False, fmt - break - if not best_match: - # Match the highest resolution - exact_match, best_match = False, video_formats[0] - # check for resolution + codec - if not self.source.prefer_hdr and not self.source.prefer_60fps: - for fmt in video_formats: - # Check for an exact match - if (self.source.source_resolution.strip().upper() == fmt['format'] and - self.source.source_vcodec == fmt['vcodec'] and - not fmt['is_60fps']): - # Exact match - exact_match, best_match = True, fmt - break - if self.source.can_fallback: - if not best_match: - for fmt in video_formats: - # Check for a codec match without 60fps and drop the resolution - if (self.source.source_vcodec == fmt['vcodec'] and - not fmt['is_60fps']): - exact_match, best_match = False, fmt - break - if not best_match: - for fmt in video_formats: - # Check for a codec match but drop the resolution - if self.source.source_vcodec == fmt['vcodec']: - # Close match - exact_match, best_match = False, fmt - break - if not best_match: - # Match the highest resolution - exact_match, best_match = False, video_formats[0] - # See if we found a match - if best_match: - # Final check to see if the match we found was good enough - if exact_match: - return True, best_match['id'] - elif self.source.can_fallback: - # Allow the fallback if it meets requirements - if (self.source.fallback == self.source.FALLBACK_NEXT_BEST_HD and - best_match['height'] >= fallback_hd_cutoff): - return False, best_match['id'] - elif self.source.fallback == self.source.FALLBACK_NEXT_BEST: - return False, best_match['id'] - # Nope, failed to find match - return False, False - + return get_best_video_format(self) def get_format_str(self): ''' Returns a youtube-dl compatible format string for the best matches combination of source requirements and available audio and video formats. + Returns boolean False if there is no valid downloadable combo. 
''' if self.source.is_audio: - audio_format = self.get_best_audio_format() - return 'a' - else: - combined_format = self.get_best_combined_format() - if combined_format: - return 'c' + audio_match, audio_format = self.get_best_audio_format() + if audio_format: + return str(audio_format) else: - audio_format = self.get_best_audio_format() - video_format = self.get_best_video_format() - return 'a+v' + return False + else: + combined_match, combined_format = self.get_best_combined_format() + if combined_format: + return str(combined_format) + else: + audio_match, audio_format = self.get_best_audio_format() + video_match, video_format = self.get_best_video_format() + if audio_format and video_format: + return f'{audio_format}+{video_format}' + else: + return False return False + @property + def can_download(self): + ''' + Returns boolean True if the media can be downloaded, that is, the media + has stored formats which are compatible with the source requirements. + ''' + return self.get_format_str() is not False + @property def loaded_metadata(self): if self.pk in _metadata_cache: diff --git a/app/sync/signals.py b/app/sync/signals.py index 109247a1..f40b0186 100644 --- a/app/sync/signals.py +++ b/app/sync/signals.py @@ -49,7 +49,7 @@ def source_post_save(sender, instance, created, **kwargs): @receiver(pre_delete, sender=Source) -def source_post_delete(sender, instance, **kwargs): +def source_pre_delete(sender, instance, **kwargs): # Triggered before a source is deleted, delete all media objects to trigger # the Media models post_delete signal for media in Media.objects.filter(source=instance): @@ -66,7 +66,7 @@ def source_post_delete(sender, instance, **kwargs): @receiver(task_failed, sender=Task) def task_task_failed(sender, task_id, completed_task, **kwargs): - # Triggered after a source fails by reaching its max retry attempts + # Triggered after a task fails by reaching its max retry attempts obj, url = map_task_to_instance(completed_task) if isinstance(obj, 
Source): log.error(f'Permanent failure for source: {obj} task: {completed_task}') @@ -78,7 +78,7 @@ def task_task_failed(sender, task_id, completed_task, **kwargs): def media_post_save(sender, instance, created, **kwargs): # Triggered after media is saved if created: - # If the media is newly created fire a task off to download its thumbnail + # If the media is newly created start a task to download its thumbnail metadata = instance.loaded_metadata thumbnail_url = metadata.get('thumbnail', '') if thumbnail_url: diff --git a/app/sync/tasks.py b/app/sync/tasks.py index 9213bc99..68e25854 100644 --- a/app/sync/tasks.py +++ b/app/sync/tasks.py @@ -33,7 +33,7 @@ def get_hash(task_name, pk): def map_task_to_instance(task): ''' - Reverse-maps an scheduled backgrond task to an instance. Requires the task name + Reverse-maps a scheduled background task to an instance. Requires the task name to be a known task function and the first argument to be a UUID. This is used because UUID's are incompatible with background_task's "creator" feature. ''' @@ -45,6 +45,17 @@ def map_task_to_instance(task): Source: 'sync:source', Media: 'sync:media-item', } + # If the task has a UUID set in its .queue it's probably a link to a Source + if task.queue: + try: + queue_uuid = uuid.UUID(task.queue) + try: + return Source.objects.get(pk=task.queue) + except Source.DoesNotExist: + pass + except (TypeError, ValueError, AttributeError): + pass + # Unpack task_func, task_args_str = task.task_name, task.task_params model = TASK_MAP.get(task_func, None) if not model: @@ -75,7 +86,8 @@ def map_task_to_instance(task): def get_error_message(task): ''' - Extract an error message from a failed task. + Extract an error message from a failed task. This is the last line of the + last_error field with the method name removed. 
''' if not task.has_error(): return '' @@ -92,8 +104,7 @@ def get_source_completed_tasks(source_id, only_errors=False): ''' Returns a queryset of CompletedTask objects for a source by source ID. ''' - source_hash = get_hash('sync.tasks.index_source_task', source_id) - q = {'task_hash': source_hash} + q = {'queue': source_id} if only_errors: q['failed_at__isnull'] = False return CompletedTask.objects.filter(**q).order_by('-failed_at') @@ -163,7 +174,7 @@ def index_source_task(source_id): @background(schedule=0) def download_media_thumbnail(media_id, url): ''' - Downloads an image from a URL and saves it as a local thumbnail attached to a + Downloads an image from a URL and saves it as a local thumbnail attached to a Media object. ''' try: diff --git a/app/sync/templates/sync/media-item.html b/app/sync/templates/sync/media-item.html index f74926d0..ae15d3ee 100644 --- a/app/sync/templates/sync/media-item.html +++ b/app/sync/templates/sync/media-item.html @@ -41,6 +41,10 @@ Downloaded Downloaded
{% if media.downloaded %}{% else %}{% endif %} + + Can download + Can download
{% if youtube_dl_format %}{% else %}{% endif %} + Available formats Available formats
@@ -59,6 +63,10 @@ Video: {% if video_format %}{{ video_format }} {% if video_exact %}(exact match){% else %}(fallback){% endif %}{% else %}No match{% endif %} + + youtube-dl format + youtube-dl format
{% if youtube_dl_format %}{{ youtube_dl_format }}{% else %}No matching formats{% endif %} + diff --git a/app/sync/utils.py b/app/sync/utils.py index f202fe10..19683665 100644 --- a/app/sync/utils.py +++ b/app/sync/utils.py @@ -126,6 +126,11 @@ def seconds_to_timestr(seconds): def parse_media_format(format_dict): + ''' + This parser primarily adapts the format dict returned by youtube-dl into a + standard form used by the matchers in matching.py. If youtube-dl changes + any internals, update it here. + ''' vcodec_full = format_dict.get('vcodec', '') vcodec_parts = vcodec_full.split('.') if len(vcodec_parts) > 0: diff --git a/app/sync/views.py b/app/sync/views.py index 1949b7be..34eb6b72 100644 --- a/app/sync/views.py +++ b/app/sync/views.py @@ -23,7 +23,7 @@ from . import youtube class DashboardView(TemplateView): ''' - The dashboard shows non-interactive totals and summaries, nothing more. + The dashboard shows non-interactive totals and summaries. ''' template_name = 'sync/dashboard.html' @@ -349,8 +349,8 @@ class MediaView(ListView): class MediaThumbView(DetailView): ''' - Shows a media thumbnail. Whitenose doesn't support post-start media image - serving and the images here are pretty small, just serve them manually. This + Shows a media thumbnail. Whitenoise doesn't support post-start media image + serving and the images here are pretty small so just serve them manually. This isn't fast, but it's not likely to be a serious bottleneck. ''' @@ -389,12 +389,13 @@ class MediaItemView(DetailView): data['audio_format'] = audio_format data['video_exact'] = video_exact data['video_format'] = video_format + data['youtube_dl_format'] = self.object.get_format_str() return data class TasksView(ListView): ''' - A list of tasks queued to be completed. Typically, this is scraping for new + A list of tasks queued to be completed. This is, for example, scraping for new media or downloading media. '''