diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py
index 8d6eb34a..3f7023c1 100644
--- a/tubesync/sync/filtering.py
+++ b/tubesync/sync/filtering.py
@@ -130,14 +130,7 @@ def filter_max_cap(instance: Media):
         return False
 
     max_cap_age = instance.source.download_cap_date
-    if not max_cap_age:
-        log.debug(
-            f"Media: {instance.source} / {instance} has not max_cap_age "
-            f"so not skipping based on max_cap_age"
-        )
-        return False
-
-    if instance.published <= max_cap_age:
+    if max_cap_age and instance.published <= max_cap_age:
         # log new media instances, not every media instance every time
         if not instance.skip:
             log.info(
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 15083606..f3c051fa 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1037,6 +1037,40 @@ class Media(models.Model):
         except Exception as e:
             return {}
 
+    @property
+    def refresh_formats(self):
+        data = self.loaded_metadata
+        metadata_seconds = data.get('epoch', None)
+        if not metadata_seconds:
+            self.metadata = None
+            return False
+
+        now = timezone.now()
+        formats_seconds = data.get('formats_epoch', metadata_seconds)
+        metadata_dt = self.metadata_published(formats_seconds)
+        if (now - metadata_dt) < timedelta(seconds=self.source.index_schedule):
+            return False
+
+        self.skip = False
+        metadata = self.index_metadata()
+        if self.skip:
+            return False
+
+        response = metadata
+        if getattr(settings, 'SHRINK_NEW_MEDIA_METADATA', False):
+            response = filter_response(metadata, True)
+
+        field = self.get_metadata_field('formats')
+        data[field] = response.get(field, [])
+        if data.get('availability', 'public') != response.get('availability', 'public'):
+            data['availability'] = response.get('availability', 'public')
+        data['formats_epoch'] = response.get('epoch', formats_seconds)
+
+        from common.utils import json_serial
+        compact_json = json.dumps(data, separators=(',', ':'), default=json_serial)
+        self.metadata = compact_json
+        return True
+
     @property
     def url(self):
         url = self.URLS.get(self.source.source_type, '')
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 59c4851e..6780e4e4 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -553,6 +553,8 @@ def download_media(media_id):
         err = (f'Failed to download media: {media} (UUID: {media.pk}) to disk, '
                f'expected outfile does not exist: {filepath}')
         log.error(err)
+        # Try refreshing formats
+        media.refresh_formats
         # Raising an error here triggers the task to be re-attempted (or fail)
         raise DownloadFailedException(err)
 
@@ -587,10 +589,25 @@ def save_all_media_for_source(source_id):
         log.error(f'Task save_all_media_for_source(pk={source_id}) called but no '
                   f'source exists with ID: {source_id}')
         return
+
+    already_saved = set()
+    mqs = Media.objects.filter(source=source)
+    refresh_qs = mqs.filter(
+        can_download=False,
+        skip=False,
+        manual_skip=False,
+        downloaded=False,
+    )
+    for media in refresh_qs:
+        media.refresh_formats
+        media.save()
+        already_saved.add(media.uuid)
+
     # Trigger the post_save signal for each media item linked to this source as various
     # flags may need to be recalculated
-    for media in Media.objects.filter(source=source):
-        media.save()
+    for media in mqs:
+        if media.uuid not in already_saved:
+            media.save()
 
 
 @background(schedule=0, remove_existing_tasks=True)
diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index 8842423f..5d0cf19e 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -85,6 +85,7 @@ def get_channel_image_info(url):
         'simulate': True,
         'logger': log,
         'extract_flat': True, # Change to False to get detailed info
+        'check_formats': False,
     })
 
     with yt_dlp.YoutubeDL(opts) as y:
@@ -143,7 +144,11 @@ def get_media_info(url):
         'simulate': True,
         'logger': log,
         'extract_flat': True,
-        'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
+        'check_formats': True,
+        'extractor_args': {
+            'youtube': {'formats': ['missing_pot']},
+            'youtubetab': {'approximate_date': ['true']},
+        },
     })
     response = {}
     with yt_dlp.YoutubeDL(opts) as y:
@@ -248,7 +253,7 @@ def download_media(
         'writeautomaticsub': auto_subtitles,
         'subtitleslangs': sub_langs.split(','),
         'writethumbnail': embed_thumbnail,
-        'check_formats': False,
+        'check_formats': None,
         'overwrites': None,
         'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20),
         'max_sleep_interval': settings.DOWNLOAD_MEDIA_DELAY,