From 16cd0983c97d88ea46ae331df953a0c0a15e6369 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Feb 2025 17:18:20 -0500 Subject: [PATCH 1/6] Add `refresh_formats` property --- tubesync/sync/models.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 15083606..57a5242d 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1037,6 +1037,40 @@ class Media(models.Model): except Exception as e: return {} + @property + def refresh_formats(self): + data = self.loaded_metadata + metadata_seconds = data.get('epoch', None) + if not metadata_seconds: + self.metadata = None + return False + + now = timezone.now() + formats_seconds = data.get('formats_epoch', metadata_seconds) + metadata_dt = self.metadata_published(formats_seconds) + if (now - metadata_dt) < self.source.index_schedule: + return False + + self.skip = False + metadata = self.index_metadata() + if self.skip: + return False + + response = metadata + if getattr(settings, 'SHRINK_NEW_MEDIA_METADATA', False): + response = filter_response(metadata, True) + + field = self.get_metadata_field('formats') + data[field] = response.get(field, []) + if data.get('availability', 'public') != response.get('availability', 'public'): + data['availability'] = response.get('availability', 'public') + data['formats_epoch'] = response.get('epoch', formats_seconds) + + from common.utils import json_serial + compact_json = json.dumps(data, separators=(',', ':'), default=json_serial) + self.metadata = compact_json + return True + @property def url(self): url = self.URLS.get(self.source.source_type, '') From ad8fa7212351c105e58258e417c6914d321196af Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Feb 2025 17:34:20 -0500 Subject: [PATCH 2/6] Try to refresh formats while checking media --- tubesync/sync/tasks.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 59c4851e..f9d615cb 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -587,10 +587,25 @@ def save_all_media_for_source(source_id): log.error(f'Task save_all_media_for_source(pk={source_id}) called but no ' f'source exists with ID: {source_id}') return + + already_saved = set() + mqs = Media.objects.filter(source=source) + refresh_qs = mqs.filter( + can_download=False, + skip=False, + manual_skip=False, + downloaded=False, + ) + for media in refresh_qs: + media.refresh_formats + media.save() + already_saved.add(media.uuid) + # Trigger the post_save signal for each media item linked to this source as various # flags may need to be recalculated - for media in Media.objects.filter(source=source): - media.save() + for media in mqs: + if media.uuid not in already_saved: + media.save() @background(schedule=0, remove_existing_tasks=True) From b01af700986df02f3918264e87493599b6e7eb0b Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Feb 2025 17:48:18 -0500 Subject: [PATCH 3/6] Try to refresh formats after download failed --- tubesync/sync/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index f9d615cb..6780e4e4 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -553,6 +553,8 @@ def download_media(media_id): err = (f'Failed to download media: {media} (UUID: {media.pk}) to disk, ' f'expected outfile does not exist: {filepath}') log.error(err) + # Try refreshing formats + media.refresh_formats # Raising an error here triggers the task to be re-attempted (or fail) raise DownloadFailedException(err) From 5f81c3619fd7e6dc9e813a3d39118b147711ec39 Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Feb 2025 18:03:17 -0500 Subject: [PATCH 4/6] fixup: convert seconds with timedelta --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 57a5242d..f3c051fa 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1048,7 +1048,7 @@ class Media(models.Model): now = timezone.now() formats_seconds = data.get('formats_epoch', metadata_seconds) metadata_dt = self.metadata_published(formats_seconds) - if (now - metadata_dt) < self.source.index_schedule: + if (now - metadata_dt) < timedelta(seconds=self.source.index_schedule): return False self.skip = False From b65a0ff7598652f4356afea790b933ff87070fea Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Feb 2025 18:30:45 -0500 Subject: [PATCH 5/6] Remove the useless "not skipping" logged messages --- tubesync/sync/filtering.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py index 8d6eb34a..3f7023c1 100644 --- a/tubesync/sync/filtering.py +++ b/tubesync/sync/filtering.py @@ -130,14 +130,7 @@ def filter_max_cap(instance: Media): return False max_cap_age = instance.source.download_cap_date - if not max_cap_age: - log.debug( - f"Media: {instance.source} / {instance} has not max_cap_age " - f"so not skipping based on max_cap_age" - ) - return False - - if instance.published <= max_cap_age: + if max_cap_age and instance.published <= max_cap_age: # log new media instances, not every media instance every time if not instance.skip: log.info( From 4591d3867bb9a188f7ceaa0e3764f47ee500d46e Mon Sep 17 00:00:00 2001 From: tcely Date: Wed, 19 Feb 2025 21:37:55 -0500 Subject: [PATCH 6/6] Ask for formats that might not work and test them --- tubesync/sync/youtube.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 8842423f..5d0cf19e 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -85,6 +85,7 @@ def get_channel_image_info(url): 'simulate': True, 'logger': log, 'extract_flat': True, # Change to False to get detailed info + 'check_formats': False, }) with yt_dlp.YoutubeDL(opts) as y: @@ -143,7 +144,11 @@ def get_media_info(url): 'simulate': True, 'logger': log, 'extract_flat': True, - 'extractor_args': {'youtubetab': {'approximate_date': ['true']}}, + 'check_formats': True, + 'extractor_args': { + 'youtube': {'formats': ['missing_pot']}, + 'youtubetab': {'approximate_date': ['true']}, + }, }) response = {} with yt_dlp.YoutubeDL(opts) as y: @@ -248,7 +253,7 @@ def download_media( 'writeautomaticsub': auto_subtitles, 'subtitleslangs': sub_langs.split(','), 'writethumbnail': embed_thumbnail, - 'check_formats': False, + 'check_formats': None, 'overwrites': None, 'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20), 'max_sleep_interval': settings.DOWNLOAD_MEDIA_DELAY,