From 6390f5a1c39a5353710d773bcebb4b10c17648ae Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Feb 2025 16:15:48 -0500 Subject: [PATCH 1/6] Try to include `timestamp` --- tubesync/sync/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index fd3795a1..d33de76c 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -143,6 +143,7 @@ def get_media_info(url): 'simulate': True, 'logger': log, 'extract_flat': True, + 'extractor_args': {'youtubetab': {'approximate_date': ['true']}}, }) response = {} with yt_dlp.YoutubeDL(opts) as y: From 83dc375810855d5c3855fa92e41cbc45a7935bd9 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 9 Feb 2025 19:32:13 -0500 Subject: [PATCH 2/6] Save `duration`, `timestamp`, and `title` to Media instances --- tubesync/sync/tasks.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index a5e3f135..96ecec75 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -10,7 +10,7 @@ import math import uuid from io import BytesIO from hashlib import sha1 -from datetime import timedelta, datetime +from datetime import datetime, timedelta, timezone as tz from shutil import copyfile from PIL import Image from django.conf import settings @@ -202,6 +202,7 @@ def index_source_task(source_id): source.last_crawl = timezone.now() source.save() log.info(f'Found {len(videos)} media items for source: {source}') + fields = lambda x, t=source.source_type: Media.METADATA_FIELDS.get(x, dict()).get(t, x) for video in videos: # Create or update each video as a Media object key = video.get(source.key_field, None) @@ -213,6 +214,18 @@ def index_source_task(source_id): except Media.DoesNotExist: media = Media(key=key) media.source = source + media.duration = float(video.get(fields('duration'), 0)) or None + media.title = str(video.get(fields('title'), '')) + timestamp = video.get(fields('timestamp'), None) + if timestamp is not None: + try: + timestamp_float = float(timestamp) + posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) + published_dt = posix_epoch + timedelta(seconds=timestamp_float) + except Exception as e: + log.warn(f'Could not set published for: {source} / {media} with "{e}"') + else: + media.published = published_dt try: media.save() log.debug(f'Indexed media: {source} / {media}') From b6334ce41cdb99dde71d638a7c2e02c49fb40109 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 08:18:22 -0500 Subject: [PATCH 3/6] Add `timestamp` to mappings Return `field` instead of '' so that a missing mapping returns itself. --- tubesync/sync/models.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 2daeb094..f406e5e0 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -664,6 +664,11 @@ class Media(models.Model): Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date', Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date', }, + 'timestamp': { + Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp', + Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp', + Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp', + }, 'title': { Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title', Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title', @@ -944,7 +949,7 @@ class Media(models.Model): def get_metadata_field(self, field): fields = self.METADATA_FIELDS.get(field, {}) - return fields.get(self.source.source_type, '') + return fields.get(self.source.source_type, field) def iter_formats(self): for fmt in self.formats: From b8f8d9d7fab01d0b6ba720d54f023228d9f7ad80 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 08:27:10 -0500 Subject: [PATCH 4/6] Use `Media.get_metadata_field` --- tubesync/sync/tasks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 96ecec75..6f6bd800 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -202,7 +202,7 @@ def index_source_task(source_id): source.last_crawl = timezone.now() source.save() log.info(f'Found {len(videos)} media items for source: {source}') - fields = lambda x, t=source.source_type: Media.METADATA_FIELDS.get(x, dict()).get(t, x) + fields = lambda f, m: m.get_metadata_field(f) for video in videos: # Create or update each video as a Media object key = video.get(source.key_field, None) @@ -214,9 +214,9 @@ def index_source_task(source_id): except Media.DoesNotExist: media = Media(key=key) media.source = source - media.duration = float(video.get(fields('duration'), 0)) or None - media.title = str(video.get(fields('title'), '')) - timestamp = video.get(fields('timestamp'), None) + media.duration = float(video.get(fields('duration', media), 0)) or None + media.title = str(video.get(fields('title', media), '')) + timestamp = video.get(fields('timestamp', media), None) if timestamp is not None: try: timestamp_float = float(timestamp) From 488294475a3cea94e87916407115dc38557bddd7 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 08:48:16 -0500 Subject: [PATCH 5/6] Limit the `metadata_title` string to fit in the `title` column --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index f406e5e0..a61b1379 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -935,7 +935,7 @@ class Media(models.Model): def save(self, force_insert=False, force_update=False, using=None, update_fields=None): # Trigger an update of derived fields from metadata if self.metadata: - self.title = self.metadata_title + self.title = self.metadata_title[:200] self.duration = self.metadata_duration if update_fields is not None and "metadata" in update_fields: # If only some fields are being updated, make sure we update title and duration if metadata changes From f963c556106c7efa4538bcf902ec198d763ee51b Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Feb 2025 08:52:09 -0500 Subject: [PATCH 6/6] Limit the `title` string to fit in the database column --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 6f6bd800..d44eee0e 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -215,7 +215,7 @@ def index_source_task(source_id): media = Media(key=key) media.source = source media.duration = float(video.get(fields('duration', media), 0)) or None - media.title = str(video.get(fields('title', media), '')) + media.title = str(video.get(fields('title', media), ''))[:200] timestamp = video.get(fields('timestamp', media), None) if timestamp is not None: try: