From b71236651a3c593cc61560ba224629d2b4564fa2 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 07:52:12 -0500 Subject: [PATCH 01/10] Load `fulltitle` from the `metadata` --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index d8a43649..82aa7bbc 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -558,7 +558,7 @@ class Media(models.Model): METADATA_FIELDS = { **(_same_name('upload_date')), **(_same_name('timestamp')), - **(_same_name('title')), + **(_same_name('fulltitle', 'title')), **(_same_name('description')), **(_same_name('duration')), **(_same_name('formats')), From 695c1f6e594917c4bfb4f0369187f6f9ff3b82ce Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 08:07:46 -0500 Subject: [PATCH 02/10] Add `metadata_published` property --- tubesync/sync/models.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index d8a43649..8ae84a4e 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1032,6 +1032,20 @@ class Media(models.Model): field = self.get_metadata_field('title') return self.loaded_metadata.get(field, '').strip() + @property + def metadata_published(self): + published_dt = None + field = self.get_metadata_field('timestamp') + timestamp = self.loaded_metadata.get(field, None) + if timestamp is not None: + try: + timestamp_float = float(timestamp) + posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) + published_dt = posix_epoch + timedelta(seconds=timestamp_float) + except Exception as e: + log.warn(f'Could not compute published from timestamp for: {self.source} / {self} with "{e}"') + return published_dt + @property def slugtitle(self): replaced = self.title.replace('_', '-').replace('&', 'and').replace('+', 'and') From 0a1b08b6548ddad8cc439db189c725aaad3e45ea Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 08:13:20 -0500 Subject: [PATCH 03/10] Use `Media.metadata_published` --- tubesync/sync/tasks.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index a773abf7..b90113cb 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -386,6 +386,9 @@ def download_media_metadata(media_id): # Media must have a valid upload date if upload_date: media.published = timezone.make_aware(upload_date) + published = media.metadata_published + if published: + media.published = published # Store title in DB so it's fast to access if media.metadata_title: From 494250625c6c9c652f1e9902a239e6fb5d557ae0 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 08:19:19 -0500 Subject: [PATCH 04/10] Add an optional argument to `metadata_published` --- tubesync/sync/models.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 8ae84a4e..6330360c 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1033,10 +1033,11 @@ class Media(models.Model): return self.loaded_metadata.get(field, '').strip() @property - def metadata_published(self): + def metadata_published(self, timestamp=None): published_dt = None - field = self.get_metadata_field('timestamp') - timestamp = self.loaded_metadata.get(field, None) + if timestamp is None: + field = self.get_metadata_field('timestamp') + timestamp = self.loaded_metadata.get(field, None) if timestamp is not None: try: timestamp_float = float(timestamp) From 37b15ed625838c2bdfe2cda066597e6924fcac6a Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 08:23:49 -0500 Subject: [PATCH 05/10] Use `Media.metadata_published` for indexing also --- tubesync/sync/tasks.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index b90113cb..fd6ec53b 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -217,15 +217,9 @@ def index_source_task(source_id): media.duration = float(video.get(fields('duration', media), None) or 0) or None media.title = str(video.get(fields('title', media), ''))[:200] timestamp = video.get(fields('timestamp', media), None) - if timestamp is not None: - try: - timestamp_float = float(timestamp) - posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) - published_dt = posix_epoch + timedelta(seconds=timestamp_float) - except Exception as e: - log.warn(f'Could not set published for: {source} / {media} with "{e}"') - else: - media.published = published_dt + published_dt = media.metadata_published(timestamp) + if published_dt is not None: + media.published = published_dt try: media.save() log.debug(f'Indexed media: {source} / {media}') From 35a4d2fd391bfc73c9d2d73954c4cb7f16d7c9b7 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 08:34:37 -0500 Subject: [PATCH 06/10] Use a function instead of a property --- tubesync/sync/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 6330360c..8ca8d6ab 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1032,7 +1032,6 @@ class Media(models.Model): field = self.get_metadata_field('title') return self.loaded_metadata.get(field, '').strip() - @property def metadata_published(self, timestamp=None): published_dt = None if timestamp is None: From 226d886557b6cb035a349b177db152fb5c1dd745 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 08:35:56 -0500 Subject: [PATCH 07/10] fixup: call the function --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index fd6ec53b..2bb3e75e 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -380,7 +380,7 @@ def download_media_metadata(media_id): # Media must have a valid upload date if upload_date: media.published = timezone.make_aware(upload_date) - published = media.metadata_published + published = media.metadata_published() if published: media.published = published From a42505117408b01ab4d82d486e8e84ce70b4c337 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 22:36:34 -0500 Subject: [PATCH 08/10] Add import for `tz` --- tubesync/sync/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 8ca8d6ab..19d6dec0 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -4,7 +4,7 @@ import json import re from xml.etree import ElementTree from collections import OrderedDict -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone as tz from pathlib import Path from django.conf import settings from django.db import models From 6aa392118d164f77dd51db13cebf3f30e2812678 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 22:38:00 -0500 Subject: [PATCH 09/10] Remove import for `tz` --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 2bb3e75e..0cdb2f32 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -10,7 +10,7 @@ import math import uuid from io import BytesIO from hashlib import sha1 -from datetime import datetime, timedelta, timezone as tz +from datetime import datetime, timedelta from shutil import copyfile from PIL import Image from django.conf import settings From 03f0680d0bd02200c3a1aa40dfbe271b76064288 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 17 Feb 2025 23:32:06 -0500 Subject: [PATCH 10/10] Prefer `fulltitle` but do not require it --- tubesync/sync/models.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 65a6cbdd..1809d18a 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -558,7 +558,8 @@ class Media(models.Model): METADATA_FIELDS = { **(_same_name('upload_date')), **(_same_name('timestamp')), - **(_same_name('fulltitle', 'title')), + **(_same_name('title')), + **(_same_name('fulltitle')), **(_same_name('description')), **(_same_name('duration')), **(_same_name('formats')), @@ -1029,8 +1030,14 @@ class Media(models.Model): @property def metadata_title(self): - field = self.get_metadata_field('title') - return self.loaded_metadata.get(field, '').strip() + result = '' + for key in ('fulltitle', 'title'): + field = self.get_metadata_field(key) + value = self.loaded_metadata.get(field, '').strip() + if value: + result = value + break + return result def metadata_published(self, timestamp=None): published_dt = None