Merge pull request #704 from tcely/patch-12

Try to include `timestamp`
This commit is contained in:
meeb 2025-02-12 05:50:57 +11:00 committed by GitHub
commit 1cc2905168
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 22 additions and 3 deletions

View File

@ -664,6 +664,11 @@ class Media(models.Model):
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date', Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date',
Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date', Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date',
}, },
'timestamp': {
Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp',
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp',
Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp',
},
'title': { 'title': {
Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title', Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title',
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title', Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title',
@ -930,7 +935,7 @@ class Media(models.Model):
def save(self, force_insert=False, force_update=False, using=None, update_fields=None): def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
# Trigger an update of derived fields from metadata # Trigger an update of derived fields from metadata
if self.metadata: if self.metadata:
self.title = self.metadata_title self.title = self.metadata_title[:200]
self.duration = self.metadata_duration self.duration = self.metadata_duration
if update_fields is not None and "metadata" in update_fields: if update_fields is not None and "metadata" in update_fields:
# If only some fields are being updated, make sure we update title and duration if metadata changes # If only some fields are being updated, make sure we update title and duration if metadata changes
@ -944,7 +949,7 @@ class Media(models.Model):
def get_metadata_field(self, field): def get_metadata_field(self, field):
fields = self.METADATA_FIELDS.get(field, {}) fields = self.METADATA_FIELDS.get(field, {})
return fields.get(self.source.source_type, '') return fields.get(self.source.source_type, field)
def iter_formats(self): def iter_formats(self):
for fmt in self.formats: for fmt in self.formats:

View File

@ -10,7 +10,7 @@ import math
import uuid import uuid
from io import BytesIO from io import BytesIO
from hashlib import sha1 from hashlib import sha1
from datetime import timedelta, datetime from datetime import datetime, timedelta, timezone as tz
from shutil import copyfile from shutil import copyfile
from PIL import Image from PIL import Image
from django.conf import settings from django.conf import settings
@ -202,6 +202,7 @@ def index_source_task(source_id):
source.last_crawl = timezone.now() source.last_crawl = timezone.now()
source.save() source.save()
log.info(f'Found {len(videos)} media items for source: {source}') log.info(f'Found {len(videos)} media items for source: {source}')
fields = lambda f, m: m.get_metadata_field(f)
for video in videos: for video in videos:
# Create or update each video as a Media object # Create or update each video as a Media object
key = video.get(source.key_field, None) key = video.get(source.key_field, None)
@ -213,6 +214,18 @@ def index_source_task(source_id):
except Media.DoesNotExist: except Media.DoesNotExist:
media = Media(key=key) media = Media(key=key)
media.source = source media.source = source
media.duration = float(video.get(fields('duration', media), 0)) or None
media.title = str(video.get(fields('title', media), ''))[:200]
timestamp = video.get(fields('timestamp', media), None)
if timestamp is not None:
try:
timestamp_float = float(timestamp)
posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
published_dt = posix_epoch + timedelta(seconds=timestamp_float)
except Exception as e:
log.warn(f'Could not set published for: {source} / {media} with "{e}"')
else:
media.published = published_dt
try: try:
media.save() media.save()
log.debug(f'Indexed media: {source} / {media}') log.debug(f'Indexed media: {source} / {media}')

View File

@ -143,6 +143,7 @@ def get_media_info(url):
'simulate': True, 'simulate': True,
'logger': log, 'logger': log,
'extract_flat': True, 'extract_flat': True,
'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
}) })
response = {} response = {}
with yt_dlp.YoutubeDL(opts) as y: with yt_dlp.YoutubeDL(opts) as y: