Merge pull request #704 from tcely/patch-12

Try to include `timestamp`
This commit is contained in:
meeb 2025-02-12 05:50:57 +11:00 committed by GitHub
commit 1cc2905168
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 22 additions and 3 deletions

View File

@ -664,6 +664,11 @@ class Media(models.Model):
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date',
Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date',
},
'timestamp': {
Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp',
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp',
Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp',
},
'title': {
Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title',
Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title',
@ -930,7 +935,7 @@ class Media(models.Model):
def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
# Trigger an update of derived fields from metadata
if self.metadata:
-self.title = self.metadata_title
+self.title = self.metadata_title[:200]
self.duration = self.metadata_duration
if update_fields is not None and "metadata" in update_fields:
# If only some fields are being updated, make sure we update title and duration if metadata changes
@ -944,7 +949,7 @@ class Media(models.Model):
def get_metadata_field(self, field):
fields = self.METADATA_FIELDS.get(field, {})
-return fields.get(self.source.source_type, '')
+return fields.get(self.source.source_type, field)
def iter_formats(self):
for fmt in self.formats:

View File

@ -10,7 +10,7 @@ import math
import uuid
from io import BytesIO
from hashlib import sha1
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta, timezone as tz
from shutil import copyfile
from PIL import Image
from django.conf import settings
@ -202,6 +202,7 @@ def index_source_task(source_id):
source.last_crawl = timezone.now()
source.save()
log.info(f'Found {len(videos)} media items for source: {source}')
fields = lambda f, m: m.get_metadata_field(f)
for video in videos:
# Create or update each video as a Media object
key = video.get(source.key_field, None)
@ -213,6 +214,18 @@ def index_source_task(source_id):
except Media.DoesNotExist:
media = Media(key=key)
media.source = source
media.duration = float(video.get(fields('duration', media), 0)) or None
media.title = str(video.get(fields('title', media), ''))[:200]
timestamp = video.get(fields('timestamp', media), None)
if timestamp is not None:
try:
timestamp_float = float(timestamp)
posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
published_dt = posix_epoch + timedelta(seconds=timestamp_float)
except Exception as e:
log.warn(f'Could not set published for: {source} / {media} with "{e}"')
else:
media.published = published_dt
try:
media.save()
log.debug(f'Indexed media: {source} / {media}')

View File

@ -143,6 +143,7 @@ def get_media_info(url):
'simulate': True,
'logger': log,
'extract_flat': True,
'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
})
response = {}
with yt_dlp.YoutubeDL(opts) as y: