Merge pull request #286 from meeb/main

Update to upstream main
Authored by tcely on 2025-02-11 22:33:56 -05:00; committed by GitHub.
commit 50762d04f0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 73 additions and 13 deletions

View File

@@ -27,6 +27,9 @@ class BaseStatus:
         return status in cls.valid
 
     def __init__(self, hook_status_dict=None):
+        self.media_key = None
+        self.task_status = '[Started: 0%]'
+        self.task_verbose_name = None
         self._status_dict = hook_status_dict or self.status_dict
         self._registered_keys = set()
 
@@ -43,6 +46,24 @@ class BaseStatus:
         if key in self._status_dict:
             del self._status_dict[key]
 
+    def update_task(self):
+        if self.media_key is None:
+            return
+        from .models import Media
+        from .tasks import get_media_download_task
+        media = Media.objects.get(key=self.media_key)
+        task = get_media_download_task(str(media.pk))
+        if task:
+            if self.task_verbose_name is None:
+                # Clean up any task_status previously prepended to the
+                # verbose name (seen with duplicated tasks on a test system)
+                s = task.verbose_name
+                cleaned = s[1 + s.find(' Downloading '):]
+                self.task_verbose_name = cleaned
+            task.verbose_name = f'{self.task_status} {self.task_verbose_name}'
+            task.save()
+
+
 class ProgressHookStatus(BaseStatus):
     status_dict = progress_hook['status']
     valid = frozenset((
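
A note on the slice in update_task(): str.find returns -1 when the separator is absent, so 1 + (-1) == 0 and the slice keeps the whole string. A minimal sketch, assuming the task names follow this project's 'Downloading ...' verbose_name templates:

    # Hypothetical helper mirroring the prefix-stripping above
    def strip_status_prefix(verbose_name):
        return verbose_name[1 + verbose_name.find(' Downloading '):]

    assert strip_status_prefix(
        '[downloading: 45.0%] Downloading media for "clip"'
    ) == 'Downloading media for "clip"'
    # No prefix present: find() returns -1, the slice starts at 0, unchanged
    assert strip_status_prefix(
        'Downloading media for "clip"'
    ) == 'Downloading media for "clip"'
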
@@ -121,6 +142,10 @@ def yt_dlp_progress_hook(event):
             percent = round(100 * downloaded_bytes / total_bytes)
         if percent and (status.next_progress() < percent) and (0 == percent % 5):
             status.download_progress = percent
+            if key:
+                status.media_key = key
+                status.task_status = f'[downloading: {percent_str}]'
+                status.update_task()
             log.info(f'[youtube-dl] downloading: {filename} - {percent_str} '
                      f'of {total} at {speed}, {eta} remaining')
     elif 'finished' == event['status']:
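
The surrounding gate means the task record is rewritten at most once per 5% step rather than on every hook call. A rough sketch of the gating, with next_progress() approximated here by a set of already-reported steps:

    # Sketch: only new multiples of five percent trigger an update
    reported = set()
    for percent in range(101):
        if percent and (0 == percent % 5) and percent not in reported:
            reported.add(percent)
            # status.update_task() would run here
    assert sorted(reported) == list(range(5, 101, 5))
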
@@ -171,6 +196,11 @@ def yt_dlp_postprocessor_hook(event):
             del event['info_dict']['automatic_captions']
         log.debug(repr(event['info_dict']))
 
+    if 'Unknown' != key:
+        status.media_key = key
+        status.task_status = f'[{event["postprocessor"]}: {event["status"]}]'
+        status.update_task()
+
     log.info(f'[{event["postprocessor"]}] {event["status"]} for: {name}')
     if 'finished' == event['status']:
         status.cleanup()
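
For reference, a hedged sketch of the event shape this block consumes; per the yt-dlp postprocessor hook documentation each event carries 'status', 'postprocessor' and 'info_dict' keys (the values below are illustrative):

    event = {
        'status': 'started',           # 'started' or 'finished'
        'postprocessor': 'MoveFiles',  # name of the running postprocessor
        'info_dict': {'id': 'example-id', 'title': 'Example clip'},
    }
    task_status = f'[{event["postprocessor"]}: {event["status"]}]'
    assert task_status == '[MoveFiles: started]'
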

View File

@@ -664,6 +664,11 @@ class Media(models.Model):
             Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date',
             Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date',
         },
+        'timestamp': {
+            Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp',
+            Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp',
+            Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp',
+        },
         'title': {
             Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title',
             Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title',
@@ -930,7 +935,7 @@ class Media(models.Model):
     def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
         # Trigger an update of derived fields from metadata
         if self.metadata:
-            self.title = self.metadata_title
+            self.title = self.metadata_title[:200]
             self.duration = self.metadata_duration
         if update_fields is not None and "metadata" in update_fields:
             # If only some fields are being updated, make sure we update title and duration if metadata changes
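
The [:200] clamp presumably matches a max_length=200 on the title column (the same bound appears in index_source_task below); slicing is a safe no-op for shorter strings:

    title = 'A' * 500
    assert len(title[:200]) == 200
    assert 'short title'[:200] == 'short title'
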
@@ -944,7 +949,7 @@ class Media(models.Model):
     def get_metadata_field(self, field):
         fields = self.METADATA_FIELDS.get(field, {})
-        return fields.get(self.source.source_type, '')
+        return fields.get(self.source.source_type, field)
 
     def iter_formats(self):
         for fmt in self.formats:
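
With the new default, an unmapped field name now falls through unchanged instead of collapsing to ''. A minimal sketch of the lookup, using an illustrative source-type key rather than the real Source.SOURCE_TYPE_* constants:

    METADATA_FIELDS = {'timestamp': {'channel': 'timestamp'}}

    def get_metadata_field(source_type, field):
        fields = METADATA_FIELDS.get(field, {})
        return fields.get(source_type, field)  # new fallback: the name itself

    assert get_metadata_field('channel', 'timestamp') == 'timestamp'
    assert get_metadata_field('channel', 'view_count') == 'view_count'  # was '' before
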
@@ -1561,6 +1566,8 @@ class Media(models.Model):
         if self.downloaded and self.media_file:
             old_video_path = Path(self.media_file.path)
             new_video_path = Path(get_media_file_path(self, None))
+            if old_video_path == new_video_path:
+                return
             if old_video_path.exists() and not new_video_path.exists():
                 old_video_path = old_video_path.resolve(strict=True)
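
The added guard skips the rename machinery entirely when the computed filename has not changed; pathlib compares paths structurally, so no filesystem access is needed:

    from pathlib import Path

    old_video_path = Path('/downloads/show/clip.mkv')
    new_video_path = Path('/downloads/show') / 'clip.mkv'
    assert old_video_path == new_video_path  # equal: nothing to rename
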

View File

@@ -14,7 +14,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_task,
                     map_task_to_instance, check_source_directory_exists,
                     download_media, rescan_media_server, download_source_images,
                     save_all_media_for_source, rename_all_media_for_source,
-                    get_media_metadata_task)
+                    get_media_metadata_task, get_media_download_task)
 from .utils import delete_file, glob_quote
 from .filtering import filter_media
@@ -156,8 +156,9 @@ def media_post_save(sender, instance, created, **kwargs):
             post_save.disconnect(media_post_save, sender=Media)
             instance.save()
             post_save.connect(media_post_save, sender=Media)
+    existing_media_metadata_task = get_media_metadata_task(str(instance.pk))
     # If the media is missing metadata schedule it to be downloaded
-    if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
+    if not (instance.skip or instance.metadata or existing_media_metadata_task):
         log.info(f'Scheduling task to download metadata for: {instance.url}')
         verbose_name = _('Downloading metadata for "{}"')
         download_media_metadata(
@@ -183,13 +184,13 @@ def media_post_save(sender, instance, created, **kwargs):
             verbose_name=verbose_name.format(instance.name),
             remove_existing_tasks=True
         )
+    existing_media_download_task = get_media_download_task(str(instance.pk))
     # If the media has not yet been downloaded schedule it to be downloaded
-    if not instance.media_file_exists:
+    if not (instance.media_file_exists or existing_media_download_task):
         instance.downloaded = False
         instance.media_file = None
-    if (not instance.downloaded and instance.can_download and not instance.skip
-            and instance.source.download_media):
+    if (instance.source.download_media and instance.can_download) and not (
+            instance.skip or instance.downloaded or existing_media_download_task):
+        delete_task_by_media('sync.tasks.download_media', (str(instance.pk),))
         verbose_name = _('Downloading media for "{}"')
         download_media(
             str(instance.pk),
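
The rewritten conditions are De Morgan equivalents of the originals with the existing-task checks folded in, and the added delete_task_by_media(...) call clears any stale download task before a new one is scheduled. A quick exhaustive check of the metadata-branch equivalence:

    from itertools import product

    for skip, metadata, existing_task in product((False, True), repeat=3):
        old = not metadata and not skip and not existing_task
        new = not (skip or metadata or existing_task)
        assert old == new
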
@@ -225,6 +226,11 @@ def media_post_delete(sender, instance, **kwargs):
             other_path = video_path.with_suffix(f'.{suffix}').resolve()
             log.info(f'Deleting file for: {instance} path: {other_path!s}')
             delete_file(other_path)
+        # Subtitle files also carry a language code before the extension
+        subtitle_files = video_path.parent.glob(f'{glob_quote(video_path.with_suffix("").name)}*.vtt')
+        for file in subtitle_files:
+            log.info(f'Deleting file for: {instance} path: {file}')
+            delete_file(file)
         # Jellyfin creates .trickplay directories and posters
         for suffix in frozenset(('.trickplay', '-poster.jpg', '-poster.webp',)):
             # with_suffix insists on suffix beginning with '.' for no good reason
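
The glob relies on glob_quote escaping any metacharacters in the video stem; a sketch of the same idea with the standard library's glob.escape standing in for glob_quote:

    import glob
    from pathlib import Path

    video_path = Path('/downloads/show/clip [1080p].mkv')
    stem = video_path.with_suffix('').name     # 'clip [1080p]'
    pattern = f'{glob.escape(stem)}*.vtt'      # escapes the [...] set
    # Matches e.g. 'clip [1080p].en.vtt' and 'clip [1080p].en-GB.vtt'
    subtitle_files = video_path.parent.glob(pattern)
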

View File

@@ -10,7 +10,7 @@ import math
 import uuid
 from io import BytesIO
 from hashlib import sha1
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta, timezone as tz
 from shutil import copyfile
 from PIL import Image
 from django.conf import settings
@@ -27,7 +27,6 @@ from common.utils import json_serial
 from .models import Source, Media, MediaServer
 from .utils import (get_remote_image, resize_image_to_height, delete_file,
                     write_text_file, filter_response)
-from .filtering import filter_media
 from .youtube import YouTubeError
@@ -202,6 +201,7 @@ def index_source_task(source_id):
     source.last_crawl = timezone.now()
     source.save()
     log.info(f'Found {len(videos)} media items for source: {source}')
+    fields = lambda f, m: m.get_metadata_field(f)
     for video in videos:
         # Create or update each video as a Media object
         key = video.get(source.key_field, None)
@@ -213,6 +213,18 @@ def index_source_task(source_id):
         except Media.DoesNotExist:
             media = Media(key=key)
             media.source = source
+            media.duration = float(video.get(fields('duration', media), 0)) or None
+            media.title = str(video.get(fields('title', media), ''))[:200]
+            timestamp = video.get(fields('timestamp', media), None)
+            if timestamp is not None:
+                try:
+                    timestamp_float = float(timestamp)
+                    posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
+                    published_dt = posix_epoch + timedelta(seconds=timestamp_float)
+                except Exception as e:
+                    log.warn(f'Could not set published for: {source} / {media} with "{e}"')
+                else:
+                    media.published = published_dt
         try:
             media.save()
             log.debug(f'Indexed media: {source} / {media}')
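
The explicit epoch arithmetic is equivalent to datetime.fromtimestamp with a UTC zone, and yields a timezone-aware value (timezone is imported as tz in this file):

    from datetime import datetime, timedelta, timezone as tz

    timestamp_float = 1739300036.0
    posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
    published_dt = posix_epoch + timedelta(seconds=timestamp_float)
    assert published_dt == datetime.fromtimestamp(timestamp_float, tz=tz.utc)
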

View File

@@ -206,11 +206,11 @@ def normalize_codec(codec_str):
 
 def list_of_dictionaries(arg_list, arg_function=lambda x: x):
     assert callable(arg_function)
     if isinstance(arg_list, list):
-        def _assert_and_call(arg_dict):
+        def _call_func_with_dict(arg_dict):
             if isinstance(arg_dict, dict):
                 return arg_function(arg_dict)
             return arg_dict
-        return (True, list(map(_assert_and_call, arg_list)),)
+        return (True, list(map(_call_func_with_dict, arg_list)),)
     return (False, arg_list,)
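
A usage sketch for the renamed helper; the new name reflects that it calls the function for dict items and passes everything else through untouched:

    def drop_nulls(d):
        return {k: v for k, v in d.items() if v is not None}

    ok, result = list_of_dictionaries([{'a': 1, 'b': None}, 'not-a-dict'], drop_nulls)
    assert ok is True and result == [{'a': 1}, 'not-a-dict']

    ok, result = list_of_dictionaries('not-a-list')
    assert ok is False
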

View File

@@ -143,6 +143,7 @@ def get_media_info(url):
         'simulate': True,
         'logger': log,
         'extract_flat': True,
+        'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
     })
     response = {}
     with yt_dlp.YoutubeDL(opts) as y:
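
This extractor argument asks yt-dlp's YouTube tab extractor to emit approximate upload_date and timestamp values during flat-playlist extraction, which is what makes the new 'timestamp' metadata field available at index time. A standalone sketch with an illustrative channel URL:

    import yt_dlp

    opts = {
        'skip_download': True,
        'extract_flat': True,
        'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
    }
    with yt_dlp.YoutubeDL(opts) as y:
        info = y.extract_info('https://www.youtube.com/@example/videos',
                              download=False)
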
@@ -224,6 +225,10 @@ def download_media(
         'sponskrub': False,
     })
 
+    pp_opts.exec_cmd.update(
+        opts.get('exec_cmd', default_opts.exec_cmd)
+    )
+
     if skip_sponsors:
         # Let yt_dlp convert from human for us.
         pp_opts.sponsorblock_mark = yt_dlp.parse_options(
@@ -242,7 +247,7 @@ def download_media(
         'writesubtitles': write_subtitles,
         'writeautomaticsub': auto_subtitles,
         'subtitleslangs': sub_langs.split(','),
-        'writethumbnail': True,
+        'writethumbnail': embed_thumbnail,
         'check_formats': False,
         'overwrites': None,
         'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20),
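
Two behavioural notes on this file: exec_cmd entries supplied by the caller now override the parsed defaults via plain dict.update semantics, and thumbnails are only written to disk when they will actually be embedded. A sketch of the merge, with illustrative keys:

    default_exec_cmd = {'after_move': ['chmod a+r {}']}      # illustrative defaults
    pp_exec_cmd = dict(default_exec_cmd)                     # stands in for pp_opts.exec_cmd
    opts = {'exec_cmd': {'after_move': ['notify-send {}']}}  # caller override

    pp_exec_cmd.update(opts.get('exec_cmd', default_exec_cmd))
    assert pp_exec_cmd == {'after_move': ['notify-send {}']}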