diff --git a/tubesync/sync/hooks.py b/tubesync/sync/hooks.py
index 3e8722a9..96c9f98d 100644
--- a/tubesync/sync/hooks.py
+++ b/tubesync/sync/hooks.py
@@ -27,6 +27,9 @@ class BaseStatus:
         return status in cls.valid
 
     def __init__(self, hook_status_dict=None):
+        self.media_key = None
+        self.task_status = '[Started: 0%]'
+        self.task_verbose_name = None
         self._status_dict = hook_status_dict or self.status_dict
         self._registered_keys = set()
 
@@ -43,6 +46,24 @@ class BaseStatus:
         if key in self._status_dict:
             del self._status_dict[key]
 
+    def update_task(self):
+        if self.media_key is None:
+            return
+        from .models import Media
+        from .tasks import get_media_download_task
+
+        media = Media.objects.get(key=self.media_key)
+        task = get_media_download_task(str(media.pk))
+        if task:
+            if self.task_verbose_name is None:
+                # clean up any previously prepended task_status
+                # this happened because of duplicated tasks on my test system
+                s = task.verbose_name
+                cleaned = s[1+s.find(' Downloading '):]
+                self.task_verbose_name = cleaned
+            task.verbose_name = f'{self.task_status} {self.task_verbose_name}'
+            task.save()
+
 class ProgressHookStatus(BaseStatus):
     status_dict = progress_hook['status']
     valid = frozenset((
@@ -121,6 +142,10 @@ def yt_dlp_progress_hook(event):
             percent = round(100 * downloaded_bytes / total_bytes)
         if percent and (status.next_progress() < percent) and (0 == percent % 5):
             status.download_progress = percent
+            if key:
+                status.media_key = key
+                status.task_status = f'[downloading: {percent_str}]'
+                status.update_task()
             log.info(f'[youtube-dl] downloading: (unknown) - {percent_str} '
                      f'of {total} at {speed}, {eta} remaining')
     elif 'finished' == event['status']:
@@ -171,6 +196,11 @@ def yt_dlp_postprocessor_hook(event):
             del event['info_dict']['automatic_captions']
     log.debug(repr(event['info_dict']))
 
+    if 'Unknown' != key:
+        status.media_key = key
+        status.task_status = f'[{event["postprocessor"]}: {event["status"]}]'
+        status.update_task()
+
     log.info(f'[{event["postprocessor"]}] {event["status"]} for: {name}')
     if 'finished' == event['status']:
         status.cleanup()
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 2daeb094..c914534a 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -664,6 +664,11 @@ class Media(models.Model):
             Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date',
             Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date',
         },
+        'timestamp': {
+            Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp',
+            Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp',
+            Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp',
+        },
         'title': {
             Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title',
             Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title',
@@ -930,7 +935,7 @@ class Media(models.Model):
     def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
         # Trigger an update of derived fields from metadata
         if self.metadata:
-            self.title = self.metadata_title
+            self.title = self.metadata_title[:200]
             self.duration = self.metadata_duration
         if update_fields is not None and "metadata" in update_fields:
             # If only some fields are being updated, make sure we update title and duration if metadata changes
@@ -944,7 +949,7 @@ class Media(models.Model):
 
     def get_metadata_field(self, field):
         fields = self.METADATA_FIELDS.get(field, {})
-        return fields.get(self.source.source_type, '')
+        return fields.get(self.source.source_type, field)
 
     def iter_formats(self):
         for fmt in self.formats:
@@ -1561,6 +1566,8 @@ class Media(models.Model):
         if self.downloaded and self.media_file:
             old_video_path = Path(self.media_file.path)
             new_video_path = Path(get_media_file_path(self, None))
+            if old_video_path == new_video_path:
+                return
             if old_video_path.exists() and not new_video_path.exists():
                 old_video_path = old_video_path.resolve(strict=True)
 
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index 4f811add..fe245be5 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -14,7 +14,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
                     map_task_to_instance, check_source_directory_exists, download_media,
                     rescan_media_server, download_source_images,
                     save_all_media_for_source, rename_all_media_for_source,
-                    get_media_metadata_task)
+                    get_media_metadata_task, get_media_download_task)
 from .utils import delete_file, glob_quote
 from .filtering import filter_media
 
@@ -156,8 +156,9 @@ def media_post_save(sender, instance, created, **kwargs):
         post_save.disconnect(media_post_save, sender=Media)
         instance.save()
         post_save.connect(media_post_save, sender=Media)
+    existing_media_metadata_task = get_media_metadata_task(str(instance.pk))
     # If the media is missing metadata schedule it to be downloaded
-    if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
+    if not (instance.skip or instance.metadata or existing_media_metadata_task):
         log.info(f'Scheduling task to download metadata for: {instance.url}')
         verbose_name = _('Downloading metadata for "{}"')
         download_media_metadata(
@@ -183,13 +184,13 @@ def media_post_save(sender, instance, created, **kwargs):
             verbose_name=verbose_name.format(instance.name),
             remove_existing_tasks=True
         )
+    existing_media_download_task = get_media_download_task(str(instance.pk))
     # If the media has not yet been downloaded schedule it to be downloaded
-    if not instance.media_file_exists:
+    if not (instance.media_file_exists or existing_media_download_task):
         instance.downloaded = False
         instance.media_file = None
-    if (not instance.downloaded and instance.can_download and not instance.skip
-            and instance.source.download_media):
-        delete_task_by_media('sync.tasks.download_media', (str(instance.pk),))
+    if (instance.source.download_media and instance.can_download) and not (
+            instance.skip or instance.downloaded or existing_media_download_task):
         verbose_name = _('Downloading media for "{}"')
         download_media(
             str(instance.pk),
@@ -225,6 +226,11 @@ def media_post_delete(sender, instance, **kwargs):
         other_path = video_path.with_suffix(f'.{suffix}').resolve()
         log.info(f'Deleting file for: {instance} path: {other_path!s}')
         delete_file(other_path)
+    # subtitles include language code
+    subtitle_files = video_path.parent.glob(f'{glob_quote(video_path.with_suffix("").name)}*.vtt')
+    for file in subtitle_files:
+        log.info(f'Deleting file for: {instance} path: {file}')
+        delete_file(file)
     # Jellyfin creates .trickplay directories and posters
     for suffix in frozenset(('.trickplay', '-poster.jpg', '-poster.webp',)):
         # with_suffix insists on suffix beginning with '.' for no good reason
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index a5e3f135..e59efcce 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -10,7 +10,7 @@ import math
 import uuid
 from io import BytesIO
 from hashlib import sha1
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta, timezone as tz
 from shutil import copyfile
 from PIL import Image
 from django.conf import settings
@@ -27,7 +27,6 @@ from common.utils import json_serial
 from .models import Source, Media, MediaServer
 from .utils import (get_remote_image, resize_image_to_height, delete_file,
                     write_text_file, filter_response)
-from .filtering import filter_media
 from .youtube import YouTubeError
 
 
@@ -202,6 +201,7 @@ def index_source_task(source_id):
     source.last_crawl = timezone.now()
     source.save()
     log.info(f'Found {len(videos)} media items for source: {source}')
+    fields = lambda f, m: m.get_metadata_field(f)
     for video in videos:
         # Create or update each video as a Media object
         key = video.get(source.key_field, None)
@@ -213,6 +213,18 @@ def index_source_task(source_id):
         except Media.DoesNotExist:
             media = Media(key=key)
             media.source = source
+        media.duration = float(video.get(fields('duration', media), 0)) or None
+        media.title = str(video.get(fields('title', media), ''))[:200]
+        timestamp = video.get(fields('timestamp', media), None)
+        if timestamp is not None:
+            try:
+                timestamp_float = float(timestamp)
+                posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
+                published_dt = posix_epoch + timedelta(seconds=timestamp_float)
+            except Exception as e:
+                log.warning(f'Could not set published for: {source} / {media} with "{e}"')
+            else:
+                media.published = published_dt
         try:
             media.save()
             log.debug(f'Indexed media: {source} / {media}')
diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index fa904c5f..9f599672 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -206,11 +206,11 @@ def normalize_codec(codec_str):
 def list_of_dictionaries(arg_list, arg_function=lambda x: x):
     assert callable(arg_function)
     if isinstance(arg_list, list):
-        def _assert_and_call(arg_dict):
+        def _call_func_with_dict(arg_dict):
             if isinstance(arg_dict, dict):
                 return arg_function(arg_dict)
             return arg_dict
-        return (True, list(map(_assert_and_call, arg_list)),)
+        return (True, list(map(_call_func_with_dict, arg_list)),)
     return (False, arg_list,)
 
 
diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index fd3795a1..8842423f 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -143,6 +143,7 @@ def get_media_info(url):
         'simulate': True,
         'logger': log,
         'extract_flat': True,
+        'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
     })
     response = {}
     with yt_dlp.YoutubeDL(opts) as y:
@@ -224,6 +225,10 @@ def download_media(
         'sponskrub': False,
     })
 
+    pp_opts.exec_cmd.update(
+        opts.get('exec_cmd', default_opts.exec_cmd)
+    )
+
     if skip_sponsors:
         # Let yt_dlp convert from human for us.
         pp_opts.sponsorblock_mark = yt_dlp.parse_options(
@@ -242,7 +247,7 @@ def download_media(
         'writesubtitles': write_subtitles,
         'writeautomaticsub': auto_subtitles,
         'subtitleslangs': sub_langs.split(','),
-        'writethumbnail': True,
+        'writethumbnail': embed_thumbnail,
         'check_formats': False,
         'overwrites': None,
         'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20),