Merge pull request #286 from meeb/main

Update to upstream main
2025-06-22 13:06:34 +00:00 · 2025-02-11 22:33:56 -05:00 · 2025-02-11 22:33:56 -05:00 · 50762d04f0
commit 50762d04f0
parent 96737c4de3 b87c1253a0
6 changed files with 73 additions and 13 deletions
--- a/tubesync/sync/hooks.py
+++ b/tubesync/sync/hooks.py
@@ -27,6 +27,9 @@ class BaseStatus:
        return status in cls.valid

    def __init__(self, hook_status_dict=None):
+        self.media_key = None
+        self.task_status = '[Started: 0%]'
+        self.task_verbose_name = None
        self._status_dict = hook_status_dict or self.status_dict
        self._registered_keys = set()

@@ -43,6 +46,24 @@ class BaseStatus:
            if key in self._status_dict:
                del self._status_dict[key]

+    def update_task(self):
+        if self.media_key is None:
+            return
+        from .models import Media
+        from .tasks import get_media_download_task
+
+        media = Media.objects.get(key=self.media_key)
+        task = get_media_download_task(str(media.pk))
+        if task:
+            if self.task_verbose_name is None:
+                # Strip any task_status prefix left on the name by an earlier run
+                # (seen with duplicated tasks); if the marker is absent the name is kept whole.
+                s = task.verbose_name
+                cleaned = s[1+s.find(' Downloading '):]
+                self.task_verbose_name = cleaned
+            task.verbose_name = f'{self.task_status} {self.task_verbose_name}'
+            task.save()
+
 class ProgressHookStatus(BaseStatus):
    status_dict = progress_hook['status']
    valid = frozenset((
@@ -121,6 +142,10 @@ def yt_dlp_progress_hook(event):
            percent = round(100 * downloaded_bytes / total_bytes)
        if percent and (status.next_progress() < percent) and (0 == percent % 5):
            status.download_progress = percent
+            if key:
+                status.media_key = key
+            status.task_status = f'[downloading: {percent_str}]'
+            status.update_task()
            log.info(f'[youtube-dl] downloading: {filename} - {percent_str} '
                     f'of {total} at {speed}, {eta} remaining')
    elif 'finished' == event['status']:
@@ -171,6 +196,11 @@ def yt_dlp_postprocessor_hook(event):
            del event['info_dict']['automatic_captions']
        log.debug(repr(event['info_dict']))

+    if 'Unknown' != key:
+        status.media_key = key
+    status.task_status = f'[{event["postprocessor"]}: {event["status"]}]'
+    status.update_task()
+
    log.info(f'[{event["postprocessor"]}] {event["status"]} for: {name}')
    if 'finished' == event['status']:
        status.cleanup()
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -664,6 +664,11 @@ class Media(models.Model):
            Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'upload_date',
            Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'upload_date',
        },
+        'timestamp': {
+            Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'timestamp',
+            Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'timestamp',
+            Source.SOURCE_TYPE_YOUTUBE_PLAYLIST: 'timestamp',
+        },
        'title': {
            Source.SOURCE_TYPE_YOUTUBE_CHANNEL: 'title',
            Source.SOURCE_TYPE_YOUTUBE_CHANNEL_ID: 'title',
@@ -930,7 +935,7 @@ class Media(models.Model):
    def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
        # Trigger an update of derived fields from metadata
        if self.metadata:
-            self.title = self.metadata_title
+            self.title = self.metadata_title[:200]
            self.duration = self.metadata_duration
        if update_fields is not None and "metadata" in update_fields:
            # If only some fields are being updated, make sure we update title and duration if metadata changes
@@ -944,7 +949,7 @@ class Media(models.Model):

    def get_metadata_field(self, field):
        fields = self.METADATA_FIELDS.get(field, {})
-        return fields.get(self.source.source_type, '')
+        return fields.get(self.source.source_type, field)

    def iter_formats(self):
        for fmt in self.formats:
@@ -1561,6 +1566,8 @@ class Media(models.Model):
        if self.downloaded and self.media_file:
            old_video_path = Path(self.media_file.path)
            new_video_path = Path(get_media_file_path(self, None))
+            if old_video_path == new_video_path:
+                return
            if old_video_path.exists() and not new_video_path.exists():
                old_video_path = old_video_path.resolve(strict=True)

--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -14,7 +14,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
                    map_task_to_instance, check_source_directory_exists,
                    download_media, rescan_media_server, download_source_images,
                    save_all_media_for_source, rename_all_media_for_source,
-                    get_media_metadata_task)
+                    get_media_metadata_task, get_media_download_task)
 from .utils import delete_file, glob_quote
 from .filtering import filter_media

@@ -156,8 +156,9 @@ def media_post_save(sender, instance, created, **kwargs):
        post_save.disconnect(media_post_save, sender=Media)
        instance.save()
        post_save.connect(media_post_save, sender=Media)
+    existing_media_metadata_task = get_media_metadata_task(str(instance.pk))
    # If the media is missing metadata schedule it to be downloaded
-    if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
+    if not (instance.skip or instance.metadata or existing_media_metadata_task):
        log.info(f'Scheduling task to download metadata for: {instance.url}')
        verbose_name = _('Downloading metadata for "{}"')
        download_media_metadata(
@@ -183,13 +184,13 @@ def media_post_save(sender, instance, created, **kwargs):
                verbose_name=verbose_name.format(instance.name),
                remove_existing_tasks=True
            )
+    existing_media_download_task = get_media_download_task(str(instance.pk))
    # If the media has not yet been downloaded schedule it to be downloaded
-    if not instance.media_file_exists:
+    if not (instance.media_file_exists or existing_media_download_task):
        instance.downloaded = False
        instance.media_file = None
-    if (not instance.downloaded and instance.can_download and not instance.skip
-        and instance.source.download_media):
-        delete_task_by_media('sync.tasks.download_media', (str(instance.pk),))
+    if (instance.source.download_media and instance.can_download) and not (
+        instance.skip or instance.downloaded or existing_media_download_task):
        verbose_name = _('Downloading media for "{}"')
        download_media(
            str(instance.pk),
@@ -225,6 +226,11 @@ def media_post_delete(sender, instance, **kwargs):
            other_path = video_path.with_suffix(f'.{suffix}').resolve()
            log.info(f'Deleting file for: {instance} path: {other_path!s}')
            delete_file(other_path)
+        # subtitle file names include a language code, so match them with a glob
+        subtitle_files = video_path.parent.glob(f'{glob_quote(video_path.with_suffix("").name)}*.vtt')
+        for file in subtitle_files:
+            log.info(f'Deleting file for: {instance} path: {file}')
+            delete_file(file)
        # Jellyfin creates .trickplay directories and posters
        for suffix in frozenset(('.trickplay', '-poster.jpg', '-poster.webp',)):
            # with_suffix insists on suffix beginning with '.' for no good reason
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -10,7 +10,7 @@ import math
 import uuid
 from io import BytesIO
 from hashlib import sha1
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta, timezone as tz
 from shutil import copyfile
 from PIL import Image
 from django.conf import settings
@@ -27,7 +27,6 @@ from common.utils import json_serial
 from .models import Source, Media, MediaServer
 from .utils import (get_remote_image, resize_image_to_height, delete_file,
                    write_text_file, filter_response)
-from .filtering import filter_media
 from .youtube import YouTubeError


@@ -202,6 +201,7 @@ def index_source_task(source_id):
    source.last_crawl = timezone.now()
    source.save()
    log.info(f'Found {len(videos)} media items for source: {source}')
+    fields = lambda f, m: m.get_metadata_field(f)
    for video in videos:
        # Create or update each video as a Media object
        key = video.get(source.key_field, None)
@@ -213,6 +213,18 @@ def index_source_task(source_id):
        except Media.DoesNotExist:
            media = Media(key=key)
        media.source = source
+        media.duration = float(video.get(fields('duration', media), 0)) or None
+        media.title = str(video.get(fields('title', media), ''))[:200]
+        timestamp = video.get(fields('timestamp', media), None)
+        if timestamp is not None:
+            try:
+                timestamp_float = float(timestamp)
+                posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
+                published_dt = posix_epoch + timedelta(seconds=timestamp_float)
+            except Exception as e:
+                log.warn(f'Could not set published for: {source} / {media} with "{e}"')
+            else:
+                media.published = published_dt
        try:
            media.save()
            log.debug(f'Indexed media: {source} / {media}')
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -206,11 +206,11 @@ def normalize_codec(codec_str):
 def list_of_dictionaries(arg_list, arg_function=lambda x: x):
    assert callable(arg_function)
    if isinstance(arg_list, list):
-        def _assert_and_call(arg_dict):
+        def _call_func_with_dict(arg_dict):
            if isinstance(arg_dict, dict):
                return arg_function(arg_dict)
            return arg_dict
-        return (True, list(map(_assert_and_call, arg_list)),)
+        return (True, list(map(_call_func_with_dict, arg_list)),)
    return (False, arg_list,)


--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -143,6 +143,7 @@ def get_media_info(url):
        'simulate': True,
        'logger': log,
        'extract_flat': True,
+        'extractor_args': {'youtubetab': {'approximate_date': ['true']}},
    })
    response = {}
    with yt_dlp.YoutubeDL(opts) as y:
@@ -224,6 +225,10 @@ def download_media(
        'sponskrub': False,
    })

+    pp_opts.exec_cmd.update(
+        opts.get('exec_cmd', default_opts.exec_cmd)
+    )
+
    if skip_sponsors:
        # Let yt_dlp convert from human for us.
        pp_opts.sponsorblock_mark = yt_dlp.parse_options(
@@ -242,7 +247,7 @@ def download_media(
        'writesubtitles': write_subtitles,
        'writeautomaticsub': auto_subtitles,
        'subtitleslangs': sub_langs.split(','),
-        'writethumbnail': True,
+        'writethumbnail': embed_thumbnail,
        'check_formats': False,
        'overwrites': None,
        'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20),