Merge branch 'main' into patch-3

tcely 2024-12-26 01:17:21 -05:00 committed by GitHub
commit bc02f241a1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 184 additions and 89 deletions

View File

@@ -11,14 +11,15 @@ on:
jobs:
test:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Install Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
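
Quoting the version strings matters here: YAML parsers that follow YAML 1.1 resolution read a bare 3.10 as the float 3.1, which is why an unquoted "python-version: 3.10" has historically selected Python 3.1. A quick illustration in Python, assuming PyYAML is available:

import yaml  # PyYAML, assumed installed
print(yaml.safe_load('versions: [3.7, 3.8, 3.9, 3.10]'))
# {'versions': [3.7, 3.8, 3.9, 3.1]}  <- bare 3.10 collapses to the float 3.1
print(yaml.safe_load("versions: ['3.10', '3.11', '3.12']"))
# {'versions': ['3.10', '3.11', '3.12']}  <- quoting preserves the version strings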
@@ -34,18 +35,18 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v3
- name: Log into GitHub Container Registry
run: echo "${{ secrets.REGISTRY_ACCESS_TOKEN }}" | docker login https://ghcr.io -u ${{ github.actor }} --password-stdin
- name: Lowercase github username for ghcr
id: string
uses: ASzc/change-string-case-action@v1
uses: ASzc/change-string-case-action@v6
with:
string: ${{ github.actor }}
- name: Build and push
uses: docker/build-push-action@v2
uses: docker/build-push-action@v6
with:
platforms: linux/amd64,linux/arm64
push: true
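
GHCR rejects image references containing uppercase characters, hence the change-string-case step to lowercase ${{ github.actor }} before tagging. The same normalisation, sketched in Python with an illustrative username:

github_actor = 'SomeUser'  # illustrative mixed-case GitHub username
image_ref = f'ghcr.io/{github_actor.lower()}/tubesync:latest'
print(image_ref)  # ghcr.io/someuser/tubesync:latest, a valid ghcr.io reference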

View File

@@ -8,10 +8,10 @@ ARG SHA256_S6_AMD64="59289456ab1761e277bd456a95e737c06b03ede99158beb24f12b165a90"
ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f66f785"
ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae"
ARG FFMPEG_DATE="autobuild-2024-12-09-14-16"
ARG FFMPEG_VERSION="118034-gd21134313f"
ARG SHA256_FFMPEG_AMD64="cd50122fb0939e913585282347a8f95074c2d5477ceb059cd90aca551f14e9ea"
ARG SHA256_FFMPEG_ARM64="33b4edebf9c23701473ba8db696b26072bb9b9c05fc4a156e115f94e44d361e0"
ARG FFMPEG_DATE="autobuild-2024-12-24-14-15"
ARG FFMPEG_VERSION="N-118163-g954d55c2a4"
ARG SHA256_FFMPEG_AMD64="798a7e5a0724139e6bb70df8921522b23be27028f9f551dfa83c305ec4ffaf3a"
ARG SHA256_FFMPEG_ARM64="c3e6cc0fec42cc7e3804014fbb02c1384a1a31ef13f6f9a36121f2e1216240c0"
ENV S6_VERSION="${S6_VERSION}" \
FFMPEG_DATE="${FFMPEG_DATE}" \
@@ -49,12 +49,16 @@ RUN decide_arch() { \
decide_url() { \
case "${1}" in \
(ffmpeg) printf -- \
'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-N-%s-linux%s-gpl.tar.xz' \
'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-%s-linux%s-gpl%s.tar.xz' \
"${FFMPEG_DATE}" \
"${FFMPEG_VERSION}" \
"$(case "${2}" in \
(amd64) printf -- '64' ;; \
(*) printf -- '%s' "${2}" ;; \
esac)" \
"$(case "${FFMPEG_VERSION%%-*}" in \
(n*) printf -- '-%s\n' "${FFMPEG_VERSION#n}" | cut -d '-' -f 1,2 ;; \
(*) printf -- '' ;; \
esac)" ;; \
(s6) printf -- \
'https://github.com/just-containers/s6-overlay/releases/download/v%s/s6-overlay-%s.tar.xz' \
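
The reworked format string and the added case arm follow the asset naming of yt-dlp's FFmpeg-Builds releases: autobuild versions such as 'N-118163-g954d55c2a4' take no extra suffix, while release tags starting with a lowercase 'n' (for example 'n7.1-5-g...') gain a '-7.1' style component. A rough Python rendering of the updated ffmpeg branch of decide_url(), with an illustrative helper name:

def ffmpeg_url(date, version, arch):
    arch_part = '64' if arch == 'amd64' else arch
    suffix = ''
    # the test is case-sensitive, mirroring the shell: 'n7.1-...' matches, 'N-...' does not
    if version.split('-')[0].startswith('n'):
        suffix = '-' + version[1:].split('-', 1)[0]  # 'n7.1-5-g...' -> '-7.1'
    return ('https://github.com/yt-dlp/FFmpeg-Builds/releases/download/'
            f'{date}/ffmpeg-{version}-linux{arch_part}-gpl{suffix}.tar.xz')

# ffmpeg_url(FFMPEG_DATE, 'N-118163-g954d55c2a4', 'amd64') yields a URL ending in
# .../ffmpeg-N-118163-g954d55c2a4-linux64-gpl.tar.xz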
@@ -138,6 +142,11 @@ COPY pip.conf /etc/pip.conf
# Add Pipfile
COPY Pipfile /app/Pipfile
# Do not include compiled byte-code
ENV PIP_NO_COMPILE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_ROOT_USER_ACTION='ignore'
# Switch workdir to the app
WORKDIR /app
@@ -196,11 +205,12 @@ RUN set -x && \
# Make absolutely sure we didn't accidentally bundle a SQLite dev database
rm -rf /app/db.sqlite3 && \
# Run any required app commands
/usr/bin/python3 /app/manage.py compilescss && \
/usr/bin/python3 /app/manage.py collectstatic --no-input --link && \
/usr/bin/python3 -B /app/manage.py compilescss && \
/usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \
# Create config, downloads and run dirs
mkdir -v -p /run/app && \
mkdir -v -p /config/media && \
mkdir -v -p /config/cache/pycache && \
mkdir -v -p /downloads/audio && \
mkdir -v -p /downloads/video
@@ -219,7 +229,7 @@ COPY config/root /
HEALTHCHECK --interval=1m --timeout=10s CMD /app/healthcheck.py http://127.0.0.1:8080/healthcheck
# ENVS and ports
ENV PYTHONPATH="/app"
ENV PYTHONPATH="/app" PYTHONPYCACHEPREFIX="/config/cache/pycache"
EXPOSE 4848
# Volumes
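
The -B flags added above and this PYTHONPYCACHEPREFIX work together: -B stops the build-time manage.py invocations from writing .pyc files into the image, while the prefix redirects runtime byte-code caches into the /config/cache/pycache directory created earlier instead of __pycache__ directories under /app. Observable on CPython 3.8+:

import sys
print(sys.pycache_prefix)       # '/config/cache/pycache' once the ENV is set
print(sys.dont_write_bytecode)  # True when running under 'python3 -B'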

View File

@@ -15,23 +15,23 @@ def filter_media(instance: Media):
skip = False
# Check if it's published
if filter_published(instance):
if not skip and filter_published(instance):
skip = True
# Check if older than max_cap_age, skip
if filter_max_cap(instance):
if not skip and filter_max_cap(instance):
skip = True
# Check if older than source_cutoff
if filter_source_cutoff(instance):
if not skip and filter_source_cutoff(instance):
skip = True
# Check if we have filter_text and filter text matches
if filter_filter_text(instance):
if not skip and filter_filter_text(instance):
skip = True
# Check if the video is longer than the max, or shorter than the min
if filter_duration(instance):
if not skip and filter_duration(instance):
skip = True
# If we aren't already skipping the file, call our custom function that can be overridden
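
The added "not skip and" guards make the chain short-circuit: once one filter marks the media, the later filter functions are never called. Condensed to a sketch:

skip = False
for check in (filter_published, filter_max_cap, filter_source_cutoff,
              filter_filter_text, filter_duration):
    if not skip and check(instance):  # later filters never run once skip is set
        skip = True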
@@ -118,10 +118,12 @@ def filter_max_cap(instance: Media):
return False
if instance.published <= max_cap_age:
log.info(
f"Media: {instance.source} / {instance} is too old for "
f"the download cap date, marking to be skipped"
)
# log new media instances, not every media instance every time
if not instance.skip:
log.info(
f"Media: {instance.source} / {instance} is too old for "
f"the download cap date, marking to be skipped"
)
return True
return False

View File

@@ -1261,54 +1261,54 @@ class Media(models.Model):
media_details = self.format_dict
return media_format.format(**media_details)
@property
def thumbname(self):
if self.downloaded and self.media_file:
filename = os.path.basename(self.media_file.path)
else:
filename = self.filename
prefix, ext = os.path.splitext(filename)
return f'{prefix}.jpg'
@property
def thumbpath(self):
return self.source.directory_path / self.thumbname
@property
def nfoname(self):
if self.downloaded and self.media_file:
filename = os.path.basename(self.media_file.path)
else:
filename = self.filename
prefix, ext = os.path.splitext(filename)
return f'{prefix}.nfo'
@property
def nfopath(self):
return self.source.directory_path / self.nfoname
@property
def jsonname(self):
if self.downloaded and self.media_file:
filename = os.path.basename(self.media_file.path)
else:
filename = self.filename
prefix, ext = os.path.splitext(filename)
return f'{prefix}.info.json'
@property
def jsonpath(self):
return self.source.directory_path / self.jsonname
@property
def directory_path(self):
dirname = self.source.directory_path / self.filename
return os.path.dirname(str(dirname))
return dirname.parent
@property
def filepath(self):
return self.source.directory_path / self.filename
@property
def thumbname(self):
if self.downloaded and self.media_file:
filename = self.media_file.path
else:
filename = self.filename
prefix, ext = os.path.splitext(os.path.basename(filename))
return f'{prefix}.jpg'
@property
def thumbpath(self):
return self.directory_path / self.thumbname
@property
def nfoname(self):
if self.downloaded and self.media_file:
filename = self.media_file.path
else:
filename = self.filename
prefix, ext = os.path.splitext(os.path.basename(filename))
return f'{prefix}.nfo'
@property
def nfopath(self):
return self.directory_path / self.nfoname
@property
def jsonname(self):
if self.downloaded and self.media_file:
filename = self.media_file.path
else:
filename = self.filename
prefix, ext = os.path.splitext(os.path.basename(filename))
return f'{prefix}.info.json'
@property
def jsonpath(self):
return self.directory_path / self.jsonname
@property
def thumb_file_exists(self):
if not self.thumb:
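
The pivotal change in this block is directory_path returning dirname.parent, a pathlib.Path, instead of os.path.dirname(str(dirname)), a plain str; that is what lets thumbpath, nfopath and jsonpath join against it with the / operator. For example:

from pathlib import Path
dirname = Path('/downloads/video') / '2017/mediakey.mkv'
print(dirname.parent)            # /downloads/video/2017  (a Path, not a str)
print(dirname.parent / 'x.jpg')  # /downloads/video/2017/x.jpg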
@@ -1353,7 +1353,7 @@ class Media(models.Model):
nfo.text = '\n '
# title = media metadata title
title = nfo.makeelement('title', {})
title.text = clean_emoji(str(self.name).strip())
title.text = clean_emoji(self.title)
title.tail = '\n '
nfo.append(title)
# showtitle = source name
@@ -1499,7 +1499,16 @@ class Media(models.Model):
if not callable(indexer):
raise Exception(f'Media with source type f"{self.source.source_type}" '
f'has no indexer')
return indexer(self.url)
response = indexer(self.url)
no_formats_available = (
not response or
"formats" not in response.keys() or
0 == len(response["formats"])
)
if no_formats_available:
self.can_download = False
self.skip = True
return response
def calculate_episode_number(self):
if self.source.source_type == Source.SOURCE_TYPE_YOUTUBE_PLAYLIST:
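
Returning to the index_metadata() change above: the no_formats_available guard treats a missing response, a response lacking a 'formats' key, and an empty formats list the same way, marking the media as skipped up front instead of failing at download time. Illustratively:

for resp in (None, {}, {'id': 'abc'}, {'formats': []}):
    print(not resp or 'formats' not in resp.keys() or 0 == len(resp['formats']))
# prints True four times; each case would set can_download=False and skip=True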

View File

@@ -13,7 +13,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
download_media_thumbnail, download_media_metadata,
map_task_to_instance, check_source_directory_exists,
download_media, rescan_media_server, download_source_images,
save_all_media_for_source)
save_all_media_for_source, get_media_metadata_task)
from .utils import delete_file, glob_quote
from .filtering import filter_media
@@ -103,6 +103,10 @@ def task_task_failed(sender, task_id, completed_task, **kwargs):
obj.has_failed = True
obj.save()
if isinstance(obj, Media) and completed_task.task_name == "sync.tasks.download_media_metadata":
log.error(f'Permanent failure for media: {obj} task: {completed_task}')
obj.skip = True
obj.save()
@receiver(post_save, sender=Media)
def media_post_save(sender, instance, created, **kwargs):
@@ -134,7 +138,7 @@ def media_post_save(sender, instance, created, **kwargs):
instance.save()
post_save.connect(media_post_save, sender=Media)
# If the media is missing metadata schedule it to be downloaded
if not instance.metadata:
if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
log.info(f'Scheduling task to download metadata for: {instance.url}')
verbose_name = _('Downloading metadata for "{}"')
download_media_metadata(
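
Together with get_media_metadata_task() (added to sync/tasks.py below), the widened condition queues a metadata download only when all three guards pass, preventing duplicate tasks for the same media. Schematically:

should_schedule = (
    not instance.metadata                         # nothing fetched yet
    and not instance.skip                         # not marked to be skipped
    and not get_media_metadata_task(instance.pk)  # no task already queued
)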

View File

@@ -49,6 +49,7 @@ def map_task_to_instance(task):
'sync.tasks.check_source_directory_exists': Source,
'sync.tasks.download_media_thumbnail': Media,
'sync.tasks.download_media': Media,
'sync.tasks.download_media_metadata': Media,
'sync.tasks.save_all_media_for_source': Source,
}
MODEL_URL_MAP = {
@@ -117,6 +118,12 @@ def get_media_download_task(media_id):
except IndexError:
return False
def get_media_metadata_task(media_id):
try:
return Task.objects.get_task('sync.tasks.download_media_metadata',
args=(str(media_id),))[0]
except IndexError:
return False
def delete_task_by_source(task_name, source_id):
return Task.objects.filter(task_name=task_name, queue=str(source_id)).delete()
@@ -146,12 +153,12 @@ def cleanup_old_media():
def cleanup_removed_media(source, videos):
media_objects = Media.objects.filter(source=source, downloaded=True)
for item in media_objects:
matching_source_item = [video['id'] for video in videos if video['id'] == item.key]
media_objects = Media.objects.filter(source=source)
for media in media_objects:
matching_source_item = [video['id'] for video in videos if video['id'] == media.key]
if not matching_source_item:
log.info(f'{item.title} is no longer in source, removing')
item.delete()
log.info(f'{media.name} is no longer in source, removing')
media.delete()
@background(schedule=0)
@@ -191,7 +198,15 @@ def index_source_task(source_id):
media.source = source
try:
media.save()
log.info(f'Indexed media: {source} / {media}')
log.debug(f'Indexed media: {source} / {media}')
# log the new media instances
new_media_instance = (
media.created and
source.last_crawl and
media.created >= source.last_crawl
)
if new_media_instance:
log.info(f'Indexed new media: {source} / {media}')
except IntegrityError as e:
log.error(f'Index media failed: {source} / {media} with "{e}"')
# Tack on a cleanup of old completed tasks
@@ -199,7 +214,7 @@
# Tack on a cleanup of old media
cleanup_old_media()
if source.delete_removed_media:
log.info(f'Cleaning up media no longer in source {source}')
log.info(f'Cleaning up media no longer in source: {source}')
cleanup_removed_media(source, videos)
@@ -236,7 +251,7 @@ def download_source_images(source_id):
f'source exists with ID: {source_id}')
return
avatar, banner = source.get_image_url
log.info(f'Thumbnail URL for source with ID: {source_id} '
log.info(f'Thumbnail URL for source with ID: {source_id} / {source} '
f'Avatar: {avatar} '
f'Banner: {banner}')
if banner != None:
@@ -269,7 +284,7 @@
with open(file_path, 'wb') as f:
f.write(django_file.read())
log.info(f'Thumbnail downloaded for source with ID: {source_id}')
log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}')
@background(schedule=0)
@@ -285,7 +300,7 @@ def download_media_metadata(media_id):
f'media exists with ID: {media_id}')
return
if media.manual_skip:
log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.')
log.info(f'Task for ID: {media_id} / {media} skipped, due to task being manually skipped.')
return
source = media.source
metadata = media.index_metadata()
@@ -306,7 +321,7 @@
# Don't filter media here, the post_save signal will handle that
media.save()
log.info(f'Saved {len(media.metadata)} bytes of metadata for: '
f'{source} / {media_id}')
f'{source} / {media}: {media_id}')
@background(schedule=0)
@@ -359,7 +374,7 @@ def download_media(media_id):
return
if media.skip:
# Media was toggled to be skipped after the task was scheduled
log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but '
log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but '
f'it is now marked to be skipped, not downloading')
return
if media.downloaded and media.media_file:
@@ -430,7 +445,7 @@ def download_media(media_id):
copyfile(media.thumb.path, media.thumbpath)
# If selected, write an NFO file
if media.source.write_nfo:
log.info(f'Writing media NFO file to: to: {media.nfopath}')
log.info(f'Writing media NFO file to: {media.nfopath}')
write_text_file(media.nfopath, media.nfoxml)
# Schedule a task to update media servers
for mediaserver in MediaServer.objects.all():
@@ -446,7 +461,7 @@
else:
# Expected file doesn't exist on disk
err = (f'Failed to download media: {media} (UUID: {media.pk}) to disk, '
f'expected outfile does not exist: {media.filepath}')
f'expected outfile does not exist: {filepath}')
log.error(err)
# Raising an error here triggers the task to be re-attempted (or fail)
raise DownloadFailedException(err)

View File

@@ -98,12 +98,19 @@
{% if media.downloaded %}
<tr title="The filename the media will be downloaded as">
<td class="hide-on-small-only">Filename</td>
<td><span class="hide-on-med-and-up">Filename<br></span><strong>{{ media.filename }}</strong></td>
<td><span class="hide-on-med-and-up">Filename<br></span><strong>{{ filename_path.name }}</strong></td>
</tr>
<tr title="The filename the media will be downloaded as">
<tr title="The directory the media will be downloaded to">
<td class="hide-on-small-only">Directory</td>
<td><span class="hide-on-med-and-up">Directory<br></span><strong>{{ media.directory_path }}</strong></td>
</tr>
<tr title="The filepath the media was saved to">
<td class="hide-on-small-only">Database&nbsp;Filepath</td>
<td><span class="hide-on-med-and-up">DB&nbsp;Filepath<br></span><strong>{{ media_file_path }}</strong>
{% if media_file_path == media.filepath %}
<span class="green-text">&nbsp;(matched)</span>
{% endif %}
</td> </tr>
<tr title="Size of the file on disk">
<td class="hide-on-small-only">File size</td>
<td><span class="hide-on-med-and-up">File size<br></span><strong>{{ media.downloaded_filesize|filesizeformat }}</strong></td>
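
A note on the "(matched)" badge: both values are pathlib.Path objects (see the view change below), and Path equality compares normalised path components rather than raw strings, so cosmetic differences such as doubled separators do not break the match:

from pathlib import Path
print(Path('/downloads/video//x.mkv') == Path('/downloads/video/x.mkv'))  # True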

View File

@@ -597,11 +597,11 @@ class FilepathTestCase(TestCase):
# Check child directories work
self.source.media_format = '{yyyy}/{key}.{ext}'
self.assertEqual(self.media.directory_path,
str(self.source.directory_path / '2017'))
self.source.directory_path / '2017')
self.assertEqual(self.media.filename, '2017/mediakey.mkv')
self.source.media_format = '{yyyy}/{yyyy_mm_dd}/{key}.{ext}'
self.assertEqual(self.media.directory_path,
str(self.source.directory_path / '2017/2017-09-11'))
self.source.directory_path / '2017/2017-09-11')
self.assertEqual(self.media.filename, '2017/2017-09-11/mediakey.mkv')
# Check media specific media format keys work
test_media = Media.objects.create(

View File

@@ -582,6 +582,8 @@ class MediaItemView(DetailView):
data['video_exact'] = video_exact
data['video_format'] = video_format
data['youtube_dl_format'] = self.object.get_format_str()
data['filename_path'] = pathlib.Path(self.object.filename)
data['media_file_path'] = pathlib.Path(self.object.media_file.path) if self.object.media_file else None
return data
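
filename_path is exposed so the template above can render just the basename through {{ filename_path.name }}:

import pathlib
print(pathlib.Path('2017/2017-09-11/mediakey.mkv').name)  # mediakey.mkv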

View File

@@ -73,6 +73,26 @@ def get_channel_image_info(url):
raise YouTubeError(f'Failed to extract channel info for "{url}": {e}') from e
def _subscriber_only(msg='', response=None):
if response is None:
# process msg only
msg = str(msg)
if 'access to members-only content' in msg:
return True
if ': Join this channel' in msg:
return True
else:
# ignore msg entirely
if not isinstance(response, dict):
raise TypeError(f'response must be a dict, got "{type(response)}" instead')
if 'availability' not in response.keys():
return False
# check for the specific expected value
return 'subscriber_only' == response.get('availability')
return False
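
A few illustrative calls (the message fragments mimic what yt-dlp reports for members-only videos):

print(_subscriber_only(msg='This video is available to members with access to members-only content'))  # True
print(_subscriber_only(response={'availability': 'subscriber_only'}))  # True
print(_subscriber_only(response={'availability': 'public'}))           # False
_subscriber_only(response='not-a-dict')  # raises TypeError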
def get_media_info(url):
'''
@@ -82,6 +102,8 @@
'''
opts = get_yt_opts()
opts.update({
'ignoreerrors': False, # explicitly set this to catch exceptions
'ignore_no_formats_error': False, # we must fail first to try again with this enabled
'skip_download': True,
'simulate': True,
'logger': log,
@@ -92,7 +114,19 @@
try:
response = y.extract_info(url, download=False)
except yt_dlp.utils.DownloadError as e:
raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e
if not _subscriber_only(msg=e.msg):
raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e
# adjust options and try again
opts.update({'ignore_no_formats_error': True,})
with yt_dlp.YoutubeDL(opts) as yy:
try:
response = yy.extract_info(url, download=False)
except yt_dlp.utils.DownloadError as ee:
raise YouTubeError(f'Failed (again) to extract_info for "{url}": {ee}') from ee
# validate the response is what we expected
if not _subscriber_only(response=response):
response = {}
if not response:
raise YouTubeError(f'Failed to extract_info for "{url}": No metadata was '
f'returned by youtube-dl, check for error messages in the '
@@ -151,6 +185,7 @@ def download_media(url, media_format, extension, output_file, info_json,
'outtmpl': os.path.basename(output_file),
'quiet': False if settings.DEBUG else True,
'verbose': True if settings.DEBUG else False,
'noprogress': None if settings.DEBUG else True,
'progress_hooks': [hook],
'writeinfojson': info_json,
'postprocessors': [],

View File

@@ -60,7 +60,7 @@ if BACKGROUND_TASK_ASYNC_THREADS > MAX_BACKGROUND_TASK_ASYNC_THREADS:
MEDIA_ROOT = CONFIG_BASE_DIR / 'media'
DOWNLOAD_ROOT = DOWNLOADS_BASE_DIR
YOUTUBE_DL_CACHEDIR = CONFIG_BASE_DIR / 'cache'
YOUTUBE_DL_CACHEDIR = CONFIG_BASE_DIR / 'cache/youtube'
YOUTUBE_DL_TEMPDIR = DOWNLOAD_ROOT / 'cache'
COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt'
@@ -88,3 +88,13 @@ SOURCE_DOWNLOAD_DIRECTORY_PREFIX = True if SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR
VIDEO_HEIGHT_CUTOFF = int(os.getenv("TUBESYNC_VIDEO_HEIGHT_CUTOFF", "240"))
# ensure that the current directory exists
if not YOUTUBE_DL_CACHEDIR.is_dir():
YOUTUBE_DL_CACHEDIR.mkdir(parents=True)
# rename any old yt_dlp cache directories to the current directory
old_youtube_cache_dirs = list(YOUTUBE_DL_CACHEDIR.parent.glob('youtube-*'))
for cache_dir in old_youtube_cache_dirs:
cache_dir.rename(YOUTUBE_DL_CACHEDIR / cache_dir.name)
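
The rename loop migrates existing yt-dlp cache directories (for example /config/cache/youtube-nsig, a name yt-dlp commonly uses; the exact names vary) under the new youtube/ subdirectory so cached data survives the path change. The resulting moves:

from pathlib import Path
cache = Path('/config/cache')
for old_dir in cache.glob('youtube-*'):
    print(old_dir, '->', cache / 'youtube' / old_dir.name)
# e.g. /config/cache/youtube-nsig -> /config/cache/youtube/youtube-nsig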