diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index a2b1225b..faf25319 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -11,14 +11,15 @@ on:
 
 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     strategy:
+      fail-fast: false
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Install Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
@@ -34,18 +35,18 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v1
+        uses: docker/setup-qemu-action@v3
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
+        uses: docker/setup-buildx-action@v3
       - name: Log into GitHub Container Registry
         run: echo "${{ secrets.REGISTRY_ACCESS_TOKEN }}" | docker login https://ghcr.io -u ${{ github.actor }} --password-stdin
       - name: Lowercase github username for ghcr
         id: string
-        uses: ASzc/change-string-case-action@v1
+        uses: ASzc/change-string-case-action@v6
        with:
          string: ${{ github.actor }}
      - name: Build and push
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v6
        with:
          platforms: linux/amd64,linux/arm64
          push: true
diff --git a/Dockerfile b/Dockerfile
index 4941821b..76bb21b2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,10 +8,10 @@ ARG SHA256_S6_AMD64="59289456ab1761e277bd456a95e737c06b03ede99158beb24f12b165a90
 ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f66f785"
 ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae"
 
-ARG FFMPEG_DATE="autobuild-2024-12-09-14-16"
-ARG FFMPEG_VERSION="118034-gd21134313f"
-ARG SHA256_FFMPEG_AMD64="cd50122fb0939e913585282347a8f95074c2d5477ceb059cd90aca551f14e9ea"
-ARG SHA256_FFMPEG_ARM64="33b4edebf9c23701473ba8db696b26072bb9b9c05fc4a156e115f94e44d361e0"
+ARG FFMPEG_DATE="autobuild-2024-12-24-14-15"
+ARG FFMPEG_VERSION="N-118163-g954d55c2a4"
+ARG SHA256_FFMPEG_AMD64="798a7e5a0724139e6bb70df8921522b23be27028f9f551dfa83c305ec4ffaf3a"
+ARG SHA256_FFMPEG_ARM64="c3e6cc0fec42cc7e3804014fbb02c1384a1a31ef13f6f9a36121f2e1216240c0"
 
 ENV S6_VERSION="${S6_VERSION}" \
     FFMPEG_DATE="${FFMPEG_DATE}" \
@@ -49,12 +49,16 @@ RUN decide_arch() { \
     decide_url() { \
         case "${1}" in \
             (ffmpeg) printf -- \
-                'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-N-%s-linux%s-gpl.tar.xz' \
+                'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-%s-linux%s-gpl%s.tar.xz' \
                 "${FFMPEG_DATE}" \
                 "${FFMPEG_VERSION}" \
                 "$(case "${2}" in \
                     (amd64) printf -- '64' ;; \
                     (*) printf -- '%s' "${2}" ;; \
+                esac)" \
+                "$(case "${FFMPEG_VERSION%%-*}" in \
+                    (n*) printf -- '-%s\n' "${FFMPEG_VERSION#n}" | cut -d '-' -f 1,2 ;; \
+                    (*) printf -- '' ;; \
                 esac)" ;; \
             (s6) printf -- \
                 'https://github.com/just-containers/s6-overlay/releases/download/v%s/s6-overlay-%s.tar.xz' \
@@ -138,6 +142,11 @@ COPY pip.conf /etc/pip.conf
 
 # Add Pipfile
 COPY Pipfile /app/Pipfile
 
+# Do not include compiled byte-code
+ENV PIP_NO_COMPILE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_ROOT_USER_ACTION='ignore'
+
 # Switch workdir to the the app
 WORKDIR /app
@@ -196,11 +205,12 @@ RUN set -x && \
     # Make absolutely sure we didn't accidentally bundle a SQLite dev database
     rm -rf /app/db.sqlite3 && \
     # Run any required app commands
-    /usr/bin/python3 /app/manage.py compilescss && \
-    /usr/bin/python3 /app/manage.py collectstatic --no-input --link && \
+    /usr/bin/python3 -B /app/manage.py compilescss && \
+    /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \
     # Create config, downloads and run dirs
     mkdir -v -p /run/app && \
     mkdir -v -p /config/media && \
+    mkdir -v -p /config/cache/pycache && \
     mkdir -v -p /downloads/audio && \
     mkdir -v -p /downloads/video
 
@@ -219,7 +229,7 @@ COPY config/root /
 HEALTHCHECK --interval=1m --timeout=10s CMD /app/healthcheck.py http://127.0.0.1:8080/healthcheck
 
 # ENVS and ports
-ENV PYTHONPATH="/app"
+ENV PYTHONPATH="/app" PYTHONPYCACHEPREFIX="/config/cache/pycache"
 EXPOSE 4848
 
 # Volumes
diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py
index 45710fe9..ff356edb 100644
--- a/tubesync/sync/filtering.py
+++ b/tubesync/sync/filtering.py
@@ -15,23 +15,23 @@ def filter_media(instance: Media):
     skip = False
 
     # Check if it's published
-    if filter_published(instance):
+    if not skip and filter_published(instance):
         skip = True
 
     # Check if older than max_cap_age, skip
-    if filter_max_cap(instance):
+    if not skip and filter_max_cap(instance):
         skip = True
 
     # Check if older than source_cutoff
-    if filter_source_cutoff(instance):
+    if not skip and filter_source_cutoff(instance):
         skip = True
 
     # Check if we have filter_text and filter text matches
-    if filter_filter_text(instance):
+    if not skip and filter_filter_text(instance):
         skip = True
 
     # Check if the video is longer than the max, or shorter than the min
-    if filter_duration(instance):
+    if not skip and filter_duration(instance):
         skip = True
 
     # If we aren't already skipping the file, call our custom function that can be overridden
@@ -118,10 +118,12 @@ def filter_max_cap(instance: Media):
         return False
 
     if instance.published <= max_cap_age:
-        log.info(
-            f"Media: {instance.source} / {instance} is too old for "
-            f"the download cap date, marking to be skipped"
-        )
+        # log new media instances, not every media instance every time
+        if not instance.skip:
+            log.info(
+                f"Media: {instance.source} / {instance} is too old for "
+                f"the download cap date, marking to be skipped"
+            )
         return True
 
     return False
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 455a38a5..2037492d 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1261,54 +1261,54 @@ class Media(models.Model):
         media_details = self.format_dict
         return media_format.format(**media_details)
 
-    @property
-    def thumbname(self):
-        if self.downloaded and self.media_file:
-            filename = os.path.basename(self.media_file.path)
-        else:
-            filename = self.filename
-        prefix, ext = os.path.splitext(filename)
-        return f'{prefix}.jpg'
-
-    @property
-    def thumbpath(self):
-        return self.source.directory_path / self.thumbname
-
-    @property
-    def nfoname(self):
-        if self.downloaded and self.media_file:
-            filename = os.path.basename(self.media_file.path)
-        else:
-            filename = self.filename
-        prefix, ext = os.path.splitext(filename)
-        return f'{prefix}.nfo'
-
-    @property
-    def nfopath(self):
-        return self.source.directory_path / self.nfoname
-
-    @property
-    def jsonname(self):
-        if self.downloaded and self.media_file:
-            filename = os.path.basename(self.media_file.path)
-        else:
-            filename = self.filename
-        prefix, ext = os.path.splitext(filename)
-        return f'{prefix}.info.json'
-
-    @property
-    def jsonpath(self):
-        return self.source.directory_path / self.jsonname
-
     @property
     def directory_path(self):
         dirname = self.source.directory_path / self.filename
-        return os.path.dirname(str(dirname))
+        return dirname.parent
 
     @property
     def filepath(self):
         return self.source.directory_path / self.filename
 
+    @property
+    def thumbname(self):
+        if self.downloaded and self.media_file:
+            filename = self.media_file.path
+        else:
+            filename = self.filename
+        prefix, ext = os.path.splitext(os.path.basename(filename))
+        return f'{prefix}.jpg'
+
+    @property
+    def thumbpath(self):
+        return self.directory_path / self.thumbname
+
+    @property
+    def nfoname(self):
+        if self.downloaded and self.media_file:
+            filename = self.media_file.path
+        else:
+            filename = self.filename
+        prefix, ext = os.path.splitext(os.path.basename(filename))
+        return f'{prefix}.nfo'
+
+    @property
+    def nfopath(self):
+        return self.directory_path / self.nfoname
+
+    @property
+    def jsonname(self):
+        if self.downloaded and self.media_file:
+            filename = self.media_file.path
+        else:
+            filename = self.filename
+        prefix, ext = os.path.splitext(os.path.basename(filename))
+        return f'{prefix}.info.json'
+
+    @property
+    def jsonpath(self):
+        return self.directory_path / self.jsonname
+
     @property
     def thumb_file_exists(self):
         if not self.thumb:
@@ -1353,7 +1353,7 @@ class Media(models.Model):
         nfo.text = '\n  '
         # title = media metadata title
         title = nfo.makeelement('title', {})
-        title.text = clean_emoji(str(self.name).strip())
+        title.text = clean_emoji(self.title)
         title.tail = '\n  '
         nfo.append(title)
         # showtitle = source name
@@ -1499,7 +1499,16 @@ class Media(models.Model):
         if not callable(indexer):
             raise Exception(f'Media with source type f"{self.source.source_type}" '
                             f'has no indexer')
-        return indexer(self.url)
+        response = indexer(self.url)
+        no_formats_available = (
+            not response or
+            "formats" not in response.keys() or
+            0 == len(response["formats"])
+        )
+        if no_formats_available:
+            self.can_download = False
+            self.skip = True
+        return response
 
     def calculate_episode_number(self):
         if self.source.source_type == Source.SOURCE_TYPE_YOUTUBE_PLAYLIST:
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index 0ca0e897..59794e0d 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -13,7 +13,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
                     download_media_thumbnail, download_media_metadata,
                     map_task_to_instance, check_source_directory_exists,
                     download_media, rescan_media_server, download_source_images,
-                    save_all_media_for_source)
+                    save_all_media_for_source, get_media_metadata_task)
 from .utils import delete_file, glob_quote
 from .filtering import filter_media
 
@@ -103,6 +103,10 @@ def task_task_failed(sender, task_id, completed_task, **kwargs):
         obj.has_failed = True
         obj.save()
 
+    if isinstance(obj, Media) and completed_task.task_name == "sync.tasks.download_media_metadata":
+        log.error(f'Permanent failure for media: {obj} task: {completed_task}')
+        obj.skip = True
+        obj.save()
 
 @receiver(post_save, sender=Media)
 def media_post_save(sender, instance, created, **kwargs):
@@ -134,7 +138,7 @@
         instance.save()
         post_save.connect(media_post_save, sender=Media)
     # If the media is missing metadata schedule it to be downloaded
-    if not instance.metadata:
+    if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
         log.info(f'Scheduling task to download metadata for: {instance.url}')
         verbose_name = _('Downloading metadata for "{}"')
         download_media_metadata(
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 37983932..3df651ba 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -49,6 +49,7 @@ def map_task_to_instance(task):
         'sync.tasks.check_source_directory_exists': Source,
         'sync.tasks.download_media_thumbnail': Media,
         'sync.tasks.download_media': Media,
+        'sync.tasks.download_media_metadata': Media,
         'sync.tasks.save_all_media_for_source': Source,
     }
     MODEL_URL_MAP = {
@@ -117,6 +118,12 @@ def get_media_download_task(media_id):
     except IndexError:
         return False
 
+def get_media_metadata_task(media_id):
+    try:
+        return Task.objects.get_task('sync.tasks.download_media_metadata',
+                                     args=(str(media_id),))[0]
+    except IndexError:
+        return False
 
 def delete_task_by_source(task_name, source_id):
     return Task.objects.filter(task_name=task_name, queue=str(source_id)).delete()
@@ -146,12 +153,12 @@ def cleanup_old_media():
 
 
 def cleanup_removed_media(source, videos):
-    media_objects = Media.objects.filter(source=source, downloaded=True)
-    for item in media_objects:
-        matching_source_item = [video['id'] for video in videos if video['id'] == item.key]
+    media_objects = Media.objects.filter(source=source)
+    for media in media_objects:
+        matching_source_item = [video['id'] for video in videos if video['id'] == media.key]
         if not matching_source_item:
-            log.info(f'{item.title} is no longer in source, removing')
-            item.delete()
+            log.info(f'{media.name} is no longer in source, removing')
+            media.delete()
 
 
 @background(schedule=0)
@@ -191,7 +198,15 @@ def index_source_task(source_id):
         media.source = source
         try:
             media.save()
-            log.info(f'Indexed media: {source} / {media}')
+            log.debug(f'Indexed media: {source} / {media}')
+            # log the new media instances
+            new_media_instance = (
+                media.created and
+                source.last_crawl and
+                media.created >= source.last_crawl
+            )
+            if new_media_instance:
+                log.info(f'Indexed new media: {source} / {media}')
         except IntegrityError as e:
             log.error(f'Index media failed: {source} / {media} with "{e}"')
     # Tack on a cleanup of old completed tasks
@@ -199,7 +214,7 @@
     # Tack on a cleanup of old media
     cleanup_old_media()
     if source.delete_removed_media:
-        log.info(f'Cleaning up media no longer in source {source}')
+        log.info(f'Cleaning up media no longer in source: {source}')
         cleanup_removed_media(source, videos)
 
 
@@ -236,7 +251,7 @@ def download_source_images(source_id):
                   f'source exists with ID: {source_id}')
         return
     avatar, banner = source.get_image_url
-    log.info(f'Thumbnail URL for source with ID: {source_id} '
+    log.info(f'Thumbnail URL for source with ID: {source_id} / {source} '
             f'Avatar: {avatar} '
             f'Banner: {banner}')
     if banner != None:
@@ -269,7 +284,7 @@
         with open(file_path, 'wb') as f:
             f.write(django_file.read())
 
-    log.info(f'Thumbnail downloaded for source with ID: {source_id}')
+    log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}')
 
 
 @background(schedule=0)
@@ -285,7 +300,7 @@ def download_media_metadata(media_id):
                   f'media exists with ID: {media_id}')
         return
     if media.manual_skip:
-        log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.')
+        log.info(f'Task for ID: {media_id} / {media} skipped, due to task being manually skipped.')
         return
     source = media.source
     metadata = media.index_metadata()
@@ -306,7 +321,7 @@
     # Don't filter media here, the post_save signal will handle that
     media.save()
     log.info(f'Saved {len(media.metadata)} bytes of metadata for: '
-             f'{source} / {media_id}')
+             f'{source} / {media}: {media_id}')
 
 
 @background(schedule=0)
@@ -359,7 +374,7 @@ def download_media(media_id):
         return
     if media.skip:
         # Media was toggled to be skipped after the task was scheduled
-        log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' 
+        log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but '
                  f'it is now marked to be skipped, not downloading')
        return
    if media.downloaded and media.media_file:
@@ -430,7 +445,7 @@
             copyfile(media.thumb.path, media.thumbpath)
         # If selected, write an NFO file
         if media.source.write_nfo:
-            log.info(f'Writing media NFO file to: to: {media.nfopath}')
+            log.info(f'Writing media NFO file to: {media.nfopath}')
             write_text_file(media.nfopath, media.nfoxml)
         # Schedule a task to update media servers
         for mediaserver in MediaServer.objects.all():
@@ -446,7 +461,7 @@
     else:
         # Expected file doesn't exist on disk
         err = (f'Failed to download media: {media} (UUID: {media.pk}) to disk, '
-               f'expected outfile does not exist: {media.filepath}')
+               f'expected outfile does not exist: {filepath}')
         log.error(err)
         # Raising an error here triggers the task to be re-attempted (or fail)
         raise DownloadFailedException(err)
diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html
index 0c78f9b4..6f751be6 100644
--- a/tubesync/sync/templates/sync/media-item.html
+++ b/tubesync/sync/templates/sync/media-item.html
@@ -98,12 +98,19 @@
         {% if media.downloaded %}