diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index a2b1225b..faf25319 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -11,14 +11,15 @@ on:
 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     strategy:
+      fail-fast: false
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Install Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
@@ -34,18 +35,18 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v1
+        uses: docker/setup-qemu-action@v3
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
+        uses: docker/setup-buildx-action@v3
       - name: Log into GitHub Container Registry
         run: echo "${{ secrets.REGISTRY_ACCESS_TOKEN }}" | docker login https://ghcr.io -u ${{ github.actor }} --password-stdin
       - name: Lowercase github username for ghcr
         id: string
-        uses: ASzc/change-string-case-action@v1
+        uses: ASzc/change-string-case-action@v6
         with:
           string: ${{ github.actor }}
       - name: Build and push
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v6
         with:
           platforms: linux/amd64,linux/arm64
           push: true
diff --git a/Dockerfile b/Dockerfile
index 4941821b..76bb21b2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,10 +8,10 @@ ARG SHA256_S6_AMD64="59289456ab1761e277bd456a95e737c06b03ede99158beb24f12b165a90"
 ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f66f785"
 ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae"
 
-ARG FFMPEG_DATE="autobuild-2024-12-09-14-16"
-ARG FFMPEG_VERSION="118034-gd21134313f"
-ARG SHA256_FFMPEG_AMD64="cd50122fb0939e913585282347a8f95074c2d5477ceb059cd90aca551f14e9ea"
-ARG SHA256_FFMPEG_ARM64="33b4edebf9c23701473ba8db696b26072bb9b9c05fc4a156e115f94e44d361e0"
+ARG FFMPEG_DATE="autobuild-2024-12-24-14-15"
+ARG FFMPEG_VERSION="N-118163-g954d55c2a4"
+ARG SHA256_FFMPEG_AMD64="798a7e5a0724139e6bb70df8921522b23be27028f9f551dfa83c305ec4ffaf3a"
+ARG SHA256_FFMPEG_ARM64="c3e6cc0fec42cc7e3804014fbb02c1384a1a31ef13f6f9a36121f2e1216240c0"
 
 ENV S6_VERSION="${S6_VERSION}" \
     FFMPEG_DATE="${FFMPEG_DATE}" \
@@ -49,12 +49,16 @@ RUN decide_arch() { \
     decide_url() { \
         case "${1}" in \
             (ffmpeg) printf -- \
-                'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-N-%s-linux%s-gpl.tar.xz' \
+                'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-%s-linux%s-gpl%s.tar.xz' \
                 "${FFMPEG_DATE}" \
                 "${FFMPEG_VERSION}" \
                 "$(case "${2}" in \
                     (amd64) printf -- '64' ;; \
                     (*) printf -- '%s' "${2}" ;; \
+                esac)" \
+                "$(case "${FFMPEG_VERSION%%-*}" in \
+                    (n*) printf -- '-%s\n' "${FFMPEG_VERSION#n}" | cut -d '-' -f 1,2 ;; \
+                    (*) printf -- '' ;; \
                 esac)" ;; \
             (s6) printf -- \
                 'https://github.com/just-containers/s6-overlay/releases/download/v%s/s6-overlay-%s.tar.xz' \
@@ -138,6 +142,11 @@ COPY pip.conf /etc/pip.conf
 # Add Pipfile
 COPY Pipfile /app/Pipfile
 
+# Do not include compiled byte-code
+ENV PIP_NO_COMPILE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_ROOT_USER_ACTION='ignore'
+
 # Switch workdir to the app
 WORKDIR /app
 
@@ -196,11 +205,12 @@ RUN set -x && \
     # Make absolutely sure we didn't accidentally bundle a SQLite dev database
     rm -rf /app/db.sqlite3 && \
     # Run any required app commands
-    /usr/bin/python3 /app/manage.py compilescss && \
-    /usr/bin/python3 /app/manage.py collectstatic --no-input --link && \
+    /usr/bin/python3 -B /app/manage.py compilescss && \
+    /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \
     # Create config, downloads and run dirs
     mkdir -v -p /run/app && \
     mkdir -v -p /config/media && \
+    mkdir -v -p /config/cache/pycache && \
     mkdir -v -p /downloads/audio && \
     mkdir -v -p /downloads/video
 
@@ -219,7 +229,7 @@ COPY config/root /
 HEALTHCHECK --interval=1m --timeout=10s CMD /app/healthcheck.py http://127.0.0.1:8080/healthcheck
 
 # ENVS and ports
-ENV PYTHONPATH="/app"
+ENV PYTHONPATH="/app" PYTHONPYCACHEPREFIX="/config/cache/pycache"
 EXPOSE 4848
 
 # Volumes
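
The reworked decide_url() derives an extra suffix from the FFmpeg release tag: autobuild tags such as N-118163-g954d55c2a4 get no suffix, while stable tags such as n7.1-7 gain -7.1 before .tar.xz (the lowercase-n case pattern is what distinguishes the two). A hedged Python re-rendering of that shell logic, with an illustrative function name that is not part of the patch:

    def ffmpeg_url(date: str, version: str, arch: str) -> str:
        # amd64 builds are published as 'linux64'; other arches keep their own name
        arch_part = '64' if arch == 'amd64' else arch
        # stable tags ('n7.1-7') carry a '-<major.minor>' suffix, autobuild tags
        # ('N-...') do not; the n/N case matters, as in the shell case pattern
        suffix = '-' + version[1:].split('-')[0] if version.startswith('n') else ''
        return ('https://github.com/yt-dlp/FFmpeg-Builds/releases/download/'
                f'{date}/ffmpeg-{version}-linux{arch_part}-gpl{suffix}.tar.xz')

For the pinned build above, ffmpeg_url('autobuild-2024-12-24-14-15', 'N-118163-g954d55c2a4', 'amd64') yields .../ffmpeg-N-118163-g954d55c2a4-linux64-gpl.tar.xz, the asset the SHA256_FFMPEG_AMD64 checksum refers to.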
diff --git a/tubesync/sync/filtering.py b/tubesync/sync/filtering.py
index 45710fe9..ff356edb 100644
--- a/tubesync/sync/filtering.py
+++ b/tubesync/sync/filtering.py
@@ -15,23 +15,23 @@ def filter_media(instance: Media):
     skip = False
 
     # Check if it's published
-    if filter_published(instance):
+    if not skip and filter_published(instance):
         skip = True
 
     # Check if older than max_cap_age, skip
-    if filter_max_cap(instance):
+    if not skip and filter_max_cap(instance):
         skip = True
 
     # Check if older than source_cutoff
-    if filter_source_cutoff(instance):
+    if not skip and filter_source_cutoff(instance):
         skip = True
 
     # Check if we have filter_text and filter text matches
-    if filter_filter_text(instance):
+    if not skip and filter_filter_text(instance):
         skip = True
 
     # Check if the video is longer than the max, or shorter than the min
-    if filter_duration(instance):
+    if not skip and filter_duration(instance):
         skip = True
 
     # If we aren't already skipping the file, call our custom function that can be overridden
@@ -118,10 +118,12 @@ def filter_max_cap(instance: Media):
         return False
 
     if instance.published <= max_cap_age:
-        log.info(
-            f"Media: {instance.source} / {instance} is too old for "
-            f"the download cap date, marking to be skipped"
-        )
+        # log new media instances, not every media instance every time
+        if not instance.skip:
+            log.info(
+                f"Media: {instance.source} / {instance} is too old for "
+                f"the download cap date, marking to be skipped"
+            )
         return True
 
     return False
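
With the added `not skip and` guards the filter chain short-circuits: once one filter marks the media, the remaining checks no longer run (and no longer log). A loop-based sketch of the same behaviour, assuming the filter functions above; the patch itself keeps the explicit if-chain:

    FILTERS = (filter_published, filter_max_cap, filter_source_cutoff,
               filter_filter_text, filter_duration)

    def first_matching_filter(instance):
        # return the first filter that flags this media, or None;
        # later filters are never evaluated once one matches
        for check in FILTERS:
            if check(instance):
                return check
        return None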
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 455a38a5..2037492d 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -1261,54 +1261,54 @@ class Media(models.Model):
         media_details = self.format_dict
         return media_format.format(**media_details)
 
-    @property
-    def thumbname(self):
-        if self.downloaded and self.media_file:
-            filename = os.path.basename(self.media_file.path)
-        else:
-            filename = self.filename
-        prefix, ext = os.path.splitext(filename)
-        return f'{prefix}.jpg'
-
-    @property
-    def thumbpath(self):
-        return self.source.directory_path / self.thumbname
-
-    @property
-    def nfoname(self):
-        if self.downloaded and self.media_file:
-            filename = os.path.basename(self.media_file.path)
-        else:
-            filename = self.filename
-        prefix, ext = os.path.splitext(filename)
-        return f'{prefix}.nfo'
-
-    @property
-    def nfopath(self):
-        return self.source.directory_path / self.nfoname
-
-    @property
-    def jsonname(self):
-        if self.downloaded and self.media_file:
-            filename = os.path.basename(self.media_file.path)
-        else:
-            filename = self.filename
-        prefix, ext = os.path.splitext(filename)
-        return f'{prefix}.info.json'
-
-    @property
-    def jsonpath(self):
-        return self.source.directory_path / self.jsonname
-
     @property
     def directory_path(self):
         dirname = self.source.directory_path / self.filename
-        return os.path.dirname(str(dirname))
+        return dirname.parent
 
     @property
     def filepath(self):
         return self.source.directory_path / self.filename
 
+    @property
+    def thumbname(self):
+        if self.downloaded and self.media_file:
+            filename = self.media_file.path
+        else:
+            filename = self.filename
+        prefix, ext = os.path.splitext(os.path.basename(filename))
+        return f'{prefix}.jpg'
+
+    @property
+    def thumbpath(self):
+        return self.directory_path / self.thumbname
+
+    @property
+    def nfoname(self):
+        if self.downloaded and self.media_file:
+            filename = self.media_file.path
+        else:
+            filename = self.filename
+        prefix, ext = os.path.splitext(os.path.basename(filename))
+        return f'{prefix}.nfo'
+
+    @property
+    def nfopath(self):
+        return self.directory_path / self.nfoname
+
+    @property
+    def jsonname(self):
+        if self.downloaded and self.media_file:
+            filename = self.media_file.path
+        else:
+            filename = self.filename
+        prefix, ext = os.path.splitext(os.path.basename(filename))
+        return f'{prefix}.info.json'
+
+    @property
+    def jsonpath(self):
+        return self.directory_path / self.jsonname
+
     @property
     def thumb_file_exists(self):
         if not self.thumb:
@@ -1353,7 +1353,7 @@ class Media(models.Model):
         nfo.text = '\n  '
         # title = media metadata title
         title = nfo.makeelement('title', {})
-        title.text = clean_emoji(str(self.name).strip())
+        title.text = clean_emoji(self.title)
         title.tail = '\n  '
         nfo.append(title)
         # showtitle = source name
@@ -1499,7 +1499,16 @@ class Media(models.Model):
         if not callable(indexer):
             raise Exception(f'Media with source type f"{self.source.source_type}" '
                             f'has no indexer')
-        return indexer(self.url)
+        response = indexer(self.url)
+        no_formats_available = (
+            not response or
+            "formats" not in response.keys() or
+            0 == len(response["formats"])
+        )
+        if no_formats_available:
+            self.can_download = False
+            self.skip = True
+        return response
 
     def calculate_episode_number(self):
         if self.source.source_type == Source.SOURCE_TYPE_YOUTUBE_PLAYLIST:
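
directory_path now returns dirname.parent, a pathlib.Path, where the old code returned the str produced by os.path.dirname(); that type change is why the assertEqual calls in tests.py later in this patch drop their str() wrappers. A minimal illustration, with made-up paths:

    from pathlib import Path

    base = Path('/downloads/video/Some Channel')
    media_path = base / '2017/mediakey.mkv'
    assert media_path.parent == base / '2017'       # Path compared to Path
    assert media_path.parent != str(base / '2017')  # a Path never equals its str form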
diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py
index 0ca0e897..59794e0d 100644
--- a/tubesync/sync/signals.py
+++ b/tubesync/sync/signals.py
@@ -13,7 +13,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_task,
                     download_media_thumbnail, download_media_metadata,
                     map_task_to_instance, check_source_directory_exists,
                     download_media, rescan_media_server, download_source_images,
-                    save_all_media_for_source)
+                    save_all_media_for_source, get_media_metadata_task)
 from .utils import delete_file, glob_quote
 from .filtering import filter_media
 
@@ -103,6 +103,10 @@ def task_task_failed(sender, task_id, completed_task, **kwargs):
         obj.has_failed = True
         obj.save()
 
+    if isinstance(obj, Media) and completed_task.task_name == "sync.tasks.download_media_metadata":
+        log.error(f'Permanent failure for media: {obj} task: {completed_task}')
+        obj.skip = True
+        obj.save()
 
 @receiver(post_save, sender=Media)
 def media_post_save(sender, instance, created, **kwargs):
@@ -134,7 +138,7 @@ def media_post_save(sender, instance, created, **kwargs):
         instance.save()
         post_save.connect(media_post_save, sender=Media)
     # If the media is missing metadata schedule it to be downloaded
-    if not instance.metadata:
+    if not instance.metadata and not instance.skip and not get_media_metadata_task(instance.pk):
         log.info(f'Scheduling task to download metadata for: {instance.url}')
         verbose_name = _('Downloading metadata for "{}"')
         download_media_metadata(
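
Together with the tasks.py changes below, this closes a retry loop: a permanent download_media_metadata failure now sets skip=True, and media_post_save only schedules a metadata download when all three guards pass. A sketch of the combined condition, assuming the helper added in tasks.py; the patch inlines this directly in the if statement:

    def should_schedule_metadata(instance):
        return (not instance.metadata                          # nothing fetched yet
                and not instance.skip                          # not failed or filtered out
                and not get_media_metadata_task(instance.pk))  # not already queued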
diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index 37983932..3df651ba 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -49,6 +49,7 @@ def map_task_to_instance(task):
         'sync.tasks.check_source_directory_exists': Source,
         'sync.tasks.download_media_thumbnail': Media,
         'sync.tasks.download_media': Media,
+        'sync.tasks.download_media_metadata': Media,
         'sync.tasks.save_all_media_for_source': Source,
     }
     MODEL_URL_MAP = {
@@ -117,6 +118,12 @@ def get_media_download_task(media_id):
     except IndexError:
         return False
 
+def get_media_metadata_task(media_id):
+    try:
+        return Task.objects.get_task('sync.tasks.download_media_metadata',
+                                     args=(str(media_id),))[0]
+    except IndexError:
+        return False
 
 def delete_task_by_source(task_name, source_id):
     return Task.objects.filter(task_name=task_name, queue=str(source_id)).delete()
@@ -146,12 +153,12 @@ def cleanup_old_media():
 
 
 def cleanup_removed_media(source, videos):
-    media_objects = Media.objects.filter(source=source, downloaded=True)
-    for item in media_objects:
-        matching_source_item = [video['id'] for video in videos if video['id'] == item.key]
+    media_objects = Media.objects.filter(source=source)
+    for media in media_objects:
+        matching_source_item = [video['id'] for video in videos if video['id'] == media.key]
         if not matching_source_item:
-            log.info(f'{item.title} is no longer in source, removing')
-            item.delete()
+            log.info(f'{media.name} is no longer in source, removing')
+            media.delete()
 
 
 @background(schedule=0)
@@ -191,7 +198,15 @@ def index_source_task(source_id):
         media.source = source
         try:
             media.save()
-            log.info(f'Indexed media: {source} / {media}')
+            log.debug(f'Indexed media: {source} / {media}')
+            # log the new media instances
+            new_media_instance = (
+                media.created and
+                source.last_crawl and
+                media.created >= source.last_crawl
+            )
+            if new_media_instance:
+                log.info(f'Indexed new media: {source} / {media}')
         except IntegrityError as e:
             log.error(f'Index media failed: {source} / {media} with "{e}"')
     # Tack on a cleanup of old completed tasks
@@ -199,7 +214,7 @@ def index_source_task(source_id):
     # Tack on a cleanup of old media
     cleanup_old_media()
     if source.delete_removed_media:
-        log.info(f'Cleaning up media no longer in source {source}')
+        log.info(f'Cleaning up media no longer in source: {source}')
         cleanup_removed_media(source, videos)
 
 
@@ -236,7 +251,7 @@ def download_source_images(source_id):
                   f'source exists with ID: {source_id}')
         return
     avatar, banner = source.get_image_url
-    log.info(f'Thumbnail URL for source with ID: {source_id} '
+    log.info(f'Thumbnail URL for source with ID: {source_id} / {source} '
             f'Avatar: {avatar} '
             f'Banner: {banner}')
     if banner != None:
@@ -269,7 +284,7 @@ def download_source_images(source_id):
         with open(file_path, 'wb') as f:
             f.write(django_file.read())
 
-    log.info(f'Thumbnail downloaded for source with ID: {source_id}')
+    log.info(f'Thumbnail downloaded for source with ID: {source_id} / {source}')
 
 
 @background(schedule=0)
@@ -285,7 +300,7 @@ def download_media_metadata(media_id):
                   f'media exists with ID: {media_id}')
         return
     if media.manual_skip:
-        log.info(f'Task for ID: {media_id} skipped, due to task being manually skipped.')
+        log.info(f'Task for ID: {media_id} / {media} skipped, due to task being manually skipped.')
         return
     source = media.source
     metadata = media.index_metadata()
@@ -306,7 +321,7 @@ def download_media_metadata(media_id):
     # Don't filter media here, the post_save signal will handle that
     media.save()
     log.info(f'Saved {len(media.metadata)} bytes of metadata for: '
-             f'{source} / {media_id}')
+             f'{source} / {media}: {media_id}')
 
 
 @background(schedule=0)
@@ -359,7 +374,7 @@ def download_media(media_id):
         return
     if media.skip:
         # Media was toggled to be skipped after the task was scheduled
-        log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but '
+        log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but '
                  f'it is now marked to be skipped, not downloading')
         return
     if media.downloaded and media.media_file:
@@ -430,7 +445,7 @@ def download_media(media_id):
                 copyfile(media.thumb.path, media.thumbpath)
             # If selected, write an NFO file
             if media.source.write_nfo:
-                log.info(f'Writing media NFO file to: to: {media.nfopath}')
+                log.info(f'Writing media NFO file to: {media.nfopath}')
                 write_text_file(media.nfopath, media.nfoxml)
             # Schedule a task to update media servers
             for mediaserver in MediaServer.objects.all():
@@ -446,7 +461,7 @@ def download_media(media_id):
         else:
             # Expected file doesn't exist on disk
             err = (f'Failed to download media: {media} (UUID: {media.pk}) to disk, '
-                   f'expected outfile does not exist: {media.filepath}')
+                   f'expected outfile does not exist: {filepath}')
             log.error(err)
             # Raising an error here triggers the task to be re-attempted (or fail)
             raise DownloadFailedException(err)
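
get_media_metadata_task() follows the same "first pending task or False" idiom as the existing get_media_download_task(), so the result can be used directly in a truth test. A hedged usage sketch with illustrative variable names:

    pending = get_media_metadata_task(media.pk)
    if pending:
        # a metadata task is already queued for this media; do not add another
        log.info(f'Metadata task already scheduled: {pending}')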
diff --git a/tubesync/sync/templates/sync/media-item.html b/tubesync/sync/templates/sync/media-item.html
index 0c78f9b4..6f751be6 100644
--- a/tubesync/sync/templates/sync/media-item.html
+++ b/tubesync/sync/templates/sync/media-item.html
@@ -98,12 +98,19 @@
         {% if media.downloaded %}
         <tr>
           <td class="hide-on-small-only">Filename</td>
-          <td><span class="hide-on-med-and-up">Filename<br></span>{{ media.filename }}</td>
+          <td><span class="hide-on-med-and-up">Filename<br></span>{{ filename_path.name }}</td>
         </tr>
         <tr>
           <td class="hide-on-small-only">Directory</td>
           <td><span class="hide-on-med-and-up">Directory<br></span>{{ media.directory_path }}</td>
         </tr>
+        <tr>
+          <td class="hide-on-small-only">Database Filepath</td>
+          <td><span class="hide-on-med-and-up">DB Filepath<br></span>{{ media_file_path }}
+            {% if media_file_path == media.filepath %}
+              &nbsp;(matched)
+            {% endif %}
+          </td></tr>
         <tr>
           <td class="hide-on-small-only">File size</td>
           <td><span class="hide-on-med-and-up">File size<br></span>{{ media.downloaded_filesize|filesizeformat }}</td>
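
The template's "(matched)" marker depends on comparing two pathlib.Path objects, which is why views.py below wraps both context values in pathlib.Path: Path comparison is structural, not textual. For example:

    from pathlib import Path

    Path('/downloads/video//a.mkv') == Path('/downloads/video/a.mkv')  # True, normalised
    Path('/downloads/video/a.mkv') == '/downloads/video/a.mkv'         # False, Path vs str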
diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py
index 6aa0ccb6..8f0de6ef 100644
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -597,11 +597,11 @@ class FilepathTestCase(TestCase):
         # Check child directories work
         self.source.media_format = '{yyyy}/{key}.{ext}'
         self.assertEqual(self.media.directory_path,
-                         str(self.source.directory_path / '2017'))
+                         self.source.directory_path / '2017')
         self.assertEqual(self.media.filename, '2017/mediakey.mkv')
         self.source.media_format = '{yyyy}/{yyyy_mm_dd}/{key}.{ext}'
         self.assertEqual(self.media.directory_path,
-                         str(self.source.directory_path / '2017/2017-09-11'))
+                         self.source.directory_path / '2017/2017-09-11')
         self.assertEqual(self.media.filename, '2017/2017-09-11/mediakey.mkv')
         # Check media specific media format keys work
         test_media = Media.objects.create(
diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py
index 94e91432..52090042 100644
--- a/tubesync/sync/views.py
+++ b/tubesync/sync/views.py
@@ -582,6 +582,8 @@ class MediaItemView(DetailView):
         data['video_exact'] = video_exact
         data['video_format'] = video_format
         data['youtube_dl_format'] = self.object.get_format_str()
+        data['filename_path'] = pathlib.Path(self.object.filename)
+        data['media_file_path'] = pathlib.Path(self.object.media_file.path) if self.object.media_file else None
         return data
 
 
diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py
index cb612c69..1eac4c7f 100644
--- a/tubesync/sync/youtube.py
+++ b/tubesync/sync/youtube.py
@@ -73,6 +73,26 @@ def get_channel_image_info(url):
         raise YouTubeError(f'Failed to extract channel info for "{url}": {e}') from e
 
 
+def _subscriber_only(msg='', response=None):
+    if response is None:
+        # process msg only
+        msg = str(msg)
+        if 'access to members-only content' in msg:
+            return True
+        if ': Join this channel' in msg:
+            return True
+    else:
+        # ignore msg entirely
+        if not isinstance(response, dict):
+            raise TypeError(f'response must be a dict, got "{type(response)}" instead')
+
+        if 'availability' not in response.keys():
+            return False
+
+        # check for the specific expected value
+        return 'subscriber_only' == response.get('availability')
+    return False
+
+
 def get_media_info(url):
     '''
@@ -82,6 +102,8 @@ def get_media_info(url):
     '''
     opts = get_yt_opts()
     opts.update({
+        'ignoreerrors': False, # explicitly set this to catch exceptions
+        'ignore_no_formats_error': False, # we must fail first to try again with this enabled
         'skip_download': True,
         'simulate': True,
         'logger': log,
@@ -92,7 +114,19 @@ def get_media_info(url):
     try:
         response = y.extract_info(url, download=False)
     except yt_dlp.utils.DownloadError as e:
-        raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e
+        if not _subscriber_only(msg=e.msg):
+            raise YouTubeError(f'Failed to extract_info for "{url}": {e}') from e
+        # adjust options and try again
+        opts.update({'ignore_no_formats_error': True,})
+        with yt_dlp.YoutubeDL(opts) as yy:
+            try:
+                response = yy.extract_info(url, download=False)
+            except yt_dlp.utils.DownloadError as ee:
+                raise YouTubeError(f'Failed (again) to extract_info for "{url}": {ee}') from ee
+        # validate the response is what we expected
+        if not _subscriber_only(response=response):
+            response = {}
+
     if not response:
         raise YouTubeError(f'Failed to extract_info for "{url}": No metadata was '
                            f'returned by youtube-dl, check for error messages in the '
@@ -151,6 +185,7 @@ def download_media(url, media_format, extension, output_file, info_json,
        'outtmpl': os.path.basename(output_file),
        'quiet': False if settings.DEBUG else True,
        'verbose': True if settings.DEBUG else False,
+       'noprogress': None if settings.DEBUG else True,
        'progress_hooks': [hook],
        'writeinfojson': info_json,
        'postprocessors': [],
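
The new flow in get_media_info() fails fast with strict options, retries with ignore_no_formats_error enabled only when the error message indicates members-only content, and then trusts the retry only if the response's availability field confirms it. A condensed, hedged sketch of that control flow, reusing the _subscriber_only() helper above; the function name here is illustrative:

    import yt_dlp

    def extract_info_members_only_aware(url, base_opts):
        opts = dict(base_opts, ignoreerrors=False, ignore_no_formats_error=False)
        try:
            with yt_dlp.YoutubeDL(opts) as ydl:
                return ydl.extract_info(url, download=False)
        except yt_dlp.utils.DownloadError as e:
            if not _subscriber_only(msg=e.msg):
                raise
            # second pass: tolerate missing formats, then verify availability
            opts['ignore_no_formats_error'] = True
            with yt_dlp.YoutubeDL(opts) as ydl:
                response = ydl.extract_info(url, download=False)
            return response if _subscriber_only(response=response) else {}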
diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container
index a0426a4c..e75778b8 100644
--- a/tubesync/tubesync/local_settings.py.container
+++ b/tubesync/tubesync/local_settings.py.container
@@ -60,7 +60,7 @@ if BACKGROUND_TASK_ASYNC_THREADS > MAX_BACKGROUND_TASK_ASYNC_THREADS:
 
 MEDIA_ROOT = CONFIG_BASE_DIR / 'media'
 DOWNLOAD_ROOT = DOWNLOADS_BASE_DIR
-YOUTUBE_DL_CACHEDIR = CONFIG_BASE_DIR / 'cache'
+YOUTUBE_DL_CACHEDIR = CONFIG_BASE_DIR / 'cache/youtube'
 YOUTUBE_DL_TEMPDIR = DOWNLOAD_ROOT / 'cache'
 COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt'
 
@@ -88,3 +88,13 @@ SOURCE_DOWNLOAD_DIRECTORY_PREFIX = True if SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR
 
 VIDEO_HEIGHT_CUTOFF = int(os.getenv("TUBESYNC_VIDEO_HEIGHT_CUTOFF", "240"))
+
+
+# ensure that the current directory exists
+if not YOUTUBE_DL_CACHEDIR.is_dir():
+    YOUTUBE_DL_CACHEDIR.mkdir(parents=True)
+# rename any old yt_dlp cache directories to the current directory
+old_youtube_cache_dirs = list(YOUTUBE_DL_CACHEDIR.parent.glob('youtube-*'))
+for cache_dir in old_youtube_cache_dirs:
+    cache_dir.rename(YOUTUBE_DL_CACHEDIR / cache_dir.name)
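
The startup block above migrates old cache directories into the new location: anything matching cache/youtube-* is renamed under cache/youtube/ so existing cache entries survive the path change. For example, assuming yt-dlp's youtube-nsig signature cache (directory name used illustratively):

    from pathlib import Path

    old = Path('/config/cache/youtube-nsig')        # pre-change location
    new = Path('/config/cache/youtube') / old.name  # /config/cache/youtube/youtube-nsig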