diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c1dd9205..6068cab1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,7 +28,9 @@ jobs: pip install pipenv pipenv install --system --skip-lock - name: Set up Django environment - run: cp tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py + run: | + cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py + cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ patches/yt_dlp/* - name: Run Django tests run: cd tubesync && python3 manage.py test --verbosity=2 containerise: diff --git a/.gitignore b/.gitignore index 17e61eba..c5cd63bc 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,9 @@ __pycache__/ # C extensions *.so +# vim swap files +.*.swp + # Distribution / packaging .Python build/ diff --git a/Dockerfile b/Dockerfile index 43aebe28..99e2b102 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,36 @@ ARG FFMPEG_CHECKSUM_ALGORITHM="sha256" ARG S6_CHECKSUM_ALGORITHM="sha256" +FROM debian:${DEBIAN_VERSION} AS tubesync-base + +ARG TARGETARCH + +ENV DEBIAN_FRONTEND="noninteractive" \ + HOME="/root" \ + LANGUAGE="en_US.UTF-8" \ + LANG="en_US.UTF-8" \ + LC_ALL="en_US.UTF-8" \ + TERM="xterm" \ + # Do not include compiled byte-code + PIP_NO_COMPILE=1 \ + PIP_ROOT_USER_ACTION='ignore' + +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ + # to be careful, ensure that these files aren't from a different architecture + rm -f /var/cache/apt/*cache.bin ; \ + # Update from the network and keep cache + rm -f /etc/apt/apt.conf.d/docker-clean ; \ + set -x && \ + apt-get update && \ + # Install locales + apt-get -y --no-install-recommends install locales && \ + printf -- "en_US.UTF-8 UTF-8\n" > /etc/locale.gen && \ + locale-gen en_US.UTF-8 && \ + # Clean up + apt-get -y autopurge && \ 
+ apt-get -y autoclean + FROM alpine:${ALPINE_VERSION} AS ffmpeg-download ARG FFMPEG_DATE ARG FFMPEG_VERSION @@ -218,52 +248,24 @@ RUN set -eu ; \ FROM scratch AS s6-overlay COPY --from=s6-overlay-extracted /s6-overlay-rootfs / -FROM debian:${DEBIAN_VERSION} AS tubesync +FROM tubesync-base AS tubesync ARG S6_VERSION ARG FFMPEG_DATE ARG FFMPEG_VERSION -ENV DEBIAN_FRONTEND="noninteractive" \ - HOME="/root" \ - LANGUAGE="en_US.UTF-8" \ - LANG="en_US.UTF-8" \ - LC_ALL="en_US.UTF-8" \ - TERM="xterm" \ - # Do not include compiled byte-code - PIP_NO_COMPILE=1 \ - PIP_ROOT_USER_ACTION='ignore' \ - S6_CMD_WAIT_FOR_SERVICES_MAXTIME="0" +ARG TARGETARCH ENV S6_VERSION="${S6_VERSION}" \ FFMPEG_DATE="${FFMPEG_DATE}" \ FFMPEG_VERSION="${FFMPEG_VERSION}" -# Install third party software -COPY --from=s6-overlay / / -COPY --from=ffmpeg /usr/local/bin/ /usr/local/bin/ - # Reminder: the SHELL handles all variables -RUN --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ - --mount=type=cache,id=apt-cache-cache,sharing=locked,target=/var/cache/apt \ +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ set -x && \ - # Update from the network and keep cache - rm -f /etc/apt/apt.conf.d/docker-clean && \ apt-get update && \ - # Install locales - apt-get -y --no-install-recommends install locales && \ - printf -- "en_US.UTF-8 UTF-8\n" > /etc/locale.gen && \ - locale-gen en_US.UTF-8 && \ - # Install file - apt-get -y --no-install-recommends install file && \ - # Installed s6 (using COPY earlier) - file -L /command/s6-overlay-suexec && \ - # Installed ffmpeg (using COPY earlier) - /usr/local/bin/ffmpeg -version && \ - file /usr/local/bin/ff* && \ - # Clean up file - apt-get -y autoremove --purge file && \ # Install dependencies we keep # Install required distro packages apt-get -y --no-install-recommends install \ @@ -281,13 +283,35 @@ RUN 
--mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ curl \ less \ && \ + # Link to the current python3 version + ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ + # Create a 'app' user which the application will run as + groupadd app && \ + useradd -M -d /app -s /bin/false -g app app && \ # Clean up apt-get -y autopurge && \ - apt-get -y autoclean && \ - rm -rf /tmp/* + apt-get -y autoclean -# Copy over pip.conf to use piwheels -COPY pip.conf /etc/pip.conf +# Install third party software +COPY --from=s6-overlay / / +COPY --from=ffmpeg /usr/local/bin/ /usr/local/bin/ + +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ + set -x && \ + apt-get update && \ + # Install file + apt-get -y --no-install-recommends install file && \ + # Installed s6 (using COPY earlier) + file -L /command/s6-overlay-suexec && \ + # Installed ffmpeg (using COPY earlier) + /usr/local/bin/ffmpeg -version && \ + file /usr/local/bin/ff* && \ + # Clean up file + apt-get -y autoremove --purge file && \ + # Clean up + apt-get -y autopurge && \ + apt-get -y autoclean # Switch workdir to the the app WORKDIR /app @@ -295,12 +319,10 @@ WORKDIR /app # Set up the app RUN --mount=type=tmpfs,target=/cache \ --mount=type=cache,id=pipenv-cache,sharing=locked,target=/cache/pipenv \ - --mount=type=cache,id=apt-lib-cache,sharing=locked,target=/var/lib/apt \ - --mount=type=cache,id=apt-cache-cache,sharing=locked,target=/var/cache/apt \ + --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ --mount=type=bind,source=Pipfile,target=/app/Pipfile \ set -x && \ - # Update from the network and keep cache - rm -f /etc/apt/apt.conf.d/docker-clean && \ apt-get update && \ # 
Install required build packages apt-get -y --no-install-recommends install \ @@ -316,9 +338,6 @@ RUN --mount=type=tmpfs,target=/cache \ python3-pip \ zlib1g-dev \ && \ - # Create a 'app' user which the application will run as - groupadd app && \ - useradd -M -d /app -s /bin/false -g app app && \ # Install non-distro packages cp -at /tmp/ "${HOME}" && \ HOME="/tmp/${HOME#/}" \ @@ -348,12 +367,18 @@ RUN --mount=type=tmpfs,target=/cache \ COPY tubesync /app COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings.py +# patch background_task +COPY patches/background_task/ \ + /usr/local/lib/python3/dist-packages/background_task/ + +# patch yt_dlp +COPY patches/yt_dlp/ \ + /usr/local/lib/python3/dist-packages/yt_dlp/ + # Build app RUN set -x && \ # Make absolutely sure we didn't accidentally bundle a SQLite dev database rm -rf /app/db.sqlite3 && \ - # Check nginx configuration - nginx -t && \ # Run any required app commands /usr/bin/python3 -B /app/manage.py compilescss && \ /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \ @@ -363,8 +388,6 @@ RUN set -x && \ mkdir -v -p /config/cache/pycache && \ mkdir -v -p /downloads/audio && \ mkdir -v -p /downloads/video && \ - # Link to the current python3 version - ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ # Append software versions ffmpeg_version=$(/usr/local/bin/ffmpeg -version | awk -v 'ev=31' '1 == NR && "ffmpeg" == $1 { print $3; ev=0; } END { exit ev; }') && \ test -n "${ffmpeg_version}" && \ @@ -373,13 +396,8 @@ RUN set -x && \ # Copy root COPY config/root / -# patch background_task -COPY patches/background_task/ \ - /usr/local/lib/python3/dist-packages/background_task/ - -# patch yt_dlp -COPY patches/yt_dlp/ \ - /usr/local/lib/python3/dist-packages/yt_dlp/ +# Check nginx configuration copied from config/root/etc +RUN set -x && nginx -t # Create a healthcheck HEALTHCHECK 
--interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck.py", "http://127.0.0.1:8080/healthcheck"] @@ -387,6 +405,7 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=3m CMD ["/app/healthcheck # ENVS and ports ENV PYTHONPATH="/app" \ PYTHONPYCACHEPREFIX="/config/cache/pycache" \ + S6_CMD_WAIT_FOR_SERVICES_MAXTIME="0" \ XDG_CACHE_HOME="/config/cache" EXPOSE 4848 diff --git a/Pipfile b/Pipfile index 3c29cafb..bf53b4bf 100644 --- a/Pipfile +++ b/Pipfile @@ -15,12 +15,12 @@ gunicorn = "*" httptools = "*" django-background-tasks = ">=1.2.8" django-basicauth = "*" -psycopg2-binary = "*" +psycopg = {extras = ["binary", "pool"], version = "*"} mysqlclient = "*" PySocks = "*" urllib3 = {extras = ["socks"], version = "*"} requests = {extras = ["socks"], version = "*"} -yt-dlp = "*" +yt-dlp = {extras = ["default", "curl-cffi"], version = "*"} emoji = "*" brotli = "*" html5lib = "*" diff --git a/README.md b/README.md index af3cd910..17367a4a 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ currently just Plex, to complete the PVR experience. TubeSync is designed to be run in a container, such as via Docker or Podman. It also works in a Docker Compose stack. `amd64` (most desktop PCs and servers) and `arm64` -(modern ARM computers, such as the Rasperry Pi 3 or later) are supported. +(modern ARM computers, such as the Raspberry Pi 3 or later) are supported. Example (with Docker on *nix): @@ -356,7 +356,7 @@ etc.). Configuration of this is beyond the scope of this README. Only two are supported, for the moment: - `amd64` (most desktop PCs and servers) - `arm64` -(modern ARM computers, such as the Rasperry Pi 3 or later) +(modern ARM computers, such as the Raspberry Pi 3 or later) Others may be made available, if there is demand. 
class PatchedYoutubeDL(YoutubeDL):
    """
    Holder for the replacement `_sanitize_thumbnails` method that is
    assigned onto `YoutubeDL` by the statements following this class.
    """

    def _sanitize_thumbnails(self, info_dict):
        """
        Normalise the thumbnail entries of `info_dict` in place.

        A lone `thumbnail` URL is promoted to a one-item `thumbnails` list.
        After sorting, every entry gets an `id` (its position, as a string,
        when it has none), a `resolution` string when both `width` and
        `height` are set, and a sanitized `url`.

        The thumbnail URLs are only probed when the `check_thumbnails`
        param is exactly `True`; `info_dict['thumbnails']` is then a
        `LazyList` fed by the probing generator below. Otherwise the plain
        list is stored.
        """
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if not thumbnails:
            return

        def check_thumbnails(thumbnails):
            # These names are not provided by the module's import block;
            # without them the generator fails with NameError on first use.
            from yt_dlp.networking import HEADRequest
            from yt_dlp.networking.exceptions import network_exceptions

            for t in thumbnails:
                self.to_screen(f'[info] Testing thumbnail {t["id"]}: {t["url"]!r}')
                try:
                    self.urlopen(HEADRequest(t['url']))
                except network_exceptions as err:
                    self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
                    continue
                yield t

        self._sort_thumbnails(thumbnails)
        for i, t in enumerate(thumbnails):
            if t.get('id') is None:
                t['id'] = str(i)
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            t['url'] = sanitize_url(t['url'])

        if self.params.get('check_thumbnails') is True:
            # The generator walks the reversed list and LazyList reverses it
            # back, so the resulting order matches `thumbnails`.
            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
        else:
            info_dict['thumbnails'] = thumbnails


# Keep a handle on the stock implementation, then install the patched one.
YoutubeDL.__unpatched___sanitize_thumbnails = YoutubeDL._sanitize_thumbnails
YoutubeDL._sanitize_thumbnails = PatchedYoutubeDL._sanitize_thumbnails
def getenv(key, default=None, /, *, integer=False, string=True):
    '''
        Guarantees a returned type from calling `os.getenv`
        The caller can request the integer type,
          or use the default string type.

        `key` must be a `str`; `default` may be a `bool`, `float`, `int`,
        `str` or `None` and is converted with `str()` before it is handed
        to `os.getenv`; `integer` and `string` must be `bool`.

        When the variable is unset and there is no default, the result is
        `0` if `integer` is set, otherwise `''` if `string` is set,
        otherwise `None`. When a value is available and `integer` is set,
        it is converted with `int(float(value))`.

        Raises `TypeError` for an argument of an unsupported type.
    '''

    # One explicit entry per argument, so none of them can be skipped.
    supported_types = {
        'key': (str,),
        'default': (bool, float, int, str, type(None)),
        'integer': (bool,),
        'string': (bool,),
    }
    args = dict(key=key, default=default, integer=integer, string=string)
    unsupported_type_msg = 'Unsupported type for positional argument, "{}": {}'
    for name, types in supported_types.items():
        value = args[name]
        # Raised explicitly: an assert would vanish under `python -O`.
        if not isinstance(value, types):
            raise TypeError(unsupported_type_msg.format(name, type(value)))

    fallback = None if default is None else str(default)
    result = os.getenv(key, fallback)
    if result is None:
        if integer:
            return int()
        return str() if string else None
    if integer:
        # Going through float() accepts values such as '3.0'.
        return int(float(result))
    return result


def time_func(func):
    '''
        Decorator that times a call to `func`.

        The wrapper returns `(result, (elapsed, start, end))` where the
        timestamps come from `time.perf_counter()`.
    '''
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        return (result, (end - start, start, end,),)
    return wrapper


def profile_func(func):
    '''
        Decorator that runs `func` under `cProfile`.

        The wrapper returns `(result, (report, stats, stream))`: the
        printed report text sorted by cumulative time, the `pstats.Stats`
        object, and the `io.StringIO` the report was written to.
    '''
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        s = io.StringIO()
        # The context manager enables the profiler on entry and disables
        # it on exit, so no explicit enable()/disable() calls are needed.
        with cProfile.Profile() as pr:
            result = func(*args, **kwargs)
        ps = pstats.Stats(pr, stream=s)
        ps.sort_stats(
            pstats.SortKey.CUMULATIVE
        ).print_stats()
        return (result, (s.getvalue(), ps, s),)
    return wrapper
@property
def is_active(self):
    '''
        Whether this source still has any work enabled.

        True only when `index_schedule` is truthy and at least one of
        `download_media`, `index_streams` or `index_videos` is set.
        Always returns a `bool`, never the raw schedule value.
    '''
    if not self.index_schedule:
        return False
    return bool(
        self.download_media or
        self.index_streams or
        self.index_videos
    )
def calculate_episode_number(self):
    '''
        Return the 1-based position of this media item among the media of
        its source that have metadata, ordered by `published`, `created`
        and `key`.

        For playlist sources every such item of the source is counted.
        For other sources only items published in the same year are
        counted; the year is taken from `published`, else from
        `upload_date` (when metadata is present), else from
        `download_date`, else from `created`.

        Returns `None` when this item is not among the candidates, for
        example because it has no metadata.
    '''
    candidates = Media.objects.filter(
        source=self.source,
        metadata__isnull=False,
    )
    if not self.source.is_playlist:
        self_year = self.created.year # unlikely to be accurate
        if self.published:
            self_year = self.published.year
        elif self.has_metadata and self.upload_date:
            self_year = self.upload_date.year
        elif self.download_date:
            # also, unlikely to be accurate
            self_year = self.download_date.year
        candidates = candidates.filter(published__year=self_year)
    # Only primary keys are needed to find the position; this avoids
    # loading every full row (including its metadata) from the database.
    ordered_pks = candidates.order_by(
        'published',
        'created',
        'key',
    ).values_list('pk', flat=True)
    for counter, pk in enumerate(ordered_pks, start=1):
        if pk == self.pk:
            return counter
    return None
download_media_metadata, map_task_to_instance, check_source_directory_exists, download_media, rescan_media_server, download_source_images, - save_all_media_for_source, rename_media, - get_media_metadata_task, get_media_download_task) + delete_all_media_for_source, save_all_media_for_source, + rename_media, get_media_metadata_task, get_media_download_task) from .utils import delete_file, glob_quote, mkdir_p from .filtering import filter_media from .choices import Val, YouTube_SourceType @@ -43,6 +44,8 @@ def source_pre_save(sender, instance, **kwargs): work_directory = existing_dirpath for _count in range(parents_count, 0, -1): work_directory = work_directory.parent + if not Path(work_directory).resolve(strict=True).is_relative_to(Path(settings.DOWNLOAD_ROOT)): + work_directory = Path(settings.DOWNLOAD_ROOT) with TemporaryDirectory(suffix=('.'+new_dirpath.name), prefix='.tmp.', dir=work_directory) as tmp_dir: tmp_dirpath = Path(tmp_dir) existed = None @@ -129,7 +132,7 @@ def source_post_save(sender, instance, created, **kwargs): verbose_name = _('Checking all media for source "{}"') save_all_media_for_source( str(instance.pk), - priority=9, + priority=25, verbose_name=verbose_name.format(instance.name), remove_existing_tasks=True ) @@ -139,16 +142,45 @@ def source_post_save(sender, instance, created, **kwargs): def source_pre_delete(sender, instance, **kwargs): # Triggered before a source is deleted, delete all media objects to trigger # the Media models post_delete signal - for media in Media.objects.filter(source=instance): - log.info(f'Deleting media for source: {instance.name} item: {media.name}') - media.delete() + log.info(f'Deactivating source: {instance.name}') + instance.deactivate() + log.info(f'Deleting tasks for source: {instance.name}') + delete_task_by_source('sync.tasks.index_source_task', instance.pk) + delete_task_by_source('sync.tasks.check_source_directory_exists', instance.pk) + delete_task_by_source('sync.tasks.rename_all_media_for_source', 
@receiver(post_delete, sender=Source)
def source_post_delete(sender, instance, **kwargs):
    # Runs once the Source row is gone: drop its remaining tasks, then
    # remove its directory when the removal marker file is present.
    source = instance
    log.info(f'Deleting tasks for removed source: {source.name}')
    task_names = (
        'sync.tasks.index_source_task',
        'sync.tasks.check_source_directory_exists',
        'sync.tasks.delete_all_media_for_source',
        'sync.tasks.rename_all_media_for_source',
        'sync.tasks.save_all_media_for_source',
    )
    for task_name in task_names:
        delete_task_by_source(task_name, instance.pk)
    # Remove the directory, if the user requested that
    directory_path = Path(source.directory_path)
    removal_marker = directory_path / '.to_be_removed'
    if not removal_marker.is_file():
        return
    log.info(f'Deleting directory for: {source.name}: {directory_path}')
    rmtree(directory_path, ignore_errors=True)
verbose_name=verbose_name.format(instance.pk), remove_existing_tasks=True ) @@ -342,6 +374,8 @@ def media_post_delete(sender, instance, **kwargs): log.info(f'Deleting file for: {instance} path: {file}') delete_file(file) + if not instance.source.is_active: + return # Schedule a task to update media servers for mediaserver in MediaServer.objects.all(): log.info(f'Scheduling media server updates') diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index cf0d99d4..c510b8fd 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -23,7 +23,7 @@ from django.utils.translation import gettext_lazy as _ from background_task import background from background_task.models import Task, CompletedTask from common.logger import log -from common.errors import NoMediaException, DownloadFailedException +from common.errors import NoMediaException, NoMetadataException, DownloadFailedException from common.utils import json_serial from .models import Source, Media, MediaServer from .utils import (get_remote_image, resize_image_to_height, delete_file, @@ -55,6 +55,7 @@ def map_task_to_instance(task): 'sync.tasks.rename_media': Media, 'sync.tasks.rename_all_media_for_source': Source, 'sync.tasks.wait_for_media_premiere': Media, + 'sync.tasks.delete_all_media_for_source': Source, } MODEL_URL_MAP = { Source: 'sync:source', @@ -234,6 +235,9 @@ def index_source_task(source_id): task.save(update_fields={'verbose_name'}) try: media.save() + except IntegrityError as e: + log.error(f'Index media failed: {source} / {media} with "{e}"') + else: log.debug(f'Indexed media: {source} / {media}') # log the new media instances new_media_instance = ( @@ -243,19 +247,27 @@ def index_source_task(source_id): ) if new_media_instance: log.info(f'Indexed new media: {source} / {media}') - except IntegrityError as e: - log.error(f'Index media failed: {source} / {media} with "{e}"') + log.info(f'Scheduling task to download metadata for: {media.url}') + verbose_name = _('Downloading metadata for 
"{}"') + download_media_metadata( + str(media.pk), + priority=20, + verbose_name=verbose_name.format(media.pk), + ) + if task: task.verbose_name = verbose_name with atomic(): task.save(update_fields={'verbose_name'}) + # Tack on a cleanup of old completed tasks cleanup_completed_tasks() - # Tack on a cleanup of old media - cleanup_old_media() - if source.delete_removed_media: - log.info(f'Cleaning up media no longer in source: {source}') - cleanup_removed_media(source, videos) + with atomic(durable=True): + # Tack on a cleanup of old media + cleanup_old_media() + if source.delete_removed_media: + log.info(f'Cleaning up media no longer in source: {source}') + cleanup_removed_media(source, videos) @background(schedule=0) @@ -425,6 +437,8 @@ def download_media_thumbnail(media_id, url): except Media.DoesNotExist: # Task triggered but the media no longer exists, do nothing return + if not media.has_metadata: + raise NoMetadataException('Metadata is not yet available.') if media.skip: # Media was toggled to be skipped after the task was scheduled log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' @@ -462,6 +476,8 @@ def download_media(media_id): except Media.DoesNotExist: # Task triggered but the media no longer exists, do nothing return + if not media.has_metadata: + raise NoMetadataException('Metadata is not yet available.') if media.skip: # Media was toggled to be skipped after the task was scheduled log.warn(f'Download task triggered for media: {media} (UUID: {media.pk}) but ' @@ -715,3 +731,23 @@ def wait_for_media_premiere(media_id): media.title = _(f'Premieres in {hours(media.published - now)} hours') media.save() +@background(schedule=300, remove_existing_tasks=False) +def delete_all_media_for_source(source_id, source_name): + source = None + try: + source = Source.objects.get(pk=source_id) + except Source.DoesNotExist: + # Task triggered but the source no longer exists, do nothing + log.error(f'Task 
delete_all_media_for_source(pk={source_id}) called but no ' + f'source exists with ID: {source_id}') + pass + mqs = Media.objects.all().defer( + 'metadata', + ).filter( + source=source or source_id, + ) + for media in mqs: + log.info(f'Deleting media for source: {source_name} item: {media.name}') + with atomic(): + media.delete() + diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 99844a39..4c8e672b 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -3,7 +3,6 @@ import os import json from base64 import b64decode import pathlib -import shutil import sys from django.conf import settings from django.http import FileResponse, Http404, HttpResponseNotFound, HttpResponseRedirect @@ -415,15 +414,8 @@ class DeleteSourceView(DeleteView, FormMixin): delete_media = True if delete_media_val is not False else False if delete_media: source = self.get_object() - for media in Media.objects.filter(source=source): - if media.media_file: - file_path = media.media_file.path - matching_files = glob.glob(os.path.splitext(file_path)[0] + '.*') - for file in matching_files: - delete_file(file) - directory_path = source.directory_path - if os.path.exists(directory_path): - shutil.rmtree(directory_path, True) + directory_path = pathlib.Path(source.directory_path) + (directory_path / '.to_be_removed').touch(exist_ok=True) return super().post(request, *args, **kwargs) def get_success_url(self): diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 48cff0c9..65b72e06 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -17,6 +17,8 @@ from django.conf import settings from .hooks import postprocessor_hook, progress_hook from .utils import mkdir_p import yt_dlp +import yt_dlp.patch.check_thumbnails +import yt_dlp.patch.fatal_http_errors from yt_dlp.utils import remove_end @@ -162,12 +164,13 @@ def get_media_info(url, days=None): 'logger': log, 'extract_flat': True, 'check_formats': True, + 'check_thumbnails': False, 
'daterange': yt_dlp.utils.DateRange(start=start), 'extractor_args': { 'youtubetab': {'approximate_date': ['true']}, }, 'paths': paths, - 'sleep_interval_requests': 2, + 'sleep_interval_requests': 2 * settings.BACKGROUND_TASK_ASYNC_THREADS, 'verbose': True if settings.DEBUG else False, }) if start: @@ -279,7 +282,7 @@ def download_media( 'overwrites': None, 'sleep_interval': 10 + int(settings.DOWNLOAD_MEDIA_DELAY / 20), 'max_sleep_interval': settings.DOWNLOAD_MEDIA_DELAY, - 'sleep_interval_requests': 5, + 'sleep_interval_requests': 1 + (2 * settings.BACKGROUND_TASK_ASYNC_THREADS), 'paths': opts.get('paths', dict()), 'postprocessor_args': opts.get('postprocessor_args', dict()), 'postprocessor_hooks': opts.get('postprocessor_hooks', list()), diff --git a/tubesync/tubesync/local_settings.py.container b/tubesync/tubesync/local_settings.py.container index 4b73b7d7..cc20f73b 100644 --- a/tubesync/tubesync/local_settings.py.container +++ b/tubesync/tubesync/local_settings.py.container @@ -1,40 +1,41 @@ -import os import sys from pathlib import Path from urllib.parse import urljoin -from common.utils import parse_database_connection_string +from common.utils import getenv, parse_database_connection_string BASE_DIR = Path(__file__).resolve().parent.parent ROOT_DIR = Path('/') CONFIG_BASE_DIR = ROOT_DIR / 'config' DOWNLOADS_BASE_DIR = ROOT_DIR / 'downloads' -DJANGO_URL_PREFIX = os.getenv('DJANGO_URL_PREFIX', None) -STATIC_URL = str(os.getenv('DJANGO_STATIC_URL', '/static/')) +DJANGO_URL_PREFIX = getenv('DJANGO_URL_PREFIX').strip() +STATIC_URL = getenv('DJANGO_STATIC_URL', '/static/').strip() if DJANGO_URL_PREFIX and STATIC_URL: STATIC_URL = urljoin(DJANGO_URL_PREFIX, STATIC_URL[1:]) # This is not ever meant to be a public web interface so this isn't too critical -SECRET_KEY = str(os.getenv('DJANGO_SECRET_KEY', 'tubesync-django-secret')) +SECRET_KEY = getenv('DJANGO_SECRET_KEY', 'tubesync-django-secret') -ALLOWED_HOSTS_STR = str(os.getenv('TUBESYNC_HOSTS', '*')) 
+ALLOWED_HOSTS_STR = getenv('TUBESYNC_HOSTS', '*') ALLOWED_HOSTS = ALLOWED_HOSTS_STR.split(',') -DEBUG = True if os.getenv('TUBESYNC_DEBUG', False) else False -FORCE_SCRIPT_NAME = os.getenv('DJANGO_FORCE_SCRIPT_NAME', DJANGO_URL_PREFIX) +DEBUG_STR = getenv('TUBESYNC_DEBUG', False) +DEBUG = True if 'true' == DEBUG_STR.strip().lower() else False +FORCE_SCRIPT_NAME = getenv('DJANGO_FORCE_SCRIPT_NAME', DJANGO_URL_PREFIX) database_dict = {} -database_connection_env = os.getenv('DATABASE_CONNECTION', '') +database_connection_env = getenv('DATABASE_CONNECTION') if database_connection_env: database_dict = parse_database_connection_string(database_connection_env) if database_dict: - print(f'Using database connection: {database_dict["ENGINE"]}://' + print(f'Using database connection: {database_dict["DRIVER"]}://' f'{database_dict["USER"]}:[hidden]@{database_dict["HOST"]}:' - f'{database_dict["PORT"]}/{database_dict["NAME"]}', file=sys.stdout) + f'{database_dict["PORT"]}/{database_dict["NAME"]}', + file=sys.stdout, flush=True) DATABASES = { 'default': database_dict, } @@ -60,7 +61,7 @@ else: DEFAULT_THREADS = 1 -BACKGROUND_TASK_ASYNC_THREADS = int(os.getenv('TUBESYNC_WORKERS', DEFAULT_THREADS)) +BACKGROUND_TASK_ASYNC_THREADS = getenv('TUBESYNC_WORKERS', DEFAULT_THREADS, integer=True) MEDIA_ROOT = CONFIG_BASE_DIR / 'media' @@ -70,14 +71,14 @@ YOUTUBE_DL_TEMPDIR = DOWNLOAD_ROOT / 'cache' COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt' -HEALTHCHECK_FIREWALL_STR = str(os.getenv('TUBESYNC_HEALTHCHECK_FIREWALL', 'True')).strip().lower() -HEALTHCHECK_FIREWALL = True if HEALTHCHECK_FIREWALL_STR == 'true' else False -HEALTHCHECK_ALLOWED_IPS_STR = str(os.getenv('TUBESYNC_HEALTHCHECK_ALLOWED_IPS', '127.0.0.1')) +HEALTHCHECK_FIREWALL_STR = getenv('TUBESYNC_HEALTHCHECK_FIREWALL', True) +HEALTHCHECK_FIREWALL = ( 'true' == HEALTHCHECK_FIREWALL_STR.strip().lower() ) +HEALTHCHECK_ALLOWED_IPS_STR = getenv('TUBESYNC_HEALTHCHECK_ALLOWED_IPS', '127.0.0.1') HEALTHCHECK_ALLOWED_IPS = 
HEALTHCHECK_ALLOWED_IPS_STR.split(',') -BASICAUTH_USERNAME = os.getenv('HTTP_USER', '').strip() -BASICAUTH_PASSWORD = os.getenv('HTTP_PASS', '').strip() +BASICAUTH_USERNAME = getenv('HTTP_USER').strip() +BASICAUTH_PASSWORD = getenv('HTTP_PASS').strip() if BASICAUTH_USERNAME and BASICAUTH_PASSWORD: BASICAUTH_DISABLE = False BASICAUTH_USERS = { @@ -88,25 +89,25 @@ else: BASICAUTH_USERS = {} -SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR = os.getenv('TUBESYNC_DIRECTORY_PREFIX', 'True').strip().lower() -SOURCE_DOWNLOAD_DIRECTORY_PREFIX = True if SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR == 'true' else False +SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR = getenv('TUBESYNC_DIRECTORY_PREFIX', True) +SOURCE_DOWNLOAD_DIRECTORY_PREFIX = ( 'true' == SOURCE_DOWNLOAD_DIRECTORY_PREFIX_STR.strip().lower() ) -SHRINK_NEW_MEDIA_METADATA_STR = os.getenv('TUBESYNC_SHRINK_NEW', 'false').strip().lower() -SHRINK_NEW_MEDIA_METADATA = ( 'true' == SHRINK_NEW_MEDIA_METADATA_STR ) -SHRINK_OLD_MEDIA_METADATA_STR = os.getenv('TUBESYNC_SHRINK_OLD', 'false').strip().lower() -SHRINK_OLD_MEDIA_METADATA = ( 'true' == SHRINK_OLD_MEDIA_METADATA_STR ) +SHRINK_NEW_MEDIA_METADATA_STR = getenv('TUBESYNC_SHRINK_NEW', False) +SHRINK_NEW_MEDIA_METADATA = ( 'true' == SHRINK_NEW_MEDIA_METADATA_STR.strip().lower() ) +SHRINK_OLD_MEDIA_METADATA_STR = getenv('TUBESYNC_SHRINK_OLD', False) +SHRINK_OLD_MEDIA_METADATA = ( 'true' == SHRINK_OLD_MEDIA_METADATA_STR.strip().lower() ) # TUBESYNC_RENAME_ALL_SOURCES: True or False -RENAME_ALL_SOURCES_STR = os.getenv('TUBESYNC_RENAME_ALL_SOURCES', 'False').strip().lower() -RENAME_ALL_SOURCES = ( 'true' == RENAME_ALL_SOURCES_STR ) +RENAME_ALL_SOURCES_STR = getenv('TUBESYNC_RENAME_ALL_SOURCES', False) +RENAME_ALL_SOURCES = ( 'true' == RENAME_ALL_SOURCES_STR.strip().lower() ) # TUBESYNC_RENAME_SOURCES: A comma-separated list of Source directories -RENAME_SOURCES_STR = os.getenv('TUBESYNC_RENAME_SOURCES', '') +RENAME_SOURCES_STR = getenv('TUBESYNC_RENAME_SOURCES') RENAME_SOURCES = 
RENAME_SOURCES_STR.split(',') if RENAME_SOURCES_STR else None -VIDEO_HEIGHT_CUTOFF = int(os.getenv("TUBESYNC_VIDEO_HEIGHT_CUTOFF", "240")) +VIDEO_HEIGHT_CUTOFF = getenv("TUBESYNC_VIDEO_HEIGHT_CUTOFF", 240, integer=True) # ensure that the current directory exists @@ -117,4 +118,11 @@ old_youtube_cache_dirs = list(YOUTUBE_DL_CACHEDIR.parent.glob('youtube-*')) old_youtube_cache_dirs.extend(list(YOUTUBE_DL_CACHEDIR.parent.glob('youtube/youtube-*'))) for cache_dir in old_youtube_cache_dirs: cache_dir.rename(YOUTUBE_DL_CACHEDIR / cache_dir.name) +# try to remove the old, hopefully empty, directory +empty_old_youtube_dir = YOUTUBE_DL_CACHEDIR.parent / 'youtube' +if empty_old_youtube_dir.is_dir(): + try: + empty_old_youtube_dir.rmdir() + except: + pass diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index a9f4061c..fc309b28 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -1,5 +1,5 @@ -import os from pathlib import Path +from common.utils import getenv BASE_DIR = Path(__file__).resolve().parent.parent @@ -97,7 +97,7 @@ AUTH_PASSWORD_VALIDATORS = [ LANGUAGE_CODE = 'en-us' -TIME_ZONE = os.getenv('TZ', 'UTC') +TIME_ZONE = getenv('TZ', 'UTC') USE_I18N = True USE_L10N = True USE_TZ = True @@ -172,6 +172,7 @@ YOUTUBE_DEFAULTS = { 'ignoreerrors': True, # Skip on errors (such as unavailable videos in playlists) 'cachedir': False, # Disable on-disk caching 'addmetadata': True, # Embed metadata during postprocessing where available + 'geo_verification_proxy': getenv('geo_verification_proxy').strip() or None, } COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt'