diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9edc7225..b7de46be 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -84,7 +84,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 - name: Install Python ${{ matrix.python-version }} @@ -101,8 +101,9 @@ jobs: cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/background_task/ patches/background_task/* cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ patches/yt_dlp/* + cd tubesync && python3 -B manage.py collectstatic --no-input --link - name: Run Django tests - run: cd tubesync && python3 manage.py test --verbosity=2 + run: cd tubesync && python3 -B -W default manage.py test --verbosity=2 containerise: if: ${{ !cancelled() && 'success' == needs.info.result }} diff --git a/Dockerfile b/Dockerfile index c21a18c1..6ef178c5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -58,6 +58,28 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va apt-get -y autoclean && \ rm -f /var/cache/debconf/*.dat-old +FROM alpine:${ALPINE_VERSION} AS openresty-debian +ARG TARGETARCH +ARG DEBIAN_VERSION +ADD 'https://openresty.org/package/pubkey.gpg' '/downloaded/pubkey.gpg' +RUN set -eu ; \ + decide_arch() { \ + case "${TARGETARCH}" in \ + (amd64) printf -- '' ;; \ + (arm64) printf -- 'arm64/' ;; \ + esac ; \ + } ; \ + set -x ; \ + mkdir -v -p '/etc/apt/trusted.gpg.d' && \ + apk --no-cache --no-progress add cmd:gpg2 && \ + gpg2 --dearmor \ + -o '/etc/apt/trusted.gpg.d/openresty.gpg' \ + < '/downloaded/pubkey.gpg' && \ + mkdir -v -p '/etc/apt/sources.list.d' && \ + printf -- >| '/etc/apt/sources.list.d/openresty.list' \ + 'deb http://openresty.org/package/%sdebian %s openresty' \ + "$(decide_arch)" 
"${DEBIAN_VERSION%-slim}" + FROM alpine:${ALPINE_VERSION} AS ffmpeg-download ARG FFMPEG_DATE ARG FFMPEG_VERSION @@ -257,7 +279,38 @@ RUN set -eu ; \ FROM scratch AS s6-overlay COPY --from=s6-overlay-extracted /s6-overlay-rootfs / -FROM tubesync-base AS tubesync +FROM tubesync-base AS tubesync-openresty + +COPY --from=openresty-debian \ + /etc/apt/trusted.gpg.d/openresty.gpg /etc/apt/trusted.gpg.d/openresty.gpg +COPY --from=openresty-debian \ + /etc/apt/sources.list.d/openresty.list /etc/apt/sources.list.d/openresty.list + +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ + set -x && \ + apt-get update && \ + apt-get -y --no-install-recommends install nginx-common openresty && \ + # Clean up + apt-get -y autopurge && \ + apt-get -y autoclean && \ + rm -v -f /var/cache/debconf/*.dat-old + +FROM tubesync-base AS tubesync-nginx + +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ + set -x && \ + apt-get update && \ + apt-get -y --no-install-recommends install nginx-light && \ + # openresty binary should still work + ln -v -s -T ../sbin/nginx /usr/bin/openresty && \ + # Clean up + apt-get -y autopurge && \ + apt-get -y autoclean && \ + rm -v -f /var/cache/debconf/*.dat-old + +FROM tubesync-openresty AS tubesync ARG S6_VERSION @@ -282,7 +335,6 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va libmariadb3 \ libpq5 \ libwebp7 \ - nginx-light \ pipenv \ pkgconf \ python3 \ @@ -406,7 +458,7 @@ RUN set -x && \ mkdir -v -p /downloads/audio && \ mkdir -v -p /downloads/video && \ # Check nginx configuration copied from config/root/etc - nginx -t && \ + openresty -c /etc/nginx/nginx.conf -e stderr -t && \ # Append software versions ffmpeg_version=$(/usr/local/bin/ffmpeg -version | awk -v 'ev=31' '1 == NR 
&& "ffmpeg" == $1 { print $3; ev=0; } END { exit ev; }') && \ test -n "${ffmpeg_version}" && \ diff --git a/Pipfile b/Pipfile index 17288683..aed25ce3 100644 --- a/Pipfile +++ b/Pipfile @@ -7,7 +7,7 @@ verify_ssl = true autopep8 = "*" [packages] -django = "<5.2" +django = "*" django-sass-processor = {extras = ["management-command"], version = "*"} pillow = "*" whitenoise = "*" diff --git a/README.md b/README.md index 502abf3a..2ea83c54 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ services: ## Optional authentication -Available in `v1.0` (or `:latest`)and later. If you want to enable a basic username and +Available in `v1.0` (or `:latest`) and later. If you want to enable a basic username and password to be required to access the TubeSync dashboard you can set them with the following environment variables: @@ -188,6 +188,14 @@ $ docker pull ghcr.io/meeb/tubesync:v[number] Back-end updates such as database migrations should be automatic. +> [!IMPORTANT] +> `MariaDB` was not automatically upgraded for `UUID` column types. +> To see what changes are needed, you can run: +> ```bash +> docker exec -it tubesync python3 /app/manage.py fix-mariadb --dry-run --uuid-columns +> ``` +> Removing the `--dry-run` will attempt to execute those statements using the configured database connection. + # Moving, backing up, etc. @@ -349,7 +357,7 @@ and you can probably break things by playing in the admin. If you still want to it you can run: ```bash -$ docker exec -ti tubesync python3 /app/manage.py createsuperuser +$ docker exec -it tubesync python3 /app/manage.py createsuperuser ``` And follow the instructions to create an initial Django superuser, once created, you @@ -415,7 +423,7 @@ following this rough guide, you are on your own and should be knowledgeable abou installing and running WSGI-based Python web applications before attempting this. 1. Clone or download this repo -2. Make sure you're running a modern version of Python (>=3.9) and have Pipenv +2. 
Make sure you're running a modern version of Python (>=3.10) and have Pipenv installed 3. Set up the environment with `pipenv install` 4. Copy `tubesync/tubesync/local_settings.py.example` to diff --git a/config/root/etc/s6-overlay/s6-rc.d/nginx/run b/config/root/etc/s6-overlay/s6-rc.d/nginx/run index 87769e62..63653343 100755 --- a/config/root/etc/s6-overlay/s6-rc.d/nginx/run +++ b/config/root/etc/s6-overlay/s6-rc.d/nginx/run @@ -2,4 +2,4 @@ cd / -exec /usr/sbin/nginx +exec /usr/bin/openresty -c /etc/nginx/nginx.conf -e stderr diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run index ff0d4d55..baaf6e0c 100755 --- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run @@ -25,6 +25,13 @@ then chmod -R 0755 /downloads fi +if [ 'True' = "${TUBESYNC_DEBUG:-False}" ] +then + s6-setuidgid app \ + /usr/bin/python3 /app/manage.py \ + showmigrations -v 3 --list +fi + # Run migrations exec s6-setuidgid app \ /usr/bin/python3 /app/manage.py migrate diff --git a/docs/other-database-backends.md b/docs/other-database-backends.md index 4f90d3ab..8df7989c 100644 --- a/docs/other-database-backends.md +++ b/docs/other-database-backends.md @@ -18,22 +18,73 @@ reset your database. 
If you are comfortable with Django you can export and re-im existing database data with: ```bash -$ docker exec -i tubesync python3 /app/manage.py dumpdata > some-file.json +# Stop services +$ docker exec -t tubesync \ + bash -c 'for svc in \ + /run/service/{gunicorn,tubesync*-worker} ; \ +do \ + /command/s6-svc -wd -D "${svc}" ; \ +done' +# Backup the database into a compressed file +$ docker exec -t tubesync \ + python3 /app/manage.py \ + dumpdata --format jsonl \ + --exclude background_task \ + --output /downloads/tubesync-database-backup.jsonl.xz ``` -Then change you database backend over, then use +Writing the compressed backup file to your `/downloads/` makes sense, as long as that directory is still available after destroying the current container. +If you have a configuration where that file will be deleted, choose a different place to store the output (perhaps `/config/`, if it has sufficient storage available) and place the file there instead. + +You can also copy the file from the container to the local filesystem (`/tmp/` in this example) with: ```bash -$ cat some-file.json | docker exec -i tubesync python3 /app/manage.py loaddata - --format=json +$ docker cp \ + tubesync:/downloads/tubesync-database-backup.jsonl.xz \ + /tmp/ +``` + +If you use `-` as the destination, then `docker cp` provides a `tar` archive. 
+ +After you have changed your database backend over, then use: + +```bash +# Stop services +$ docker exec -t tubesync \ + bash -c 'for svc in \ + /run/service/{gunicorn,tubesync*-worker} ; \ +do \ + /command/s6-svc -wd -D "${svc}" ; \ +done' +# Load fixture file into the database +$ docker exec -t tubesync \ + python3 /app/manage.py \ + loaddata /downloads/tubesync-database-backup.jsonl.xz +``` + +Or, if you only have the copy in `/tmp/`, then you would use: +```bash +# Stop services +$ docker exec -t tubesync \ + bash -c 'for svc in \ + /run/service/{gunicorn,tubesync*-worker} ; \ +do \ + /command/s6-svc -wd -D "${svc}" ; \ +done' +# Load fixture data from standard input into the database +$ xzcat /tmp/tubesync-database-backup.jsonl.xz | \ + docker exec -i tubesync \ + python3 /app/manage.py \ + loaddata --format=jsonl - ``` As detailed in the Django documentation: -https://docs.djangoproject.com/en/3.1/ref/django-admin/#dumpdata +https://docs.djangoproject.com/en/5.1/ref/django-admin/#dumpdata and: -https://docs.djangoproject.com/en/3.1/ref/django-admin/#loaddata +https://docs.djangoproject.com/en/5.1/ref/django-admin/#loaddata Further instructions are beyond the scope of TubeSync documenation and you should refer to Django documentation for more details. @@ -94,13 +145,13 @@ the DB for the performance benefits, a configuration like this would be enough: ``` tubesync-db: - image: postgres:15.2 + image: postgres:17 container_name: tubesync-db restart: unless-stopped volumes: - - //init.sql:/docker-entrypoint-initdb.d/init.sql - //tubesync-db:/var/lib/postgresql/data environment: + - POSTGRES_DB=tubesync - POSTGRES_USER=postgres - POSTGRES_PASSWORD=testpassword @@ -118,15 +169,3 @@ the DB for the performance benefits, a configuration like this would be enough: depends_on: - tubesync-db ``` - -Note that an `init.sql` file is needed to initialize the `tubesync` -database before it can be written to. 
This file should contain: - -``` -CREATE DATABASE tubesync; -``` - - -Then it must be mapped to `/docker-entrypoint-initdb.d/init.sql` for it -to be executed on first startup of the container. See the `tubesync-db` -volume mapping above for how to do this. diff --git a/tubesync/common/errors.py b/tubesync/common/errors.py index 87d8aa4d..9ff44a48 100644 --- a/tubesync/common/errors.py +++ b/tubesync/common/errors.py @@ -22,6 +22,13 @@ class NoMetadataException(Exception): pass +class NoThumbnailException(Exception): + ''' + Raised when a thumbnail was not found at the remote URL. + ''' + pass + + class DownloadFailedException(Exception): ''' Raised when a downloaded media file is expected to be present, but doesn't diff --git a/tubesync/common/json.py b/tubesync/common/json.py new file mode 100644 index 00000000..e8a22e1c --- /dev/null +++ b/tubesync/common/json.py @@ -0,0 +1,16 @@ +from django.core.serializers.json import DjangoJSONEncoder + + +class JSONEncoder(DjangoJSONEncoder): + item_separator = ',' + key_separator = ':' + + def default(self, obj): + try: + iterable = iter(obj) + except TypeError: + pass + else: + return list(iterable) + return super().default(obj) + diff --git a/tubesync/common/timestamp.py b/tubesync/common/timestamp.py index df7b2f13..d8b69178 100644 --- a/tubesync/common/timestamp.py +++ b/tubesync/common/timestamp.py @@ -1,8 +1,8 @@ import datetime -posix_epoch = datetime.datetime.utcfromtimestamp(0) utc_tz = datetime.timezone.utc +posix_epoch = datetime.datetime.fromtimestamp(0, utc_tz) def add_epoch(seconds): @@ -13,10 +13,9 @@ def add_epoch(seconds): def subtract_epoch(arg_dt, /): assert isinstance(arg_dt, datetime.datetime) - epoch = posix_epoch.astimezone(utc_tz) utc_dt = arg_dt.astimezone(utc_tz) - return utc_dt - epoch + return utc_dt - posix_epoch def datetime_to_timestamp(arg_dt, /, *, integer=True): timestamp = subtract_epoch(arg_dt).total_seconds() diff --git a/tubesync/sync/choices.py b/tubesync/sync/choices.py index 
25dd762a..6412ad14 100644 --- a/tubesync/sync/choices.py +++ b/tubesync/sync/choices.py @@ -8,6 +8,7 @@ DOMAINS = dict({ 'youtube': frozenset({ 'youtube.com', 'm.youtube.com', + 'music.youtube.com', 'www.youtube.com', }), }) diff --git a/tubesync/sync/forms.py b/tubesync/sync/forms.py index 3d795a5f..e46b740f 100644 --- a/tubesync/sync/forms.py +++ b/tubesync/sync/forms.py @@ -1,8 +1,14 @@ -from django import forms +from django import forms, VERSION as DJANGO_VERSION from django.utils.translation import gettext_lazy as _ +if DJANGO_VERSION[0:3] < (5, 0, 0): + _assume_scheme = dict() +else: + # Silence RemovedInDjango60Warning + _assume_scheme = dict(assume_scheme='http') + class ValidateSourceForm(forms.Form): source_type = forms.CharField( @@ -12,7 +18,8 @@ class ValidateSourceForm(forms.Form): ) source_url = forms.URLField( label=_('Source URL'), - required=True + required=True, + **_assume_scheme, ) @@ -44,10 +51,33 @@ class ResetTasksForm(forms.Form): pass +class ScheduleTaskForm(forms.Form): + + now = forms.DateTimeField( + label=_('The current date and time'), + required=False, + widget=forms.DateTimeInput( + attrs={ + 'type': 'datetime-local', + 'readonly': 'true', + }, + ), + ) + + when = forms.DateTimeField( + label=_('When the task should run'), + required=True, + widget=forms.DateTimeInput( + attrs={'type': 'datetime-local'}, + ), + ) + + class ConfirmDeleteMediaServerForm(forms.Form): pass + _media_server_type_label = 'Jellyfin' class JellyfinMediaServerForm(forms.Form): diff --git a/tubesync/sync/management/commands/fix-mariadb.py b/tubesync/sync/management/commands/fix-mariadb.py new file mode 100644 index 00000000..9b21d2df --- /dev/null +++ b/tubesync/sync/management/commands/fix-mariadb.py @@ -0,0 +1,256 @@ +from django import db +from io import BytesIO, TextIOWrapper +from django.utils.translation import gettext_lazy +from django.core.management import call_command +from django.core.management.base import BaseCommand, CommandError +from 
common.logger import log + + +db_tables = db.connection.introspection.table_names +db_quote_name = db.connection.ops.quote_name +new_tables = { + 'sync_media_metadata_format', + 'sync_media_metadata', + 'sync_metadataformat', + 'sync_metadata', +} +sql_statements = db.connection.ops.prepare_sql_script + +def _(arg_str): + return str(gettext_lazy(arg_str)) + +def SQLTable(arg_table): + assert isinstance(arg_table, str), type(arg_table) + needle = arg_table + if needle.startswith('new__'): + needle = arg_table[len('new__'):] + db.connection.ensure_connection() + valid_table_name = ( + needle in new_tables and + arg_table in db_tables(include_views=False) + ) + if not valid_table_name: + raise ValueError(_('Invalid table name')) + return str(arg_table) + +def _mk_wrapper(): + return TextIOWrapper( + BytesIO(), + line_buffering=True, + write_through=True, + ) + +def check_migration_status(migration_str, /, *, needle=None): + if needle is None: + needle = 'No planned migration operations.' + wrap_stderr, wrap_stdout = _mk_wrapper(), _mk_wrapper() + try: + call_command( + 'migrate', '-v', '3', '--plan', 'sync', + migration_str, + stderr=wrap_stderr, + stdout=wrap_stdout, + ) + except db.migrations.exceptions.NodeNotFoundError: + return (False, None, None,) + wrap_stderr.seek(0, 0) + stderr_lines = wrap_stderr.readlines() + wrap_stdout.seek(0, 0) + stdout_lines = wrap_stdout.readlines() + return ( + bool([ line for line in stdout_lines if needle in line ]), + stderr_lines, + stdout_lines, + ) + +def db_columns(table_str, /): + columns = list() + db_gtd = db.connection.introspection.get_table_description + with db.connection.cursor() as cursor: + columns.extend(db_gtd(cursor, table_str)) + return columns + + +class Command(BaseCommand): + + help = _('Fixes MariaDB database issues') + output_transaction = True + requires_migrations_checks = False + + def add_arguments(self, parser): + parser.add_argument( + '--dry-run', + action='store_true', + default=False, + help=_('Only 
show the SQL; do not apply it to the database'), + ) + parser.add_argument( + '--uuid-columns', + action='store_true', + default=False, + help=_('Switch to the native UUID column type'), + ) + parser.add_argument( + '--delete-table', + action='append', + default=list(), + metavar='TABLE', + type=SQLTable, + help=_('SQL table name to be deleted'), + ) + + def _using_char(self, table_str, column_str='uuid', /): + cols = db_columns(table_str) + char_sizes = { 32, 36, } + char_types = { 'char', 'varchar', } + return column_str in [ + c.name for c in cols if c.data_type in char_types and c.display_size in char_sizes + ] + + def _column_type(self, table_str, column_str='uuid', /): + cols = db_columns(table_str) + found = [ + f'{c.data_type}({c.display_size})' for c in cols if column_str.lower() == c.name.lower() + ] + if not found: + return str() + return found[0] + + def handle(self, *args, **options): + if 'mysql' != db.connection.vendor: + raise CommandError( + _('An invalid database vendor is configured') + + f': {db.connection.vendor}' + ) + + db.connection.ensure_connection() + db_is_mariadb = ( + hasattr(db.connection, 'mysql_is_mariadb') and + db.connection.is_usable() and + db.connection.mysql_is_mariadb + ) + if not db_is_mariadb: + raise CommandError(_('Not connected to a MariaDB database server.')) + + display_name = db.connection.display_name + table_names = options.get('delete_table') + schema = db.connection.schema_editor(collect_sql=True) + quote_name = schema.quote_name + + log.info('Start') + + + if options['uuid_columns']: + if 'uuid' != db.connection.data_types.get('UUIDField', ''): + raise CommandError(_( + f'The {display_name} database server does not support UUID columns.' 
+ )) + uuid_column_type_str = 'uuid(36)' + both_tables = ( + self._using_char('sync_source', 'uuid') and + self._using_char('sync_media', 'uuid') + ) + if not both_tables: + if uuid_column_type_str == self._column_type('sync_source', 'uuid').lower(): + log.info('The source table is already using a native UUID column.') + elif uuid_column_type_str == self._column_type('sync_media', 'uuid').lower(): + log.info('The media table is already using a native UUID column.') + elif uuid_column_type_str == self._column_type('sync_media', 'source_id').lower(): + log.info('The media table is already using a native UUID column.') + else: + raise CommandError(_( + 'The database is not in an appropriate state to switch to ' + 'native UUID columns. Manual intervention is required.' + )) + else: + media_table_str = quote_name('sync_media') + source_table_str = quote_name('sync_source') + fk_name_str = quote_name('sync_media_source_id_36827e1d_fk_sync_source_uuid') + source_id_column_str = quote_name('source_id') + uuid_column_str = quote_name('uuid') + uuid_type_str = 'uuid'.upper() + remove_fk = schema.sql_delete_fk % dict( + table=media_table_str, + name=fk_name_str, + ) + add_fk = schema.sql_create_fk % dict( + table=media_table_str, + name=fk_name_str, + column=source_id_column_str, + to_table=source_table_str, + to_column=uuid_column_str, + deferrable='', + ) + + schema.execute( + schema.sql_alter_column % dict( + table=media_table_str, + changes=schema.sql_alter_column_not_null % dict( + type=uuid_type_str, + column=uuid_column_str, + ), + ), + None, + ) + schema.execute(remove_fk, None) + schema.execute( + schema.sql_alter_column % dict( + table=source_table_str, + changes=schema.sql_alter_column_not_null % dict( + type=uuid_type_str, + column=uuid_column_str, + ), + ), + None, + ) + schema.execute( + schema.sql_alter_column % dict( + table=media_table_str, + changes=schema.sql_alter_column_not_null % dict( + type=uuid_type_str, + column=source_id_column_str, + ), + ), + 
None, + ) + schema.execute(add_fk, None) + + + if table_names: + # Check that the migration is at an appropriate step + at_30, err_30, out_30 = check_migration_status( '0030_alter_source_source_vcodec' ) + at_31, err_31, out_31 = check_migration_status( '0031_metadata_metadataformat' ) + at_31s, err_31s, out_31s = check_migration_status( '0031_squashed_metadata_metadataformat' ) + after_31, err_31a, out_31a = check_migration_status( + '0031_metadata_metadataformat', + needle='Undo Rename table for metadata to sync_media_metadata', + ) + + should_delete = ( + not (at_31s or after_31) and + (at_30 or at_31) + ) + if not should_delete: + raise CommandError(_( + 'Deleting metadata tables that are in use is not safe!' + )) + + for table in table_names: + schema.execute( + schema.sql_delete_table % dict( + table=quote_name(table), + ), + None, + ) + + if options['dry_run']: + log.info('Done') + return '\n'.join(schema.collected_sql) + else: + with db.connection.schema_editor(collect_sql=False) as schema_editor: + for sql in schema.collected_sql: + schema_editor.execute(sql, None) + + + # All done + log.info('Done') diff --git a/tubesync/sync/mediaservers.py b/tubesync/sync/mediaservers.py index e0f9e7e7..ceab239f 100644 --- a/tubesync/sync/mediaservers.py +++ b/tubesync/sync/mediaservers.py @@ -29,7 +29,7 @@ class MediaServer: def make_request_args(self, uri='/', token_header=None, headers={}, token_param=None, params={}): base_parts = urlsplit(self.object.url) if self.token is None: - self.token = self.object.loaded_options['token'] or None + self.token = self.object.options['token'] or None if token_header and self.token: headers.update({token_header: self.token}) self.headers.update(headers) @@ -116,7 +116,7 @@ class PlexMediaServer(MediaServer): if port < 1 or port > 65535: raise ValidationError('Plex Media Server "port" must be between 1 ' 'and 65535') - options = self.object.loaded_options + options = self.object.options if 'token' not in options: raise 
ValidationError('Plex Media Server requires a "token"') token = options['token'].strip() @@ -183,7 +183,7 @@ class PlexMediaServer(MediaServer): def update(self): # For each section / library ID pop off a request to refresh it - libraries = self.object.loaded_options.get('libraries', '') + libraries = self.object.options.get('libraries', '') for library_id in libraries.split(','): library_id = library_id.strip() uri = f'/library/sections/{library_id}/refresh' @@ -258,7 +258,7 @@ class JellyfinMediaServer(MediaServer): except (TypeError, ValueError): raise ValidationError('Jellyfin Media Server "port" must be an integer') - options = self.object.loaded_options + options = self.object.options if 'token' not in options: raise ValidationError('Jellyfin Media Server requires a "token"') if 'libraries' not in options: @@ -302,7 +302,7 @@ class JellyfinMediaServer(MediaServer): return True def update(self): - libraries = self.object.loaded_options.get('libraries', '').split(',') + libraries = self.object.options.get('libraries', '').split(',') for library_id in map(str.strip, libraries): uri = f'/Items/{library_id}/Refresh' response = self.make_request(uri, method='POST') diff --git a/tubesync/sync/migrations/0016_auto_20230214_2052.py b/tubesync/sync/migrations/0016_auto_20230214_2052.py index ffba1952..d4319759 100644 --- a/tubesync/sync/migrations/0016_auto_20230214_2052.py +++ b/tubesync/sync/migrations/0016_auto_20230214_2052.py @@ -1,7 +1,7 @@ # Generated by Django 3.2.18 on 2023-02-14 20:52 from django.db import migrations, models -import sync.models +import sync.fields class Migration(migrations.Migration): @@ -29,6 +29,6 @@ class Migration(migrations.Migration): migrations.AddField( model_name='source', name='sponsorblock_categories', - field=sync.models.CommaSepChoiceField(default='all', possible_choices=(('all', 'All'), ('sponsor', 'Sponsor'), ('intro', 'Intermission/Intro Animation'), ('outro', 'Endcards/Credits'), ('selfpromo', 'Unpaid/Self Promotion'), 
('preview', 'Preview/Recap'), ('filler', 'Filler Tangent'), ('interaction', 'Interaction Reminder'), ('music_offtopic', 'Non-Music Section'))), + field=sync.fields.CommaSepChoiceField(default='all', possible_choices=(('all', 'All'), ('sponsor', 'Sponsor'), ('intro', 'Intermission/Intro Animation'), ('outro', 'Endcards/Credits'), ('selfpromo', 'Unpaid/Self Promotion'), ('preview', 'Preview/Recap'), ('filler', 'Filler Tangent'), ('interaction', 'Interaction Reminder'), ('music_offtopic', 'Non-Music Section'))), ), ] diff --git a/tubesync/sync/migrations/0031_metadata_metadataformat.py b/tubesync/sync/migrations/0031_metadata_metadataformat.py index 00efa0f6..aee89518 100644 --- a/tubesync/sync/migrations/0031_metadata_metadataformat.py +++ b/tubesync/sync/migrations/0031_metadata_metadataformat.py @@ -1,7 +1,7 @@ # Generated by Django 5.1.8 on 2025-04-11 07:36 +import common.json import django.db.models.deletion -import sync.models import uuid from django.db import migrations, models @@ -23,7 +23,7 @@ class Migration(migrations.Migration): ('retrieved', models.DateTimeField(auto_now_add=True, db_index=True, help_text='Date and time the metadata was retrieved', verbose_name='retrieved')), ('uploaded', models.DateTimeField(help_text='Date and time the media was uploaded', null=True, verbose_name='uploaded')), ('published', models.DateTimeField(help_text='Date and time the media was published', null=True, verbose_name='published')), - ('value', models.JSONField(default=dict, encoder=sync.models.JSONEncoder, help_text='JSON metadata object', verbose_name='value')), + ('value', models.JSONField(default=dict, encoder=common.json.JSONEncoder, help_text='JSON metadata object', verbose_name='value')), ('media', models.ForeignKey(help_text='Media the metadata belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='metadata_media', to='sync.media')), ], options={ @@ -40,7 +40,7 @@ class Migration(migrations.Migration): ('key', models.CharField(blank=True, 
default='', help_text='Media identifier at the site for which this format is available', max_length=256, verbose_name='key')), ('number', models.PositiveIntegerField(help_text='Ordering number for this format', verbose_name='number')), ('code', models.CharField(blank=True, default='', help_text='Format identification code', max_length=64, verbose_name='code')), - ('value', models.JSONField(default=dict, encoder=sync.models.JSONEncoder, help_text='JSON metadata format object', verbose_name='value')), + ('value', models.JSONField(default=dict, encoder=common.json.JSONEncoder, help_text='JSON metadata format object', verbose_name='value')), ('metadata', models.ForeignKey(help_text='Metadata the format belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='metadataformat_metadata', to='sync.metadata')), ], options={ diff --git a/tubesync/sync/migrations/0031_squashed_metadata_metadataformat.py b/tubesync/sync/migrations/0031_squashed_metadata_metadataformat.py index 13189f10..c7a78bd8 100644 --- a/tubesync/sync/migrations/0031_squashed_metadata_metadataformat.py +++ b/tubesync/sync/migrations/0031_squashed_metadata_metadataformat.py @@ -1,7 +1,7 @@ # Generated by Django 5.1.8 on 2025-04-23 18:10 +import common.json import django.db.models.deletion -import sync.models import uuid from django.db import migrations, models @@ -25,7 +25,7 @@ class Migration(migrations.Migration): ('retrieved', models.DateTimeField(auto_now_add=True, db_index=True, help_text='Date and time the metadata was retrieved', verbose_name='retrieved')), ('uploaded', models.DateTimeField(db_index=True, help_text='Date and time the media was uploaded', null=True, verbose_name='uploaded')), ('published', models.DateTimeField(db_index=True, help_text='Date and time the media was published', null=True, verbose_name='published')), - ('value', models.JSONField(default=dict, encoder=sync.models.JSONEncoder, help_text='JSON metadata object', verbose_name='value')), + ('value', 
models.JSONField(default=dict, encoder=common.json.JSONEncoder, help_text='JSON metadata object', verbose_name='value')), ('media', models.OneToOneField(help_text='Media the metadata belongs to', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='new_metadata', to='sync.media')), ], options={ @@ -43,7 +43,7 @@ class Migration(migrations.Migration): ('key', models.CharField(blank=True, db_index=True, default='', help_text='Media identifier at the site from which this format is available', max_length=256, verbose_name='key')), ('number', models.PositiveIntegerField(help_text='Ordering number for this format', verbose_name='number')), ('code', models.CharField(blank=True, default='', help_text='Format identification code', max_length=64, verbose_name='code')), - ('value', models.JSONField(default=dict, encoder=sync.models.JSONEncoder, help_text='JSON metadata format object', verbose_name='value')), + ('value', models.JSONField(default=dict, encoder=common.json.JSONEncoder, help_text='JSON metadata format object', verbose_name='value')), ('metadata', models.ForeignKey(help_text='Metadata the format belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='format', to='sync.metadata')), ], options={ diff --git a/tubesync/sync/migrations/0033_alter_mediaserver_options_alter_source_source_acodec_and_more.py b/tubesync/sync/migrations/0033_alter_mediaserver_options_alter_source_source_acodec_and_more.py new file mode 100644 index 00000000..46ea113f --- /dev/null +++ b/tubesync/sync/migrations/0033_alter_mediaserver_options_alter_source_source_acodec_and_more.py @@ -0,0 +1,29 @@ +# Generated by Django 5.1.9 on 2025-05-10 06:18 + +import common.json +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('sync', '0032_metadata_transfer'), + ] + + operations = [ + migrations.AlterField( + model_name='mediaserver', + name='options', + field=models.JSONField(encoder=common.json.JSONEncoder, 
help_text='Options for the media server', null=True, verbose_name='options'), + ), + migrations.AlterField( + model_name='source', + name='source_acodec', + field=models.CharField(choices=[('OPUS', 'OPUS'), ('MP4A', 'MP4A')], db_index=True, default='OPUS', help_text='Source audio codec, desired audio encoding format to download', max_length=8, verbose_name='source audio codec'), + ), + migrations.AlterField( + model_name='source', + name='source_vcodec', + field=models.CharField(choices=[('AV1', 'AV1'), ('VP9', 'VP9'), ('AVC1', 'AVC1 (H.264)')], db_index=True, default='VP9', help_text='Source video codec, desired video encoding format to download (ignored if "resolution" is audio only)', max_length=8, verbose_name='source video codec'), + ), + ] diff --git a/tubesync/sync/models/__init__.py b/tubesync/sync/models/__init__.py new file mode 100644 index 00000000..d7ed077c --- /dev/null +++ b/tubesync/sync/models/__init__.py @@ -0,0 +1,19 @@ +# These are referenced from the migration files + +from ._migrations import ( + get_media_file_path, + get_media_thumb_path, + media_file_storage, +) + +# The actual model classes +# The order starts with independent classes +# then the classes that depend on them follow. 
+ +from .media_server import MediaServer + +from .source import Source +from .media import Media +from .metadata import Metadata +from .metadata_format import MetadataFormat + diff --git a/tubesync/sync/models/_migrations.py b/tubesync/sync/models/_migrations.py new file mode 100644 index 00000000..5ca5d101 --- /dev/null +++ b/tubesync/sync/models/_migrations.py @@ -0,0 +1,21 @@ +from pathlib import Path +from django.conf import settings +from django.core.files.storage import FileSystemStorage + + +media_file_storage = FileSystemStorage(location=str(settings.DOWNLOAD_ROOT), base_url='/media-data/') + + +def get_media_file_path(instance, filename): + return instance.filepath + + +def get_media_thumb_path(instance, filename): + # we don't want to use alternate names for thumb files + if instance.thumb: + instance.thumb.delete(save=False) + fileid = str(instance.uuid).lower() + filename = f'{fileid}.jpg' + prefix = fileid[:2] + return Path('thumbs') / prefix / filename + diff --git a/tubesync/sync/models/_private.py b/tubesync/sync/models/_private.py new file mode 100644 index 00000000..96539dbe --- /dev/null +++ b/tubesync/sync/models/_private.py @@ -0,0 +1,12 @@ +from ..choices import Val, YouTube_SourceType + + +_srctype_dict = lambda n: dict(zip( YouTube_SourceType.values, (n,) * len(YouTube_SourceType.values) )) + + +def _nfo_element(nfo, label, text, /, *, attrs={}, tail='\n', char=' ', indent=2): + element = nfo.makeelement(label, attrs) + element.text = text + element.tail = tail + (char * indent) + return element + diff --git a/tubesync/sync/models.py b/tubesync/sync/models/media.py similarity index 56% rename from tubesync/sync/models.py rename to tubesync/sync/models/media.py index a68e1c4b..daaf723d 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models/media.py @@ -1,7 +1,6 @@ import os import uuid import json -import re from collections import OrderedDict from copy import deepcopy from datetime import datetime, timedelta, timezone as tz @@ 
-9,580 +8,39 @@ from pathlib import Path from xml.etree import ElementTree from django.conf import settings from django.db import models -from django.core.exceptions import ObjectDoesNotExist, SuspiciousOperation -from django.core.files.storage import FileSystemStorage -from django.core.serializers.json import DjangoJSONEncoder -from django.core.validators import RegexValidator +from django.core.exceptions import ObjectDoesNotExist from django.db.transaction import atomic from django.utils.text import slugify from django.utils import timezone from django.utils.translation import gettext_lazy as _ from common.logger import log from common.errors import NoFormatException -from common.utils import ( clean_filename, clean_emoji, - django_queryset_generator as qs_gen, ) -from .youtube import ( get_media_info as get_youtube_media_info, - download_media as download_youtube_media, - get_channel_image_info as get_youtube_channel_image_info) -from .utils import (seconds_to_timestr, parse_media_format, filter_response, - write_text_file, mkdir_p, directory_and_stem, glob_quote) -from .matching import ( get_best_combined_format, get_best_audio_format, - get_best_video_format) -from .fields import CommaSepChoiceField -from .choices import ( Val, CapChoices, Fallback, FileExtension, - FilterSeconds, IndexSchedule, MediaServerType, - MediaState, SourceResolution, SourceResolutionInteger, - SponsorBlock_Category, YouTube_AudioCodec, - YouTube_SourceType, YouTube_VideoCodec) - -media_file_storage = FileSystemStorage(location=str(settings.DOWNLOAD_ROOT), base_url='/media-data/') -_srctype_dict = lambda n: dict(zip( YouTube_SourceType.values, (n,) * len(YouTube_SourceType.values) )) - -class JSONEncoder(DjangoJSONEncoder): - item_separator = ',' - key_separator = ':' - - def default(self, obj): - try: - iterable = iter(obj) - except TypeError: - pass - else: - return list(iterable) - return super().default(obj) - - -class Source(models.Model): - ''' - A Source is a source of media. 
Currently, this is either a YouTube channel - or a YouTube playlist. - ''' - - sponsorblock_categories = CommaSepChoiceField( - _(''), - max_length=128, - possible_choices=SponsorBlock_Category.choices, - all_choice='all', - allow_all=True, - all_label='(All Categories)', - default='all', - help_text=_('Select the SponsorBlock categories that you wish to be removed from downloaded videos.') - ) - embed_metadata = models.BooleanField( - _('embed metadata'), - default=False, - help_text=_('Embed metadata from source into file') - ) - embed_thumbnail = models.BooleanField( - _('embed thumbnail'), - default=False, - help_text=_('Embed thumbnail into the file') - ) - enable_sponsorblock = models.BooleanField( - _('enable sponsorblock'), - default=True, - help_text=_('Use SponsorBlock?') - ) - - # Fontawesome icons used for the source on the front end - ICONS = _srctype_dict('') - - # Format to use to display a URL for the source - URLS = dict(zip( - YouTube_SourceType.values, - ( - 'https://www.youtube.com/c/{key}', - 'https://www.youtube.com/channel/{key}', - 'https://www.youtube.com/playlist?list={key}', - ), - )) - - # Format used to create indexable URLs - INDEX_URLS = dict(zip( - YouTube_SourceType.values, - ( - 'https://www.youtube.com/c/{key}/{type}', - 'https://www.youtube.com/channel/{key}/{type}', - 'https://www.youtube.com/playlist?list={key}', - ), - )) - - # Callback functions to get a list of media from the source - INDEXERS = _srctype_dict(get_youtube_media_info) - - # Field names to find the media ID used as the key when storing media - KEY_FIELD = _srctype_dict('id') - - uuid = models.UUIDField( - _('uuid'), - primary_key=True, - editable=False, - default=uuid.uuid4, - help_text=_('UUID of the source') - ) - created = models.DateTimeField( - _('created'), - auto_now_add=True, - db_index=True, - help_text=_('Date and time the source was created') - ) - last_crawl = models.DateTimeField( - _('last crawl'), - db_index=True, - null=True, - blank=True, - 
help_text=_('Date and time the source was last crawled') - ) - source_type = models.CharField( - _('source type'), - max_length=1, - db_index=True, - choices=YouTube_SourceType.choices, - default=YouTube_SourceType.CHANNEL, - help_text=_('Source type') - ) - key = models.CharField( - _('key'), - max_length=100, - db_index=True, - unique=True, - help_text=_('Source key, such as exact YouTube channel name or playlist ID') - ) - name = models.CharField( - _('name'), - max_length=100, - db_index=True, - unique=True, - help_text=_('Friendly name for the source, used locally in TubeSync only') - ) - directory = models.CharField( - _('directory'), - max_length=100, - db_index=True, - unique=True, - help_text=_('Directory name to save the media into') - ) - media_format = models.CharField( - _('media format'), - max_length=200, - default=settings.MEDIA_FORMATSTR_DEFAULT, - help_text=_('File format to use for saving files, detailed options at bottom of page.') - ) - index_schedule = models.IntegerField( - _('index schedule'), - choices=IndexSchedule.choices, - db_index=True, - default=IndexSchedule.EVERY_24_HOURS, - help_text=_('Schedule of how often to index the source for new media') - ) - download_media = models.BooleanField( - _('download media'), - default=True, - help_text=_('Download media from this source, if not selected the source will only be indexed') - ) - index_videos = models.BooleanField( - _('index videos'), - default=True, - help_text=_('Index video media from this source') - ) - index_streams = models.BooleanField( - _('index streams'), - default=False, - help_text=_('Index live stream media from this source') - ) - download_cap = models.IntegerField( - _('download cap'), - choices=CapChoices.choices, - default=CapChoices.CAP_NOCAP, - help_text=_('Do not download media older than this capped date') - ) - delete_old_media = models.BooleanField( - _('delete old media'), - default=False, - help_text=_('Delete old media after "days to keep" days?') - ) - 
days_to_keep = models.PositiveSmallIntegerField( - _('days to keep'), - default=14, - help_text=_('If "delete old media" is ticked, the number of days after which ' - 'to automatically delete media') - ) - filter_text = models.CharField( - _('filter string'), - max_length=200, - default='', - blank=True, - help_text=_('Regex compatible filter string for video titles') - ) - filter_text_invert = models.BooleanField( - _("invert filter text matching"), - default=False, - help_text="Invert filter string regex match, skip any matching titles when selected", - ) - filter_seconds = models.PositiveIntegerField( - _('filter seconds'), - blank=True, - null=True, - help_text=_('Filter Media based on Min/Max duration. Leave blank or 0 to disable filtering') - ) - filter_seconds_min = models.BooleanField( - _('filter seconds min/max'), - choices=FilterSeconds.choices, - default=Val(FilterSeconds.MIN), - help_text=_('When Filter Seconds is > 0, do we skip on minimum (video shorter than limit) or maximum (video ' - 'greater than maximum) video duration') - ) - delete_removed_media = models.BooleanField( - _('delete removed media'), - default=False, - help_text=_('Delete media that is no longer on this playlist') - ) - delete_files_on_disk = models.BooleanField( - _('delete files on disk'), - default=False, - help_text=_('Delete files on disk when they are removed from TubeSync') - ) - source_resolution = models.CharField( - _('source resolution'), - max_length=8, - db_index=True, - choices=SourceResolution.choices, - default=SourceResolution.VIDEO_1080P, - help_text=_('Source resolution, desired video resolution to download') - ) - source_vcodec = models.CharField( - _('source video codec'), - max_length=8, - db_index=True, - choices=list(reversed(YouTube_VideoCodec.choices)), - default=YouTube_VideoCodec.VP9, - help_text=_('Source video codec, desired video encoding format to download (ignored if "resolution" is audio only)') - ) - source_acodec = models.CharField( - _('source 
audio codec'), - max_length=8, - db_index=True, - choices=list(reversed(YouTube_AudioCodec.choices)), - default=YouTube_AudioCodec.OPUS, - help_text=_('Source audio codec, desired audio encoding format to download') - ) - prefer_60fps = models.BooleanField( - _('prefer 60fps'), - default=True, - help_text=_('Where possible, prefer 60fps media for this source') - ) - prefer_hdr = models.BooleanField( - _('prefer hdr'), - default=False, - help_text=_('Where possible, prefer HDR media for this source') - ) - fallback = models.CharField( - _('fallback'), - max_length=1, - db_index=True, - choices=Fallback.choices, - default=Fallback.NEXT_BEST_HD, - help_text=_('What do do when media in your source resolution and codecs is not available') - ) - copy_channel_images = models.BooleanField( - _('copy channel images'), - default=False, - help_text=_('Copy channel banner and avatar. These may be detected and used by some media servers') - ) - copy_thumbnails = models.BooleanField( - _('copy thumbnails'), - default=False, - help_text=_('Copy thumbnails with the media, these may be detected and used by some media servers') - ) - write_nfo = models.BooleanField( - _('write nfo'), - default=False, - help_text=_('Write an NFO file in XML with the media info, these may be detected and used by some media servers') - ) - write_json = models.BooleanField( - _('write json'), - default=False, - help_text=_('Write a JSON file with the media info, these may be detected and used by some media servers') - ) - has_failed = models.BooleanField( - _('has failed'), - default=False, - help_text=_('Source has failed to index media') - ) - - write_subtitles = models.BooleanField( - _('write subtitles'), - default=False, - help_text=_('Download video subtitles') - ) - - auto_subtitles = models.BooleanField( - _('accept auto-generated subs'), - default=False, - help_text=_('Accept auto-generated subtitles') - ) - sub_langs = models.CharField( - _('subs langs'), - max_length=30, - default='en', - 
help_text=_('List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat'), - validators=[ - RegexValidator( - regex=r"^(\-?[\_\.a-zA-Z-]+(,|$))+", - message=_('Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat') - ) - ] - ) - - def __str__(self): - return self.name - - class Meta: - verbose_name = _('Source') - verbose_name_plural = _('Sources') - - @property - def icon(self): - return self.ICONS.get(self.source_type) - - @property - def slugname(self): - replaced = self.name.replace('_', '-').replace('&', 'and').replace('+', 'and') - return slugify(replaced)[:80] - - def deactivate(self): - self.download_media = False - self.index_streams = False - self.index_videos = False - self.index_schedule = IndexSchedule.NEVER - self.save(update_fields={ - 'download_media', - 'index_streams', - 'index_videos', - 'index_schedule', - }) - - @property - def is_active(self): - active = ( - self.download_media or - self.index_streams or - self.index_videos - ) - return self.index_schedule and active - - @property - def is_audio(self): - return self.source_resolution == SourceResolution.AUDIO.value - - @property - def is_playlist(self): - return self.source_type == YouTube_SourceType.PLAYLIST.value - - @property - def is_video(self): - return not self.is_audio - - @property - def download_cap_date(self): - delta = self.download_cap - if delta > 0: - return timezone.now() - timedelta(seconds=delta) - else: - return False - - @property - def days_to_keep_date(self): - delta = self.days_to_keep - if delta > 0: - return timezone.now() - timedelta(days=delta) - else: - return False - - @property - def extension(self): - ''' - The extension is also used by youtube-dl to set the output container. As - it is possible to quite easily pick combinations of codecs and containers - which are invalid (e.g. OPUS audio in an MP4 container) just set this for - people. 
All video is set to mkv containers, audio-only is set to m4a or ogg - depending on audio codec. - ''' - if self.is_audio: - if self.source_acodec == Val(YouTube_AudioCodec.MP4A): - return Val(FileExtension.M4A) - elif self.source_acodec == Val(YouTube_AudioCodec.OPUS): - return Val(FileExtension.OGG) - else: - raise ValueError('Unable to choose audio extension, uknown acodec') - else: - return Val(FileExtension.MKV) - - @classmethod - def create_url(obj, source_type, key): - url = obj.URLS.get(source_type) - return url.format(key=key) - - @classmethod - def create_index_url(obj, source_type, key, type): - url = obj.INDEX_URLS.get(source_type) - return url.format(key=key, type=type) - - @property - def url(self): - return Source.create_url(self.source_type, self.key) - - def get_index_url(self, type): - return Source.create_index_url(self.source_type, self.key, type) - - @property - def format_summary(self): - if self.is_audio: - vc = 'none' - else: - vc = self.source_vcodec - ac = self.source_acodec - f = ' 60FPS' if self.is_video and self.prefer_60fps else '' - h = ' HDR' if self.is_video and self.prefer_hdr else '' - return f'{self.source_resolution} (video:{vc}, audio:{ac}){f}{h}'.strip() - - @property - def directory_path(self): - download_dir = Path(media_file_storage.location) - return download_dir / self.type_directory_path - - @property - def type_directory_path(self): - if settings.SOURCE_DOWNLOAD_DIRECTORY_PREFIX: - if self.is_audio: - return Path(settings.DOWNLOAD_AUDIO_DIR) / self.directory - else: - return Path(settings.DOWNLOAD_VIDEO_DIR) / self.directory - else: - return Path(self.directory) - - def make_directory(self): - return os.makedirs(self.directory_path, exist_ok=True) - - @property - def get_image_url(self): - if self.is_playlist: - raise SuspiciousOperation('This source is a playlist so it doesn\'t have thumbnail.') - - return get_youtube_channel_image_info(self.url) - - - def directory_exists(self): - return 
(os.path.isdir(self.directory_path) and - os.access(self.directory_path, os.W_OK)) - - @property - def key_field(self): - return self.KEY_FIELD.get(self.source_type, '') - - @property - def source_resolution_height(self): - return SourceResolutionInteger.get(self.source_resolution, 0) - - @property - def can_fallback(self): - return self.fallback != Val(Fallback.FAIL) - - @property - def example_media_format_dict(self): - ''' - Populates a dict with real-ish and some placeholder data for media name - format strings. Used for example filenames and media_format validation. - ''' - fmt = [] - if self.source_resolution: - fmt.append(self.source_resolution) - if self.source_vcodec: - fmt.append(self.source_vcodec.lower()) - if self.source_acodec: - fmt.append(self.source_acodec.lower()) - if self.prefer_60fps: - fmt.append('60fps') - if self.prefer_hdr: - fmt.append('hdr') - now = timezone.now() - return { - 'yyyymmdd': now.strftime('%Y%m%d'), - 'yyyy_mm_dd': now.strftime('%Y-%m-%d'), - 'yyyy': now.strftime('%Y'), - 'mm': now.strftime('%m'), - 'dd': now.strftime('%d'), - 'source': self.slugname, - 'source_full': self.name, - 'uploader': 'Some Channel Name', - 'title': 'some-media-title-name', - 'title_full': 'Some Media Title Name', - 'key': 'SoMeUnIqUiD', - 'format': '-'.join(fmt), - 'playlist_title': 'Some Playlist Title', - 'video_order': '01', - 'ext': self.extension, - 'resolution': self.source_resolution if self.source_resolution else '', - 'height': '720' if self.source_resolution else '', - 'width': '1280' if self.source_resolution else '', - 'vcodec': self.source_vcodec.lower() if self.source_vcodec else '', - 'acodec': self.source_acodec.lower(), - 'fps': '24' if self.source_resolution else '', - 'hdr': 'hdr' if self.source_resolution else '' - } - - def get_example_media_format(self): - try: - return self.media_format.format(**self.example_media_format_dict) - except Exception as e: - return '' - - def is_regex_match(self, media_item_title): - if not 
self.filter_text: - return True - return bool(re.search(self.filter_text, media_item_title)) - - def get_index(self, type): - indexer = self.INDEXERS.get(self.source_type, None) - if not callable(indexer): - raise Exception(f'Source type f"{self.source_type}" has no indexer') - days = None - if self.download_cap_date: - days = timedelta(seconds=self.download_cap).days - response = indexer(self.get_index_url(type=type), days=days) - if not isinstance(response, dict): - return [] - entries = response.get('entries', []) - return entries - - def index_media(self): - ''' - Index the media source returning a list of media metadata as dicts. - ''' - entries = list() - if self.index_videos: - entries += self.get_index('videos') - # Playlists do something different that I have yet to figure out - if not self.is_playlist: - if self.index_streams: - entries += self.get_index('streams') - - if settings.MAX_ENTRIES_PROCESSING: - entries = entries[:settings.MAX_ENTRIES_PROCESSING] - return entries - -def get_media_thumb_path(instance, filename): - # we don't want to use alternate names for thumb files - if instance.thumb: - instance.thumb.delete(save=False) - fileid = str(instance.uuid).lower() - filename = f'{fileid}.jpg' - prefix = fileid[:2] - return Path('thumbs') / prefix / filename - - -def get_media_file_path(instance, filename): - return instance.filepath +from common.utils import ( + clean_filename, clean_emoji, + django_queryset_generator as qs_gen, +) +from ..youtube import ( + get_media_info as get_youtube_media_info, + download_media as download_youtube_media, +) +from ..utils import ( + seconds_to_timestr, parse_media_format, filter_response, + write_text_file, mkdir_p, directory_and_stem, glob_quote, + multi_key_sort, +) +from ..matching import ( + get_best_combined_format, + get_best_audio_format, get_best_video_format, +) +from ..choices import ( + Val, Fallback, MediaState, SourceResolution, + YouTube_AudioCodec, YouTube_VideoCodec, +) +from ._migrations import 
( + media_file_storage, get_media_thumb_path, get_media_file_path, +) +from ._private import _srctype_dict, _nfo_element +from .source import Source class Media(models.Model): @@ -1268,6 +726,27 @@ class Media(models.Model): if getattr(settings, 'SHRINK_NEW_MEDIA_METADATA', False): response = filter_response(metadata, True) + # save the new list of thumbnails + thumbnails = self.get_metadata_first_value( + 'thumbnails', + self.get_metadata_first_value('thumbnails', []), + arg_dict=response, + ) + field = self.get_metadata_field('thumbnails') + self.save_to_metadata(field, thumbnails) + + # select and save our best thumbnail url + try: + thumbnail = [ thumb.get('url') for thumb in multi_key_sort( + thumbnails, + [('preference', True,)], + ) if thumb.get('url', '').endswith('.jpg') ][0] + except IndexError: + pass + else: + field = self.get_metadata_field('thumbnail') + self.save_to_metadata(field, thumbnail) + field = self.get_metadata_field('formats') self.save_to_metadata(field, response.get(field, [])) self.save_to_metadata(refreshed_key, response.get('epoch', formats_seconds)) @@ -1290,23 +769,15 @@ class Media(models.Model): return self.get_metadata_first_value(('fulltitle', 'title',), '') def ts_to_dt(self, /, timestamp): - assert timestamp is not None try: timestamp_float = float(timestamp) - except Exception as e: + except (TypeError, ValueError,) as e: log.warn(f'Could not compute published from timestamp for: {self.source} / {self} with "{e}"') pass else: return self.posix_epoch + timedelta(seconds=timestamp_float) return None - def metadata_published(self, timestamp=None): - if timestamp is None: - timestamp = self.get_metadata_first_value( - ('release_timestamp', 'timestamp',) - ) - return self.ts_to_dt(timestamp) - @property def slugtitle(self): replaced = self.title.replace('_', '-').replace('&', 'and').replace('+', 'and') @@ -1481,37 +952,27 @@ class Media(models.Model): nfo = ElementTree.Element('episodedetails') nfo.text = '\n ' # title = media 
metadata title - title = nfo.makeelement('title', {}) - title.text = clean_emoji(self.title) - title.tail = '\n ' - nfo.append(title) + nfo.append(_nfo_element(nfo, + 'title', clean_emoji(self.title), + )) # showtitle = source name - showtitle = nfo.makeelement('showtitle', {}) - showtitle.text = clean_emoji(str(self.source.name).strip()) - showtitle.tail = '\n ' - nfo.append(showtitle) + nfo.append(_nfo_element(nfo, + 'showtitle', clean_emoji(str(self.source.name).strip()), + )) # season = upload date year - season = nfo.makeelement('season', {}) - if self.source.is_playlist: - # If it's a playlist, set season to 1 - season.text = '1' - else: - # If it's not a playlist, set season to upload date year - season.text = str(self.upload_date.year) if self.upload_date else '' - season.tail = '\n ' - nfo.append(season) + nfo.append(_nfo_element(nfo, + 'season', + '1' if self.source.is_playlist else str( + self.upload_date.year if self.upload_date else '' + ), + )) # episode = number of video in the year - episode = nfo.makeelement('episode', {}) - episode.text = self.get_episode_str() - episode.tail = '\n ' - nfo.append(episode) + nfo.append(_nfo_element(nfo, + 'episode', self.get_episode_str(), + )) # ratings = media metadata youtube rating - value = nfo.makeelement('value', {}) - value.text = str(self.rating) - value.tail = '\n ' - votes = nfo.makeelement('votes', {}) - votes.text = str(self.votes) - votes.tail = '\n ' + value = _nfo_element(nfo, 'value', str(self.rating), indent=6) + votes = _nfo_element(nfo, 'votes', str(self.votes), indent=4) rating_attrs = OrderedDict() rating_attrs['name'] = 'youtube' rating_attrs['max'] = '5' @@ -1528,61 +989,51 @@ class Media(models.Model): ratings.tail = '\n ' nfo.append(ratings) # plot = media metadata description - plot = nfo.makeelement('plot', {}) - plot.text = clean_emoji(str(self.description).strip()) - plot.tail = '\n ' - nfo.append(plot) + nfo.append(_nfo_element(nfo, + 'plot', 
clean_emoji(str(self.description).strip()), + )) # thumb = local path to media thumbnail - thumb = nfo.makeelement('thumb', {}) - thumb.text = self.thumbname if self.source.copy_thumbnails else '' - thumb.tail = '\n ' - nfo.append(thumb) + nfo.append(_nfo_element(nfo, + 'thumb', self.thumbname if self.source.copy_thumbnails else '', + )) # mpaa = media metadata age requirement - mpaa = nfo.makeelement('mpaa', {}) - mpaa.text = str(self.age_limit) - mpaa.tail = '\n ' if self.age_limit and self.age_limit > 0: - nfo.append(mpaa) + nfo.append(_nfo_element(nfo, + 'mpaa', str(self.age_limit), + )) # runtime = media metadata duration in seconds - runtime = nfo.makeelement('runtime', {}) - runtime.text = str(self.duration) - runtime.tail = '\n ' - nfo.append(runtime) + nfo.append(_nfo_element(nfo, + 'runtime', str(self.duration), + )) # id = media key - idn = nfo.makeelement('id', {}) - idn.text = str(self.key).strip() - idn.tail = '\n ' - nfo.append(idn) + nfo.append(_nfo_element(nfo, + 'id', str(self.key).strip(), + )) # uniqueid = media key uniqueid_attrs = OrderedDict() uniqueid_attrs['type'] = 'youtube' uniqueid_attrs['default'] = 'True' - uniqueid = nfo.makeelement('uniqueid', uniqueid_attrs) - uniqueid.text = str(self.key).strip() - uniqueid.tail = '\n ' - nfo.append(uniqueid) + nfo.append(_nfo_element(nfo, + 'uniqueid', str(self.key).strip(), attrs=uniqueid_attrs, + )) # studio = media metadata uploader - studio = nfo.makeelement('studio', {}) - studio.text = clean_emoji(str(self.uploader).strip()) - studio.tail = '\n ' - nfo.append(studio) + nfo.append(_nfo_element(nfo, + 'studio', clean_emoji(str(self.uploader).strip()), + )) # aired = media metadata uploaded date - aired = nfo.makeelement('aired', {}) upload_date = self.upload_date - aired.text = upload_date.strftime('%Y-%m-%d') if upload_date else '' - aired.tail = '\n ' - nfo.append(aired) + nfo.append(_nfo_element(nfo, + 'aired', upload_date.strftime('%Y-%m-%d') if upload_date else '', + )) # dateadded = date 
and time media was created in tubesync - dateadded = nfo.makeelement('dateadded', {}) - dateadded.text = self.created.strftime('%Y-%m-%d %H:%M:%S') - dateadded.tail = '\n ' - nfo.append(dateadded) + nfo.append(_nfo_element(nfo, + 'dateadded', self.created.strftime('%Y-%m-%d %H:%M:%S'), + )) # genre = any media metadata categories if they exist for category_str in self.categories: - genre = nfo.makeelement('genre', {}) - genre.text = str(category_str).strip() - genre.tail = '\n ' - nfo.append(genre) + nfo.append(_nfo_element(nfo, + 'genre', str(category_str).strip(), + )) nfo[-1].tail = '\n' # Return XML tree as a prettified string return ElementTree.tostring(nfo, encoding='utf8', method='xml').decode('utf8') @@ -1768,307 +1219,3 @@ class Media(models.Model): except OSError as e: pass - -class Metadata(models.Model): - ''' - Metadata for an indexed `Media` item. - ''' - class Meta: - db_table = 'sync_media_metadata' - verbose_name = _('Metadata about Media') - verbose_name_plural = _('Metadata about Media') - unique_together = ( - ('media', 'site', 'key'), - ) - get_latest_by = ["-retrieved", "-created"] - - uuid = models.UUIDField( - _('uuid'), - primary_key=True, - editable=False, - default=uuid.uuid4, - help_text=_('UUID of the metadata'), - ) - media = models.OneToOneField( - Media, - # on_delete=models.DO_NOTHING, - on_delete=models.SET_NULL, - related_name='new_metadata', - help_text=_('Media the metadata belongs to'), - null=True, - parent_link=False, - ) - site = models.CharField( - _('site'), - max_length=256, - blank=True, - db_index=True, - null=False, - default='Youtube', - help_text=_('Site from which the metadata was retrieved'), - ) - key = models.CharField( - _('key'), - max_length=256, - blank=True, - db_index=True, - null=False, - default='', - help_text=_('Media identifier at the site from which the metadata was retrieved'), - ) - created = models.DateTimeField( - _('created'), - auto_now_add=True, - db_index=True, - help_text=_('Date and time the 
metadata was created'), - ) - retrieved = models.DateTimeField( - _('retrieved'), - auto_now_add=True, - db_index=True, - help_text=_('Date and time the metadata was retrieved'), - ) - uploaded = models.DateTimeField( - _('uploaded'), - db_index=True, - null=True, - help_text=_('Date and time the media was uploaded'), - ) - published = models.DateTimeField( - _('published'), - db_index=True, - null=True, - help_text=_('Date and time the media was published'), - ) - value = models.JSONField( - _('value'), - encoder=JSONEncoder, - null=False, - default=dict, - help_text=_('JSON metadata object'), - ) - - - def __str__(self): - template = '"{}" from {} at: {}' - return template.format( - self.key, - self.site, - self.retrieved.isoformat(timespec='seconds'), - ) - - @atomic(durable=False) - def ingest_formats(self, formats=list(), /): - number = 0 - for number, format in enumerate(formats, start=1): - mdf, created = self.format.get_or_create(site=self.site, key=self.key, number=number) - mdf.value = format - mdf.save() - if number > 0: - # delete any numbers we did not overwrite or create - self.format.filter(site=self.site, key=self.key, number__gt=number).delete() - - @property - def with_formats(self): - formats = self.format.all().order_by('number') - formats_list = [ f.value for f in qs_gen(formats) ] - metadata = self.value.copy() - metadata.update(dict(formats=formats_list)) - return metadata - - @atomic(durable=False) - def ingest_metadata(self, data): - assert isinstance(data, dict), type(data) - from common.timestamp import timestamp_to_datetime - - try: - self.retrieved = timestamp_to_datetime( - self.media.get_metadata_first_value( - 'epoch', - arg_dict=data, - ) - ) or self.created - except AssertionError: - self.retrieved = self.created - - try: - self.published = timestamp_to_datetime( - self.media.get_metadata_first_value( - ('release_timestamp', 'timestamp',), - arg_dict=data, - ) - ) or self.media.published - except AssertionError: - self.published = 
self.media.published - - self.value = data.copy() # try not to have side-effects for the caller - formats_key = self.media.get_metadata_field('formats') - formats = self.value.pop(formats_key, list()) - self.uploaded = min( - self.published, - self.retrieved, - self.media.created, - ) - self.save() - self.ingest_formats(formats) - - return self.with_formats - - -class MetadataFormat(models.Model): - ''' - A format from the Metadata for an indexed `Media` item. - ''' - class Meta: - db_table = f'{Metadata._meta.db_table}_format' - verbose_name = _('Format from Media Metadata') - verbose_name_plural = _('Formats from Media Metadata') - unique_together = ( - ('metadata', 'site', 'key', 'number'), - ) - ordering = ['site', 'key', 'number'] - - uuid = models.UUIDField( - _('uuid'), - primary_key=True, - editable=False, - default=uuid.uuid4, - help_text=_('UUID of the format'), - ) - metadata = models.ForeignKey( - Metadata, - # on_delete=models.DO_NOTHING, - on_delete=models.CASCADE, - related_name='format', - help_text=_('Metadata the format belongs to'), - null=False, - ) - site = models.CharField( - _('site'), - max_length=256, - blank=True, - db_index=True, - null=False, - default='Youtube', - help_text=_('Site from which the format is available'), - ) - key = models.CharField( - _('key'), - max_length=256, - blank=True, - db_index=True, - null=False, - default='', - help_text=_('Media identifier at the site from which this format is available'), - ) - number = models.PositiveIntegerField( - _('number'), - blank=False, - null=False, - help_text=_('Ordering number for this format') - ) - value = models.JSONField( - _('value'), - encoder=JSONEncoder, - null=False, - default=dict, - help_text=_('JSON metadata format object'), - ) - - - def __str__(self): - template = '#{:n} "{}" from {}: {}' - return template.format( - self.number, - self.key, - self.site, - self.value.get('format') or self.value.get('format_id'), - ) - - -class MediaServer(models.Model): - ''' - A 
remote media server, such as a Plex server. - ''' - - ICONS = { - Val(MediaServerType.JELLYFIN): '', - Val(MediaServerType.PLEX): '', - } - HANDLERS = MediaServerType.handlers_dict() - - server_type = models.CharField( - _('server type'), - max_length=1, - db_index=True, - choices=MediaServerType.choices, - default=MediaServerType.PLEX, - help_text=_('Server type') - ) - host = models.CharField( - _('host'), - db_index=True, - max_length=200, - help_text=_('Hostname or IP address of the media server') - ) - port = models.PositiveIntegerField( - _('port'), - db_index=True, - help_text=_('Port number of the media server') - ) - use_https = models.BooleanField( - _('use https'), - default=False, - help_text=_('Connect to the media server over HTTPS') - ) - verify_https = models.BooleanField( - _('verify https'), - default=True, - help_text=_('If connecting over HTTPS, verify the SSL certificate is valid') - ) - options = models.TextField( - _('options'), - blank=False, # valid JSON only - null=True, - help_text=_('JSON encoded options for the media server') - ) - - def __str__(self): - return f'{self.get_server_type_display()} server at {self.url}' - - class Meta: - verbose_name = _('Media Server') - verbose_name_plural = _('Media Servers') - unique_together = ( - ('host', 'port'), - ) - - @property - def url(self): - scheme = 'https' if self.use_https else 'http' - return f'{scheme}://{self.host.strip()}:{self.port}' - - @property - def icon(self): - return self.ICONS.get(self.server_type) - - @property - def handler(self): - handler_class = self.HANDLERS.get(self.server_type) - return handler_class(self) - - @property - def loaded_options(self): - try: - return json.loads(self.options) - except Exception as e: - return {} - - def validate(self): - return self.handler.validate() - - def update(self): - return self.handler.update() - - def get_help_html(self): - return self.handler.HELP diff --git a/tubesync/sync/models/media_server.py 
b/tubesync/sync/models/media_server.py new file mode 100644 index 00000000..74502fac --- /dev/null +++ b/tubesync/sync/models/media_server.py @@ -0,0 +1,86 @@ +from common.json import JSONEncoder +from django import db +from django.utils.translation import gettext_lazy as _ +from ..choices import Val, MediaServerType + + +class MediaServer(db.models.Model): + ''' + A remote media server, such as a Plex server. + ''' + + ICONS = { + Val(MediaServerType.JELLYFIN): '', + Val(MediaServerType.PLEX): '', + } + HANDLERS = MediaServerType.handlers_dict() + + server_type = db.models.CharField( + _('server type'), + max_length=1, + db_index=True, + choices=MediaServerType.choices, + default=MediaServerType.PLEX, + help_text=_('Server type'), + ) + host = db.models.CharField( + _('host'), + db_index=True, + max_length=200, + help_text=_('Hostname or IP address of the media server'), + ) + port = db.models.PositiveIntegerField( + _('port'), + db_index=True, + help_text=_('Port number of the media server'), + ) + use_https = db.models.BooleanField( + _('use https'), + default=False, + help_text=_('Connect to the media server over HTTPS'), + ) + verify_https = db.models.BooleanField( + _('verify https'), + default=True, + help_text=_('If connecting over HTTPS, verify the SSL certificate is valid'), + ) + options = db.models.JSONField( + _('options'), + encoder=JSONEncoder, + blank=False, + null=True, + help_text=_('Options for the media server'), + ) + + def __str__(self): + return f'{self.get_server_type_display()} server at {self.url}' + + class Meta: + verbose_name = _('Media Server') + verbose_name_plural = _('Media Servers') + unique_together = ( + ('host', 'port'), + ) + + @property + def url(self): + scheme = 'https' if self.use_https else 'http' + return f'{scheme}://{self.host.strip()}:{self.port}' + + @property + def icon(self): + return self.ICONS.get(self.server_type) + + @property + def handler(self): + handler_class = self.HANDLERS.get(self.server_type) + return 
handler_class(self) + + def validate(self): + return self.handler.validate() + + def update(self): + return self.handler.update() + + def get_help_html(self): + return self.handler.HELP diff --git a/tubesync/sync/models/metadata.py b/tubesync/sync/models/metadata.py new file mode 100644 index 00000000..17d214fb --- /dev/null +++ b/tubesync/sync/models/metadata.py @@ -0,0 +1,153 @@ +import uuid +from common.json import JSONEncoder +from common.timestamp import timestamp_to_datetime +from common.utils import django_queryset_generator as qs_gen +from django import db +from django.utils.translation import gettext_lazy as _ +from .media import Media + + +class Metadata(db.models.Model): + ''' + Metadata for an indexed `Media` item. + ''' + class Meta: + db_table = 'sync_media_metadata' + verbose_name = _('Metadata about Media') + verbose_name_plural = _('Metadata about Media') + unique_together = ( + ('media', 'site', 'key'), + ) + get_latest_by = ["-retrieved", "-created"] + + uuid = db.models.UUIDField( + _('uuid'), + primary_key=True, + editable=False, + default=uuid.uuid4, + help_text=_('UUID of the metadata'), + ) + media = db.models.OneToOneField( + Media, + # on_delete=models.DO_NOTHING, + on_delete=db.models.SET_NULL, + related_name='new_metadata', + help_text=_('Media the metadata belongs to'), + null=True, + parent_link=False, + ) + site = db.models.CharField( + _('site'), + max_length=256, + blank=True, + db_index=True, + null=False, + default='Youtube', + help_text=_('Site from which the metadata was retrieved'), + ) + key = db.models.CharField( + _('key'), + max_length=256, + blank=True, + db_index=True, + null=False, + default='', + help_text=_('Media identifier at the site from which the metadata was retrieved'), + ) + created = db.models.DateTimeField( + _('created'), + auto_now_add=True, + db_index=True, + help_text=_('Date and time the metadata was created'), + ) + retrieved = db.models.DateTimeField( + _('retrieved'), + auto_now_add=True, + 
db_index=True, + help_text=_('Date and time the metadata was retrieved'), + ) + uploaded = db.models.DateTimeField( + _('uploaded'), + db_index=True, + null=True, + help_text=_('Date and time the media was uploaded'), + ) + published = db.models.DateTimeField( + _('published'), + db_index=True, + null=True, + help_text=_('Date and time the media was published'), + ) + value = db.models.JSONField( + _('value'), + encoder=JSONEncoder, + null=False, + default=dict, + help_text=_('JSON metadata object'), + ) + + + def __str__(self): + template = '"{}" from {} at: {}' + return template.format( + self.key, + self.site, + self.retrieved.isoformat(timespec='seconds'), + ) + + @db.transaction.atomic(durable=False) + def ingest_formats(self, formats=list(), /): + number = 0 + for number, format in enumerate(formats, start=1): + mdf, created = self.format.get_or_create(site=self.site, key=self.key, number=number) + mdf.value = format + mdf.save() + if number > 0: + # delete any numbers we did not overwrite or create + self.format.filter(site=self.site, key=self.key, number__gt=number).delete() + + @property + def with_formats(self): + formats = self.format.all().order_by('number') + formats_list = [ f.value for f in qs_gen(formats) ] + metadata = self.value.copy() + metadata.update(dict(formats=formats_list)) + return metadata + + @db.transaction.atomic(durable=False) + def ingest_metadata(self, data): + assert isinstance(data, dict), type(data) + + try: + self.retrieved = timestamp_to_datetime( + self.media.get_metadata_first_value( + 'epoch', + arg_dict=data, + ) + ) or self.created + except AssertionError: + self.retrieved = self.created + + try: + self.published = timestamp_to_datetime( + self.media.get_metadata_first_value( + ('release_timestamp', 'timestamp',), + arg_dict=data, + ) + ) or self.media.published + except AssertionError: + self.published = self.media.published + + self.value = data.copy() # try not to have side-effects for the caller + formats_key = 
self.media.get_metadata_field('formats') + formats = self.value.pop(formats_key, list()) + self.uploaded = min( + self.published, + self.retrieved, + self.media.created, + ) + self.save() + self.ingest_formats(formats) + + return self.with_formats + diff --git a/tubesync/sync/models/metadata_format.py b/tubesync/sync/models/metadata_format.py new file mode 100644 index 00000000..c116575b --- /dev/null +++ b/tubesync/sync/models/metadata_format.py @@ -0,0 +1,75 @@ +import uuid +from common.json import JSONEncoder +from django import db +from django.utils.translation import gettext_lazy as _ +from .metadata import Metadata + +class MetadataFormat(db.models.Model): + ''' + A format from the Metadata for an indexed `Media` item. + ''' + class Meta: + db_table = f'{Metadata._meta.db_table}_format' + verbose_name = _('Format from Media Metadata') + verbose_name_plural = _('Formats from Media Metadata') + unique_together = ( + ('metadata', 'site', 'key', 'number'), + ) + ordering = ['site', 'key', 'number'] + + uuid = db.models.UUIDField( + _('uuid'), + primary_key=True, + editable=False, + default=uuid.uuid4, + help_text=_('UUID of the format'), + ) + metadata = db.models.ForeignKey( + Metadata, + # on_delete=models.DO_NOTHING, + on_delete=db.models.CASCADE, + related_name='format', + help_text=_('Metadata the format belongs to'), + null=False, + ) + site = db.models.CharField( + _('site'), + max_length=256, + blank=True, + db_index=True, + null=False, + default='Youtube', + help_text=_('Site from which the format is available'), + ) + key = db.models.CharField( + _('key'), + max_length=256, + blank=True, + db_index=True, + null=False, + default='', + help_text=_('Media identifier at the site from which this format is available'), + ) + number = db.models.PositiveIntegerField( + _('number'), + blank=False, + null=False, + help_text=_('Ordering number for this format'), + ) + value = db.models.JSONField( + _('value'), + encoder=JSONEncoder, + null=False, + default=dict, + 
help_text=_('JSON metadata format object'), + ) + + + def __str__(self): + template = '#{:n} "{}" from {}: {}' + return template.format( + self.number, + self.key, + self.site, + self.value.get('format') or self.value.get('format_id'), + ) diff --git a/tubesync/sync/models/source.py b/tubesync/sync/models/source.py new file mode 100644 index 00000000..74f75278 --- /dev/null +++ b/tubesync/sync/models/source.py @@ -0,0 +1,549 @@ +import os +import re +import uuid +from pathlib import Path +from django import db +from django.conf import settings +from django.core.exceptions import SuspiciousOperation +from django.core.validators import RegexValidator +from django.utils import timezone +from django.utils.text import slugify +from django.utils.translation import gettext_lazy as _ +from ..choices import (Val, + SponsorBlock_Category, YouTube_SourceType, IndexSchedule, + CapChoices, Fallback, FileExtension, FilterSeconds, + SourceResolution, SourceResolutionInteger, + YouTube_VideoCodec, YouTube_AudioCodec, +) +from ..fields import CommaSepChoiceField +from ..youtube import ( + get_media_info as get_youtube_media_info, + get_channel_image_info as get_youtube_channel_image_info, +) +from ._migrations import media_file_storage +from ._private import _srctype_dict + + +class Source(db.models.Model): + ''' + A Source is a source of media. Currently, this is either a YouTube channel + or a YouTube playlist. 
+ ''' + + sponsorblock_categories = CommaSepChoiceField( + _(''), + max_length=128, + possible_choices=SponsorBlock_Category.choices, + all_choice='all', + allow_all=True, + all_label='(All Categories)', + default='all', + help_text=_('Select the SponsorBlock categories that you wish to be removed from downloaded videos.'), + ) + embed_metadata = db.models.BooleanField( + _('embed metadata'), + default=False, + help_text=_('Embed metadata from source into file'), + ) + embed_thumbnail = db.models.BooleanField( + _('embed thumbnail'), + default=False, + help_text=_('Embed thumbnail into the file'), + ) + enable_sponsorblock = db.models.BooleanField( + _('enable sponsorblock'), + default=True, + help_text=_('Use SponsorBlock?'), + ) + + # Fontawesome icons used for the source on the front end + ICONS = _srctype_dict('') + + # Format to use to display a URL for the source + URLS = dict(zip( + YouTube_SourceType.values, + ( + 'https://www.youtube.com/c/{key}', + 'https://www.youtube.com/channel/{key}', + 'https://www.youtube.com/playlist?list={key}', + ), + )) + + # Format used to create indexable URLs + INDEX_URLS = dict(zip( + YouTube_SourceType.values, + ( + 'https://www.youtube.com/c/{key}/{type}', + 'https://www.youtube.com/channel/{key}/{type}', + 'https://www.youtube.com/playlist?list={key}', + ), + )) + + # Callback functions to get a list of media from the source + INDEXERS = _srctype_dict(get_youtube_media_info) + + # Field names to find the media ID used as the key when storing media + KEY_FIELD = _srctype_dict('id') + + uuid = db.models.UUIDField( + _('uuid'), + primary_key=True, + editable=False, + default=uuid.uuid4, + help_text=_('UUID of the source'), + ) + created = db.models.DateTimeField( + _('created'), + auto_now_add=True, + db_index=True, + help_text=_('Date and time the source was created'), + ) + last_crawl = db.models.DateTimeField( + _('last crawl'), + db_index=True, + null=True, + blank=True, + help_text=_('Date and time the source was last 
crawled'), + ) + source_type = db.models.CharField( + _('source type'), + max_length=1, + db_index=True, + choices=YouTube_SourceType.choices, + default=YouTube_SourceType.CHANNEL, + help_text=_('Source type'), + ) + key = db.models.CharField( + _('key'), + max_length=100, + db_index=True, + unique=True, + help_text=_('Source key, such as exact YouTube channel name or playlist ID'), + ) + name = db.models.CharField( + _('name'), + max_length=100, + db_index=True, + unique=True, + help_text=_('Friendly name for the source, used locally in TubeSync only'), + ) + directory = db.models.CharField( + _('directory'), + max_length=100, + db_index=True, + unique=True, + help_text=_('Directory name to save the media into'), + ) + media_format = db.models.CharField( + _('media format'), + max_length=200, + default=settings.MEDIA_FORMATSTR_DEFAULT, + help_text=_('File format to use for saving files, detailed options at bottom of page.'), + ) + index_schedule = db.models.IntegerField( + _('index schedule'), + choices=IndexSchedule.choices, + db_index=True, + default=IndexSchedule.EVERY_24_HOURS, + help_text=_('Schedule of how often to index the source for new media'), + ) + download_media = db.models.BooleanField( + _('download media'), + default=True, + help_text=_('Download media from this source, if not selected the source will only be indexed'), + ) + index_videos = db.models.BooleanField( + _('index videos'), + default=True, + help_text=_('Index video media from this source'), + ) + index_streams = db.models.BooleanField( + _('index streams'), + default=False, + help_text=_('Index live stream media from this source'), + ) + download_cap = db.models.IntegerField( + _('download cap'), + choices=CapChoices.choices, + default=CapChoices.CAP_NOCAP, + help_text=_('Do not download media older than this capped date'), + ) + delete_old_media = db.models.BooleanField( + _('delete old media'), + default=False, + help_text=_('Delete old media after "days to keep" days?'), + ) + 
days_to_keep = db.models.PositiveSmallIntegerField( + _('days to keep'), + default=14, + help_text=_( + 'If "delete old media" is ticked, the number of days after which ' + 'to automatically delete media' + ), + ) + filter_text = db.models.CharField( + _('filter string'), + max_length=200, + default='', + blank=True, + help_text=_('Regex compatible filter string for video titles'), + ) + filter_text_invert = db.models.BooleanField( + _('invert filter text matching'), + default=False, + help_text=_('Invert filter string regex match, skip any matching titles when selected'), + ) + filter_seconds = db.models.PositiveIntegerField( + _('filter seconds'), + blank=True, + null=True, + help_text=_('Filter Media based on Min/Max duration. Leave blank or 0 to disable filtering'), + ) + filter_seconds_min = db.models.BooleanField( + _('filter seconds min/max'), + choices=FilterSeconds.choices, + default=Val(FilterSeconds.MIN), + help_text=_( + 'When Filter Seconds is > 0, do we skip on minimum (video shorter than limit) or maximum (video ' + 'greater than maximum) video duration' + ), + ) + delete_removed_media = db.models.BooleanField( + _('delete removed media'), + default=False, + help_text=_('Delete media that is no longer on this playlist'), + ) + delete_files_on_disk = db.models.BooleanField( + _('delete files on disk'), + default=False, + help_text=_('Delete files on disk when they are removed from TubeSync'), + ) + source_resolution = db.models.CharField( + _('source resolution'), + max_length=8, + db_index=True, + choices=SourceResolution.choices, + default=SourceResolution.VIDEO_1080P, + help_text=_('Source resolution, desired video resolution to download'), + ) + source_vcodec = db.models.CharField( + _('source video codec'), + max_length=8, + db_index=True, + choices=YouTube_VideoCodec.choices, + default=YouTube_VideoCodec.VP9, + help_text=_('Source video codec, desired video encoding format to download (ignored if "resolution" is audio only)'), + ) + 
source_acodec = db.models.CharField( + _('source audio codec'), + max_length=8, + db_index=True, + choices=YouTube_AudioCodec.choices, + default=YouTube_AudioCodec.OPUS, + help_text=_('Source audio codec, desired audio encoding format to download'), + ) + prefer_60fps = db.models.BooleanField( + _('prefer 60fps'), + default=True, + help_text=_('Where possible, prefer 60fps media for this source'), + ) + prefer_hdr = db.models.BooleanField( + _('prefer hdr'), + default=False, + help_text=_('Where possible, prefer HDR media for this source'), + ) + fallback = db.models.CharField( + _('fallback'), + max_length=1, + db_index=True, + choices=Fallback.choices, + default=Fallback.NEXT_BEST_HD, + help_text=_('What to do when media in your source resolution and codecs is not available'), + ) + copy_channel_images = db.models.BooleanField( + _('copy channel images'), + default=False, + help_text=_('Copy channel banner and avatar. These may be detected and used by some media servers'), + ) + copy_thumbnails = db.models.BooleanField( + _('copy thumbnails'), + default=False, + help_text=_('Copy thumbnails with the media, these may be detected and used by some media servers'), + ) + write_nfo = db.models.BooleanField( + _('write nfo'), + default=False, + help_text=_('Write an NFO file in XML with the media info, these may be detected and used by some media servers'), + ) + write_json = db.models.BooleanField( + _('write json'), + default=False, + help_text=_('Write a JSON file with the media info, these may be detected and used by some media servers'), + ) + has_failed = db.models.BooleanField( + _('has failed'), + default=False, + help_text=_('Source has failed to index media'), + ) + + write_subtitles = db.models.BooleanField( + _('write subtitles'), + default=False, + help_text=_('Download video subtitles'), + ) + + auto_subtitles = db.models.BooleanField( + _('accept auto-generated subs'), + default=False, + help_text=_('Accept auto-generated subtitles'), + ) + sub_langs = 
db.models.CharField( + _('subs langs'), + max_length=30, + default='en', + help_text=_('List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat'), + validators=[ + RegexValidator( + regex=r"^(\-?[\_\.a-zA-Z-]+(,|$))+", + message=_('Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat'), + ), + ], + ) + + def __str__(self): + return self.name + + class Meta: + verbose_name = _('Source') + verbose_name_plural = _('Sources') + + @property + def icon(self): + return self.ICONS.get(self.source_type) + + @property + def slugname(self): + replaced = self.name.replace('_', '-').replace('&', 'and').replace('+', 'and') + return slugify(replaced)[:80] + + def deactivate(self): + self.download_media = False + self.index_streams = False + self.index_videos = False + self.index_schedule = IndexSchedule.NEVER + self.save(update_fields={ + 'download_media', + 'index_streams', + 'index_videos', + 'index_schedule', + }) + + @property + def is_active(self): + active = ( + self.download_media or + self.index_streams or + self.index_videos + ) + return self.index_schedule and active + + @property + def is_audio(self): + return self.source_resolution == SourceResolution.AUDIO.value + + @property + def is_playlist(self): + return self.source_type == YouTube_SourceType.PLAYLIST.value + + @property + def is_video(self): + return not self.is_audio + + @property + def download_cap_date(self): + delta = self.download_cap + if delta > 0: + return timezone.now() - timezone.timedelta(seconds=delta) + else: + return False + + @property + def days_to_keep_date(self): + delta = self.days_to_keep + if delta > 0: + return timezone.now() - timezone.timedelta(days=delta) + else: + return False + + @property + def extension(self): + ''' + The extension is also used by youtube-dl to set the output container. As + it is possible to quite easily pick combinations of codecs and containers + which are invalid (e.g. 
OPUS audio in an MP4 container) just set this for + people. All video is set to mkv containers, audio-only is set to m4a or ogg + depending on audio codec. + ''' + if self.is_audio: + if self.source_acodec == Val(YouTube_AudioCodec.MP4A): + return Val(FileExtension.M4A) + elif self.source_acodec == Val(YouTube_AudioCodec.OPUS): + return Val(FileExtension.OGG) + else: + raise ValueError('Unable to choose audio extension, unknown acodec') + else: + return Val(FileExtension.MKV) + + @classmethod + def create_url(cls, source_type, key): + url = cls.URLS.get(source_type) + return url.format(key=key) + + @classmethod + def create_index_url(cls, source_type, key, type): + url = cls.INDEX_URLS.get(source_type) + return url.format(key=key, type=type) + + @property + def url(self): + return self.__class__.create_url(self.source_type, self.key) + + def get_index_url(self, type): + return self.__class__.create_index_url(self.source_type, self.key, type) + + @property + def format_summary(self): + if self.is_audio: + vc = 'none' + else: + vc = self.source_vcodec + ac = self.source_acodec + f = ' 60FPS' if self.is_video and self.prefer_60fps else '' + h = ' HDR' if self.is_video and self.prefer_hdr else '' + return f'{self.source_resolution} (video:{vc}, audio:{ac}){f}{h}'.strip() + + @property + def directory_path(self): + download_dir = Path(media_file_storage.location) + return download_dir / self.type_directory_path + + @property + def type_directory_path(self): + if settings.SOURCE_DOWNLOAD_DIRECTORY_PREFIX: + if self.is_audio: + return Path(settings.DOWNLOAD_AUDIO_DIR) / self.directory + else: + return Path(settings.DOWNLOAD_VIDEO_DIR) / self.directory + else: + return Path(self.directory) + + def make_directory(self): + return os.makedirs(self.directory_path, exist_ok=True) + + @property + def get_image_url(self): + if self.is_playlist: + raise SuspiciousOperation('This source is a playlist so it doesn\'t have thumbnail.') + + return 
get_youtube_channel_image_info(self.url) + + + def directory_exists(self): + return (os.path.isdir(self.directory_path) and + os.access(self.directory_path, os.W_OK)) + + @property + def key_field(self): + return self.KEY_FIELD.get(self.source_type, '') + + @property + def source_resolution_height(self): + return SourceResolutionInteger.get(self.source_resolution, 0) + + @property + def can_fallback(self): + return self.fallback != Val(Fallback.FAIL) + + @property + def example_media_format_dict(self): + ''' + Populates a dict with real-ish and some placeholder data for media name + format strings. Used for example filenames and media_format validation. + ''' + fmt = [] + if self.source_resolution: + fmt.append(self.source_resolution) + if self.source_vcodec: + fmt.append(self.source_vcodec.lower()) + if self.source_acodec: + fmt.append(self.source_acodec.lower()) + if self.prefer_60fps: + fmt.append('60fps') + if self.prefer_hdr: + fmt.append('hdr') + now = timezone.now() + return { + 'yyyymmdd': now.strftime('%Y%m%d'), + 'yyyy_mm_dd': now.strftime('%Y-%m-%d'), + 'yyyy': now.strftime('%Y'), + 'mm': now.strftime('%m'), + 'dd': now.strftime('%d'), + 'source': self.slugname, + 'source_full': self.name, + 'uploader': 'Some Channel Name', + 'title': 'some-media-title-name', + 'title_full': 'Some Media Title Name', + 'key': 'SoMeUnIqUiD', + 'format': '-'.join(fmt), + 'playlist_title': 'Some Playlist Title', + 'video_order': '01', + 'ext': self.extension, + 'resolution': self.source_resolution if self.source_resolution else '', + 'height': '720' if self.source_resolution else '', + 'width': '1280' if self.source_resolution else '', + 'vcodec': self.source_vcodec.lower() if self.source_vcodec else '', + 'acodec': self.source_acodec.lower(), + 'fps': '24' if self.source_resolution else '', + 'hdr': 'hdr' if self.source_resolution else '' + } + + def get_example_media_format(self): + try: + return self.media_format.format(**self.example_media_format_dict) + except Exception 
as e: + return '' + + def is_regex_match(self, media_item_title): + if not self.filter_text: + return True + return bool(re.search(self.filter_text, media_item_title)) + + def get_index(self, type): + indexer = self.INDEXERS.get(self.source_type, None) + if not callable(indexer): + raise Exception(f'Source type "{self.source_type}" has no indexer') + days = None + if self.download_cap_date: + days = timezone.timedelta(seconds=self.download_cap).days + response = indexer(self.get_index_url(type=type), days=days) + if not isinstance(response, dict): + return [] + entries = response.get('entries', []) + return entries + + def index_media(self): + ''' + Index the media source returning a list of media metadata as dicts. + ''' + entries = list() + if self.index_videos: + entries += self.get_index('videos') + # Playlists do something different that I have yet to figure out + if not self.is_playlist: + if self.index_streams: + entries += self.get_index('streams') + + if settings.MAX_ENTRIES_PROCESSING: + entries = entries[:settings.MAX_ENTRIES_PROCESSING] + return entries + diff --git a/tubesync/sync/signals.py b/tubesync/sync/signals.py index 32b0b5f6..790ce1c2 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -2,6 +2,7 @@ from functools import partial from pathlib import Path from tempfile import TemporaryDirectory from django.conf import settings +from django.db import IntegrityError from django.db.models.signals import pre_save, post_save, pre_delete, post_delete from django.db.transaction import on_commit from django.dispatch import receiver @@ -9,7 +10,7 @@ from django.utils.translation import gettext_lazy as _ from background_task.signals import task_failed from background_task.models import Task from common.logger import log -from .models import Source, Media, MediaServer +from .models import Source, Media, MediaServer, Metadata from .tasks import (delete_task_by_source, delete_task_by_media, index_source_task, download_media_thumbnail, 
download_media_metadata, map_task_to_instance, check_source_directory_exists, @@ -21,20 +22,6 @@ from .filtering import filter_media from .choices import Val, YouTube_SourceType -def is_relative_to(self, *other): - """Return True if the path is relative to another path or False. - """ - try: - self.relative_to(*other) - return True - except ValueError: - return False - -# patch Path for Python 3.8 -if not hasattr(Path, 'is_relative_to'): - Path.is_relative_to = is_relative_to - - @receiver(pre_save, sender=Source) def source_pre_save(sender, instance, **kwargs): # Triggered before a source is saved, if the schedule has been updated recreate @@ -272,8 +259,15 @@ def media_post_save(sender, instance, created, **kwargs): thumbnail_url, verbose_name=verbose_name.format(instance.name), ) + media_file_exists = False + try: + media_file_exists |= instance.media_file_exists + media_file_exists |= instance.filepath.exists() + except OSError as e: + log.exception(e) + pass # If the media has not yet been downloaded schedule it to be downloaded - if not (instance.media_file_exists or instance.filepath.exists() or existing_media_download_task): + if not (media_file_exists or existing_media_download_task): # The file was deleted after it was downloaded, skip this media. 
if instance.can_download and instance.downloaded: skip_changed = True != instance.skip @@ -310,12 +304,36 @@ def media_pre_delete(sender, instance, **kwargs): # Remove thumbnail file for deleted media if instance.thumb: instance.thumb.delete(save=False) + # Save the metadata site & thumbnail URL to the metadata column + existing_metadata = instance.loaded_metadata + metadata_str = instance.metadata or '{}' + arg_dict = instance.metadata_loads(metadata_str) + site_field = instance.get_metadata_field('extractor_key') + thumbnail_field = instance.get_metadata_field('thumbnail') + arg_dict.update({ + site_field: instance.get_metadata_first_value( + 'extractor_key', + 'Youtube', + arg_dict=existing_metadata, + ), + thumbnail_field: thumbnail_url, + }) + instance.metadata = instance.metadata_dumps(arg_dict=arg_dict) + # Do not create more tasks before deleting + instance.manual_skip = True + instance.save() @receiver(post_delete, sender=Media) def media_post_delete(sender, instance, **kwargs): # Remove the video file, when configured to do so - if instance.source.delete_files_on_disk and instance.media_file: + remove_files = ( + instance.source and + instance.source.delete_files_on_disk and + instance.downloaded and + instance.media_file + ) + if remove_files: video_path = Path(str(instance.media_file.path)).resolve(strict=False) instance.media_file.delete(save=False) # the other files we created have these known suffixes @@ -370,3 +388,58 @@ def media_post_delete(sender, instance, **kwargs): log.info(f'Deleting file for: {instance} path: {file}') delete_file(file) + # Create a media entry for the indexing task to find + # Requirements: + # source, key, duration, title, published + created = False + create_for_indexing_task = ( + not ( + #not instance.downloaded and + instance.skip and + instance.manual_skip + ) + ) + if create_for_indexing_task: + skipped_media, created = Media.objects.get_or_create( + key=instance.key, + source=instance.source, + ) + if created: + 
old_metadata = instance.loaded_metadata + site_field = instance.get_metadata_field('extractor_key') + thumbnail_url = instance.thumbnail + thumbnail_field = instance.get_metadata_field('thumbnail') + skipped_media.downloaded = False + skipped_media.duration = instance.duration + arg_dict=dict( + _media_instance_was_deleted=True, + ) + arg_dict.update({ + site_field: old_metadata.get(site_field), + thumbnail_field: thumbnail_url, + }) + skipped_media.metadata = skipped_media.metadata_dumps( + arg_dict=arg_dict, + ) + skipped_media.published = instance.published + skipped_media.title = instance.title + skipped_media.skip = True + skipped_media.manual_skip = True + skipped_media.save() + # Re-use the old metadata if it exists + instance_qs = Metadata.objects.filter( + media__isnull=True, + site=old_metadata.get(site_field) or 'Youtube', + key=skipped_media.key, + ) + try: + instance_qs.update(media=skipped_media) + except IntegrityError: + # Delete the new metadata + Metadata.objects.filter(media=skipped_media).delete() + try: + instance_qs.update(media=skipped_media) + except IntegrityError: + # Delete the old metadata if it still failed + instance_qs.delete() + diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index efd03152..d29e8239 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -8,6 +8,7 @@ import os import json import math import random +import requests import time import uuid from io import BytesIO @@ -29,7 +30,8 @@ from background_task.exceptions import InvalidTaskError from background_task.models import Task, CompletedTask from common.logger import log from common.errors import ( NoFormatException, NoMediaException, - NoMetadataException, DownloadFailedException, ) + NoMetadataException, NoThumbnailException, + DownloadFailedException, ) from common.utils import ( django_queryset_generator as qs_gen, remove_enclosed, ) from .choices import Val, TaskQueue @@ -330,9 +332,13 @@ def index_source_task(source_id): media.duration = 
float(video.get(fields('duration', media), None) or 0) or None media.title = str(video.get(fields('title', media), ''))[:200] timestamp = video.get(fields('timestamp', media), None) - published_dt = media.metadata_published(timestamp) - if published_dt is not None: - media.published = published_dt + try: + published_dt = media.ts_to_dt(timestamp) + except AssertionError: + pass + else: + if published_dt: + media.published = published_dt try: media.save() except IntegrityError as e: @@ -348,8 +354,21 @@ def index_source_task(source_id): ) if new_media_instance: log.info(f'Indexed new media: {source} / {media}') + log.info(f'Scheduling tasks to download thumbnail for: {media.key}') + thumbnail_fmt = 'https://i.ytimg.com/vi/{}/{}default.jpg' + vn_fmt = _('Downloading {} thumbnail for: "{}": {}') + for prefix in ('hq', 'sd', 'maxres',): + thumbnail_url = thumbnail_fmt.format( + media.key, + prefix, + ) + download_media_thumbnail( + str(media.pk), + thumbnail_url, + verbose_name=vn_fmt.format(prefix, media.key, media.name), + ) log.info(f'Scheduling task to download metadata for: {media.url}') - verbose_name = _('Downloading metadata for: {}: "{}"') + verbose_name = _('Downloading metadata for: "{}": {}') download_media_metadata( str(media.pk), verbose_name=verbose_name.format(media.key, media.name), @@ -500,9 +519,17 @@ def download_media_metadata(media_id): # Media must have a valid upload date if upload_date: media.published = timezone.make_aware(upload_date) - published = media.metadata_published() - if published: - media.published = published + timestamp = media.get_metadata_first_value( + ('release_timestamp', 'timestamp',), + arg_dict=response, + ) + try: + published_dt = media.ts_to_dt(timestamp) + except AssertionError: + pass + else: + if published_dt: + media.published = published_dt # Store title in DB so it's fast to access if media.metadata_title: @@ -536,7 +563,15 @@ def download_media_thumbnail(media_id, url): return width = getattr(settings, 
'MEDIA_THUMBNAIL_WIDTH', 430) height = getattr(settings, 'MEDIA_THUMBNAIL_HEIGHT', 240) - i = get_remote_image(url) + try: + try: + i = get_remote_image(url) + except requests.HTTPError as re: + if 404 != re.response.status_code: + raise + raise NoThumbnailException(re.response.reason) from re + except NoThumbnailException as e: + raise InvalidTaskError(str(e.__cause__)) from e if (i.width > width) and (i.height > height): log.info(f'Resizing {i.width}x{i.height} thumbnail to ' f'{width}x{height}: {url}') @@ -555,6 +590,16 @@ def download_media_thumbnail(media_id, url): ) i = image_file = None log.info(f'Saved thumbnail for: {media} from: {url}') + # After media is downloaded, copy the updated thumbnail. + copy_thumbnail = ( + media.downloaded and + media.source.copy_thumbnails and + media.thumb_file_exists + ) + if copy_thumbnail: + log.info(f'Copying media thumbnail from: {media.thumb.path} ' + f'to: {media.thumbpath}') + copyfile(media.thumb.path, media.thumbpath) return True @@ -888,6 +933,10 @@ def delete_all_media_for_source(source_id, source_name, source_directory): for media in qs_gen(mqs): log.info(f'Deleting media for source: {source_name} item: {media.name}') with atomic(): + #media.downloaded = False + media.skip = True + media.manual_skip = True + media.save() media.delete() # Remove the directory, if the user requested that directory_path = Path(source_directory) diff --git a/tubesync/sync/templates/sync/mediaserver.html b/tubesync/sync/templates/sync/mediaserver.html index 23546eba..2626d77c 100644 --- a/tubesync/sync/templates/sync/mediaserver.html +++ b/tubesync/sync/templates/sync/mediaserver.html @@ -28,7 +28,7 @@ Verify HTTPS Verify HTTPS
{% if mediaserver.verify_https %}{% else %}{% endif %} - {% for name, value in mediaserver.loaded_options.items %} + {% for name, value in mediaserver.options.items %} {{ name|title }} {{ name|title }}
{% if name in private_options %}{{ value|truncatechars:6 }} (hidden){% else %}{{ value }}{% endif %} diff --git a/tubesync/sync/templates/sync/task-schedule.html b/tubesync/sync/templates/sync/task-schedule.html new file mode 100644 index 00000000..63af2eb3 --- /dev/null +++ b/tubesync/sync/templates/sync/task-schedule.html @@ -0,0 +1,34 @@ +{% extends 'base.html' %} + +{% block headtitle %}Schedule task{% endblock %} + +{% block content %} +
+
+

Schedule task

+

+ If you don't want to wait for the existing schedule to be triggered, + you can use this to change when the task will be scheduled to run. + It is not guaranteed to run at any exact time, because when a task + requests to run and when a slot to execute it, in the appropriate + queue and with the priority level assigned, is dependent on how long + other tasks are taking to complete the assigned work. +

+

+ This will change the time that the task is requesting to be the + current time, or a chosen future time. +

+
+
+
+
+ {% csrf_token %} + {% include 'simpleform.html' with form=form %} +
+
+ +
+
+
+
+{% endblock %} diff --git a/tubesync/sync/templates/sync/tasks.html b/tubesync/sync/templates/sync/tasks.html index 9cb9dfe1..33f6c40b 100644 --- a/tubesync/sync/templates/sync/tasks.html +++ b/tubesync/sync/templates/sync/tasks.html @@ -43,11 +43,16 @@

{% for task in errors %} - - {{ task }}, attempted {{ task.attempts }} time{{ task.attempts|pluralize }}
- Error: "{{ task.error_message }}"
+
{% empty %} There are no tasks with errors on this page. {% endfor %} @@ -66,11 +71,17 @@

{% for task in scheduled %} - - {{ task }}
- {% if task.instance.index_schedule and task.repeat > 0 %}Scheduled to run {{ task.instance.get_index_schedule_display|lower }}.
{% endif %} - Task will run {% if task.run_now %}immediately{% else %}at {{ task.run_at|date:'Y-m-d H:i:s' }}{% endif %} -
+ {% empty %} There are no scheduled tasks on this page. {% endfor %} diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index 303aa18a..24f0d092 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -1836,5 +1836,6 @@ class TasksTestCase(TestCase): cleanup_old_media() self.assertEqual(src1.media_source.all().count(), 3) - self.assertEqual(src2.media_source.all().count(), 2) + self.assertEqual(src2.media_source.all().count(), 3) self.assertEqual(Media.objects.filter(pk=m22.pk).exists(), False) + self.assertEqual(Media.objects.filter(source=src2, key=m22.key, skip=True).exists(), True) diff --git a/tubesync/sync/urls.py b/tubesync/sync/urls.py index 9cec74ee..2dffea5d 100644 --- a/tubesync/sync/urls.py +++ b/tubesync/sync/urls.py @@ -3,7 +3,7 @@ from .views import (DashboardView, SourcesView, ValidateSourceView, AddSourceVie SourceView, UpdateSourceView, DeleteSourceView, MediaView, MediaThumbView, MediaItemView, MediaRedownloadView, MediaSkipView, MediaEnableView, MediaContent, TasksView, CompletedTasksView, ResetTasks, - MediaServersView, AddMediaServerView, MediaServerView, + TaskScheduleView, MediaServersView, AddMediaServerView, MediaServerView, DeleteMediaServerView, UpdateMediaServerView) @@ -122,6 +122,18 @@ urlpatterns = [ name='tasks', ), + path( + 'task//schedule/now', + TaskScheduleView.as_view(), + name='run-task', + ), + + path( + 'task//schedule/', + TaskScheduleView.as_view(), + name='schedule-task', + ), + path( 'tasks-completed', CompletedTasksView.as_view(), diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index 917a9531..5bc90d25 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -65,6 +65,7 @@ def get_remote_image(url, force_rgb=True): '(KHTML, like Gecko) Chrome/69.0.3497.64 Safari/537.36') } r = requests.get(url, headers=headers, stream=True, timeout=60) + r.raise_for_status() r.raw.decode_content = True i = Image.open(r.raw) if force_rgb: diff --git a/tubesync/sync/views.py 
b/tubesync/sync/views.py index a23597ce..10cfc5db 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -1,6 +1,5 @@ import glob import os -import json from base64 import b64decode import pathlib import sys @@ -20,11 +19,12 @@ from django.utils.text import slugify from django.utils._os import safe_join from django.utils import timezone from django.utils.translation import gettext_lazy as _ +from common.timestamp import timestamp_to_datetime from common.utils import append_uri_params from background_task.models import Task, CompletedTask from .models import Source, Media, MediaServer from .forms import (ValidateSourceForm, ConfirmDeleteSourceForm, RedownloadMediaForm, - SkipMediaForm, EnableMediaForm, ResetTasksForm, + SkipMediaForm, EnableMediaForm, ResetTasksForm, ScheduleTaskForm, ConfirmDeleteMediaServerForm) from .utils import validate_url, delete_file, multi_key_sort, mkdir_p from .tasks import (map_task_to_instance, get_error_message, @@ -168,6 +168,7 @@ class ValidateSourceView(FormView): template_name = 'sync/source-validate.html' form_class = ValidateSourceForm errors = { + 'invalid_source': _('Invalid type for the source.'), 'invalid_url': _('Invalid URL, the URL must for a "{item}" must be in ' 'the format of "{example}". 
The error was: {error}.'), } @@ -523,6 +524,9 @@ class MediaThumbView(DetailView): def get(self, request, *args, **kwargs): media = self.get_object() + # Thumbnail media is never updated so we can ask the browser to cache it + # for ages, 604800 = 7 days + max_age = 604800 if media.thumb_file_exists: thumb_path = pathlib.Path(media.thumb.path) thumb = thumb_path.read_bytes() @@ -532,10 +536,10 @@ class MediaThumbView(DetailView): thumb = b64decode('R0lGODlhAQABAIABAP///wAAACH5BAEKAAEALAA' 'AAAABAAEAAAICTAEAOw==') content_type = 'image/gif' + max_age = 600 response = HttpResponse(thumb, content_type=content_type) - # Thumbnail media is never updated so we can ask the browser to cache it - # for ages, 604800 = 7 days - response['Cache-Control'] = 'public, max-age=604800' + + response['Cache-Control'] = f'public, max-age={max_age}' return response @@ -1001,6 +1005,91 @@ class ResetTasks(FormView): return append_uri_params(url, {'message': 'reset'}) +class TaskScheduleView(FormView, SingleObjectMixin): + ''' + Confirm that the task should be re-scheduled. 
+ ''' + + template_name = 'sync/task-schedule.html' + form_class = ScheduleTaskForm + model = Task + errors = dict( + invalid_when=_('The type ({}) was incorrect.'), + when_before_now=_('The date and time must be in the future.'), + ) + + def __init__(self, *args, **kwargs): + self.now = timezone.now() + self.object = None + self.timestamp = None + self.when = None + super().__init__(*args, **kwargs) + + def dispatch(self, request, *args, **kwargs): + self.now = timezone.now() + self.object = self.get_object() + self.timestamp = kwargs.get('timestamp') + try: + self.when = timestamp_to_datetime(self.timestamp) + except AssertionError: + self.when = None + if self.when is None: + self.when = self.now + # Use the next minute and zero seconds + # The web browser does not select seconds by default + self.when = self.when.replace(second=0) + timezone.timedelta(minutes=1) + return super().dispatch(request, *args, **kwargs) + + def get_initial(self): + initial = super().get_initial() + initial['now'] = self.now + initial['when'] = self.when + return initial + + def get_context_data(self, *args, **kwargs): + data = super().get_context_data(*args, **kwargs) + data['now'] = self.now + data['when'] = self.when + return data + + def get_success_url(self): + return append_uri_params( + reverse_lazy('sync:tasks'), + dict( + message='scheduled', + pk=str(self.object.pk), + ), + ) + + def form_valid(self, form): + max_attempts = getattr(settings, 'MAX_ATTEMPTS', 15) + when = form.cleaned_data.get('when') + + if not isinstance(when, self.now.__class__): + form.add_error( + 'when', + ValidationError( + self.errors['invalid_when'].format( + type(when), + ), + ), + ) + if when < self.now: + form.add_error( + 'when', + ValidationError(self.errors['when_before_now']), + ) + + if form.errors: + return super().form_invalid(form) + + self.object.attempts = max_attempts // 2 + self.object.run_at = max(self.now, when) + self.object.save() + + return super().form_valid(form) + + class 
MediaServersView(ListView): ''' List of media servers which have been added. @@ -1063,14 +1152,14 @@ class AddMediaServerView(FormView): def form_valid(self, form): # Assign mandatory fields, bundle other fields into options mediaserver = MediaServer(server_type=self.server_type) - options = {} + options = dict() model_fields = [field.name for field in MediaServer._meta.fields] for field_name, field_value in form.cleaned_data.items(): if field_name in model_fields: setattr(mediaserver, field_name, field_value) else: options[field_name] = field_value - mediaserver.options = json.dumps(options) + mediaserver.options = options # Test the media server details are valid try: mediaserver.validate() @@ -1177,21 +1266,21 @@ class UpdateMediaServerView(FormView, SingleObjectMixin): for field in self.object._meta.fields: if field.name in self.form_class.declared_fields: initial[field.name] = getattr(self.object, field.name) - for option_key, option_val in self.object.loaded_options.items(): + for option_key, option_val in self.object.options.items(): if option_key in self.form_class.declared_fields: initial[option_key] = option_val return initial def form_valid(self, form): # Assign mandatory fields, bundle other fields into options - options = {} + options = dict() model_fields = [field.name for field in MediaServer._meta.fields] for field_name, field_value in form.cleaned_data.items(): if field_name in model_fields: setattr(self.object, field_name, field_value) else: options[field_name] = field_value - self.object.options = json.dumps(options) + self.object.options = options # Test the media server details are valid try: self.object.validate() diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index ffcbb074..9814db24 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -198,6 +198,7 @@ def get_media_info(url, /, *, days=None, info_json=None): 'clean_infojson': False, 'daterange': yt_dlp.utils.DateRange(start=start), 'extractor_args': { + 
'youtube': {'formats': ['missing_pot']}, 'youtubetab': {'approximate_date': ['true']}, }, 'outtmpl': outtmpl, @@ -335,12 +336,15 @@ def download_media( ) # assignment is the quickest way to cover both 'get' cases pp_opts.exec_cmd['after_move'] = cmds + elif '+' not in media_format: + pp_opts.remuxvideo = extension ytopts = { 'format': media_format, 'final_ext': extension, 'merge_output_format': extension, 'outtmpl': os.path.basename(output_file), + 'remuxvideo': pp_opts.remuxvideo, 'quiet': False if settings.DEBUG else True, 'verbose': True if settings.DEBUG else False, 'noprogress': None if settings.DEBUG else True, @@ -355,6 +359,7 @@ def download_media( 'sleep_interval': 10, 'max_sleep_interval': min(20*60, max(60, settings.DOWNLOAD_MEDIA_DELAY)), 'sleep_interval_requests': 1 + (2 * settings.BACKGROUND_TASK_ASYNC_THREADS), + 'extractor_args': opts.get('extractor_args', dict()), 'paths': opts.get('paths', dict()), 'postprocessor_args': opts.get('postprocessor_args', dict()), 'postprocessor_hooks': opts.get('postprocessor_hooks', list()), @@ -378,6 +383,18 @@ def download_media( 'temp': str(temp_dir_path), }) + # Allow download of formats that tested good with 'missing_pot' + youtube_ea_dict = ytopts['extractor_args'].get('youtube', dict()) + formats_list = youtube_ea_dict.get('formats', list()) + if 'missing_pot' not in formats_list: + formats_list += ('missing_pot',) + youtube_ea_dict.update({ + 'formats': formats_list, + }) + ytopts['extractor_args'].update({ + 'youtube': youtube_ea_dict, + }) + postprocessor_hook_func = postprocessor_hook.get('function', None) if postprocessor_hook_func: ytopts['postprocessor_hooks'].append(postprocessor_hook_func) diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index dac5896f..7f5922ae 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -1,3 +1,4 @@ +from django import VERSION as DJANGO_VERSION from pathlib import Path from common.utils import getenv @@ -7,7 +8,7 @@ 
CONFIG_BASE_DIR = BASE_DIR DOWNLOADS_BASE_DIR = BASE_DIR -VERSION = '0.15.0' +VERSION = '0.15.2' SECRET_KEY = '' DEBUG = False ALLOWED_HOSTS = [] @@ -99,7 +100,10 @@ AUTH_PASSWORD_VALIDATORS = [ LANGUAGE_CODE = 'en-us' TIME_ZONE = getenv('TZ', 'UTC') USE_I18N = True -USE_L10N = True +# Removed in Django 5.0, set to True by default in Django 4.0 +# https://docs.djangoproject.com/en/4.1/releases/4.0/#localization +if DJANGO_VERSION[0:3] < (4, 0, 0): + USE_L10N = True USE_TZ = True @@ -135,7 +139,7 @@ HEALTHCHECK_ALLOWED_IPS = ('127.0.0.1',) MAX_ATTEMPTS = 15 # Number of times tasks will be retried -MAX_RUN_TIME = 1*(24*60*60) # Maximum amount of time in seconds a task can run +MAX_RUN_TIME = 12*(60*60) # Maximum amount of time in seconds a task can run BACKGROUND_TASK_RUN_ASYNC = False # Run tasks async in the background BACKGROUND_TASK_ASYNC_THREADS = 1 # Number of async tasks to run at once MAX_BACKGROUND_TASK_ASYNC_THREADS = 8 # For sanity reasons @@ -173,6 +177,8 @@ YOUTUBE_DEFAULTS = { 'cachedir': False, # Disable on-disk caching 'addmetadata': True, # Embed metadata during postprocessing where available 'geo_verification_proxy': getenv('geo_verification_proxy').strip() or None, + 'max_sleep_interval': (60)*5, + 'sleep_interval': 0.25, } COOKIES_FILE = CONFIG_BASE_DIR / 'cookies.txt' @@ -210,7 +216,7 @@ except: if MAX_RUN_TIME < 600: MAX_RUN_TIME = 600 -DOWNLOAD_MEDIA_DELAY = 60 + (MAX_RUN_TIME / 50) +DOWNLOAD_MEDIA_DELAY = 1 + round(MAX_RUN_TIME / 100) if BACKGROUND_TASK_ASYNC_THREADS > MAX_BACKGROUND_TASK_ASYNC_THREADS: BACKGROUND_TASK_ASYNC_THREADS = MAX_BACKGROUND_TASK_ASYNC_THREADS