Merge branch 'main' into patch-17

tcely 2025-06-11 06:51:58 -04:00 committed by GitHub
commit 6c5951c219
53 changed files with 621 additions and 162 deletions

View File

@@ -132,6 +132,8 @@ jobs:
pip install --system --strict --requirements requirements.txt
- name: Set up Django environment
run: |
mkdir -v -p ~/.config/TubeSync/config
sudo ln -v -s -f -T ~/.config/TubeSync/config /config
cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py
cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/background_task/ patches/background_task/*
cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ patches/yt_dlp/*

View File

@@ -25,6 +25,7 @@ ARG TARGETARCH
ENV DEBIAN_FRONTEND="noninteractive" \
APT_KEEP_ARCHIVES=1 \
EDITOR="editor" \
HOME="/root" \
LANGUAGE="en_US.UTF-8" \
LANG="en_US.UTF-8" \
@@ -321,6 +322,8 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va
apt-get -y autoclean && \
rm -v -f /var/cache/debconf/*.dat-old
# The preference for openresty over nginx is for the newer version.
FROM tubesync-openresty AS tubesync
ARG S6_VERSION
@@ -342,20 +345,31 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va
# Install dependencies we keep
# Install required distro packages
apt-get -y --no-install-recommends install \
libjpeg62-turbo \
libmariadb3 \
libpq5 \
libwebp7 \
libonig5 \
pkgconf \
python3 \
python3-libsass \
python3-pip-whl \
python3-socks \
curl \
indent \
less \
lua-lpeg \
tre-agrep \
vis \
xxd \
&& \
# Link to the current python3 version
ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \
# Configure the editor alternatives
touch /usr/local/bin/babi /bin/nano /usr/bin/vim.tiny && \
update-alternatives --install /usr/bin/editor editor /usr/local/bin/babi 50 && \
update-alternatives --install /usr/local/bin/nano nano /bin/nano 10 && \
update-alternatives --install /usr/local/bin/nano nano /usr/local/bin/babi 20 && \
update-alternatives --install /usr/local/bin/vim vim /usr/bin/vim.tiny 15 && \
update-alternatives --install /usr/local/bin/vim vim /usr/bin/vis 35 && \
rm -v /usr/local/bin/babi /bin/nano /usr/bin/vim.tiny && \
# Create an 'app' user which the application will run as
groupadd app && \
useradd -M -d /app -s /bin/false -g app app && \
@@ -407,6 +421,7 @@ RUN --mount=type=tmpfs,target=/cache \
g++ \
gcc \
libjpeg-dev \
libonig-dev \
libpq-dev \
libwebp-dev \
make \
@@ -450,6 +465,7 @@ RUN --mount=type=tmpfs,target=/cache \
g++ \
gcc \
libjpeg-dev \
libonig-dev \
libpq-dev \
libwebp-dev \
make \
@@ -459,8 +475,21 @@ RUN --mount=type=tmpfs,target=/cache \
&& \
apt-get -y autopurge && \
apt-get -y autoclean && \
LD_LIBRARY_PATH=/usr/local/lib/python3/dist-packages/pillow.libs:/usr/local/lib/python3/dist-packages/psycopg_binary.libs \
find /usr/local/lib/python3/dist-packages/ \
-name '*.so*' -print \
-exec du -h '{}' ';' \
-exec ldd '{}' ';' \
>| /cache/python-shared-objects 2>&1 && \
rm -v -f /var/cache/debconf/*.dat-old && \
rm -v -rf /tmp/*
rm -v -rf /tmp/* ; \
if grep >/dev/null -Fe ' => not found' /cache/python-shared-objects ; \
then \
cat -v /cache/python-shared-objects ; \
printf -- 1>&2 '%s\n' \
ERROR: ' An unresolved shared object was found.' ; \
exit 1 ; \
fi
# Copy root
COPY config/root /
@@ -480,13 +509,14 @@ COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings.
# Build app
RUN set -x && \
# Make absolutely sure we didn't accidentally bundle a SQLite dev database
rm -rf /app/db.sqlite3 && \
test '!' -e /app/db.sqlite3 && \
# Run any required app commands
/usr/bin/python3 -B /app/manage.py compilescss && \
/usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \
rm -rf /config /downloads /run/app && \
# Create config, downloads and run dirs
mkdir -v -p /run/app && \
mkdir -v -p /config/media && \
mkdir -v -p /config/media /config/tasks && \
mkdir -v -p /config/cache/pycache && \
mkdir -v -p /downloads/audio && \
mkdir -v -p /downloads/video && \

View File

@@ -8,6 +8,7 @@ autopep8 = "*"
[packages]
django = "~=5.2.1"
django-huey = "*"
django-sass-processor = {extras = ["management-command"], version = "*"}
pillow = "*"
whitenoise = "*"
@@ -25,3 +26,4 @@ emoji = "*"
brotli = "*"
html5lib = "*"
bgutil-ytdlp-pot-provider = "~=1.0"
babi = "*"

View File

@@ -0,0 +1 @@
bundle

View File

@@ -0,0 +1 @@
bundle

View File

@@ -0,0 +1 @@
SIGINT

View File

@@ -0,0 +1,5 @@
#!/command/with-contenv bash
exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \
/usr/bin/python3 /app/manage.py djangohuey \
--queue database

View File

@@ -0,0 +1 @@
longrun

View File

@@ -0,0 +1 @@
SIGINT

View File

@@ -0,0 +1,5 @@
#!/command/with-contenv bash
exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \
/usr/bin/python3 /app/manage.py djangohuey \
--queue filesystem

View File

@@ -0,0 +1 @@
longrun

View File

@@ -0,0 +1 @@
SIGINT

View File

@@ -0,0 +1,5 @@
#!/command/with-contenv bash
exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \
/usr/bin/python3 /app/manage.py djangohuey \
--queue limited

View File

@@ -0,0 +1 @@
longrun

View File

@@ -0,0 +1 @@
SIGINT

View File

@@ -0,0 +1,5 @@
#!/command/with-contenv bash
exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \
/usr/bin/python3 /app/manage.py djangohuey \
--queue network

View File

@@ -0,0 +1 @@
longrun

View File

@@ -4,6 +4,21 @@
groupmod -o -g "${PGID:=911}" app
usermod -o -u "${PUID:=911}" app
# Ensure /config directories exist
mkdir -v -p /config/{cache,media,tasks,tubesync}
# Copy local_settings.py for the user
if [ -f /config/tubesync/local_settings.py ]
then
# from the image for an example or comparison
cp -v -p /app/tubesync/local_settings.py \
/config/tubesync/local_settings.py.image
# to the image to apply the user's settings
cp -v -p /config/tubesync/local_settings.py \
/app/tubesync/local_settings.py
fi
# Reset permissions
chown -R app:app /run/app
chmod -R 0700 /run/app

View File

@@ -42,3 +42,8 @@ class DatabaseConnectionError(Exception):
Raised when parsing or initially connecting to a database.
'''
pass
class BgTaskWorkerError(Exception):
'''
Raised when the worker process is not in a normal working state.
'''
pass

tubesync/common/huey.py Normal file
View File

@@ -0,0 +1,37 @@
def sqlite_tasks(key, /, prefix=None):
name_fmt = 'huey_{}'
if prefix is None:
prefix = ''
if prefix:
name_fmt = f'huey_{prefix}_' + '{}'
name = name_fmt.format(key)
return dict(
huey_class='huey.SqliteHuey',
name=name,
immediate=False,
results=True,
store_none=False,
utc=True,
compression=True,
connection=dict(
filename=f'/config/tasks/{name}.db',
fsync=True,
strict_fifo=True,
),
consumer=dict(
workers=1,
worker_type='process',
max_delay=20.0,
flush_locks=True,
scheduler_interval=10,
simple_log=False,
# verbose has three positions:
# DEBUG: True
# INFO: None
# WARNING: False
verbose=False,
),
)
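For illustration only (not part of the commit), a minimal sketch of what this helper returns; the queue name and filename follow the patterns hard-coded above:

# Hypothetical usage of sqlite_tasks(); each returned dict becomes one
# queue entry in the DJANGO_HUEY setting (see tubesync/settings.py below).
queue_settings = sqlite_tasks('limited', prefix='net')
assert queue_settings['name'] == 'huey_net_limited'
assert queue_settings['connection']['filename'] == '/config/tasks/huey_net_limited.db'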

View File

@@ -7,9 +7,19 @@ import pstats
import string
import time
from django.core.paginator import Paginator
from functools import partial
from operator import attrgetter, itemgetter
from pathlib import Path
from urllib.parse import urlunsplit, urlencode, urlparse
from .errors import DatabaseConnectionError
def directory_and_stem(arg_path, /, all_suffixes=False):
filepath = Path(arg_path)
stem = Path(filepath.stem)
while all_suffixes and stem.suffixes and '' != stem.suffix:
stem = Path(stem.stem)
return (filepath.parent, str(stem),)
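A quick sketch (not part of the commit) of the two modes, using a hypothetical media path:

# directory_and_stem() splits a path into (parent, stem); with
# all_suffixes=True every suffix is stripped, not just the last one.
from pathlib import Path
assert directory_and_stem('/downloads/video/clip.info.json') == (Path('/downloads/video'), 'clip.info')
assert directory_and_stem('/downloads/video/clip.info.json', True) == (Path('/downloads/video'), 'clip')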
def getenv(key, default=None, /, *, integer=False, string=True):
'''
@@ -46,6 +56,51 @@ def getenv(key, default=None, /, *, integer=False, string=True):
return r
def glob_quote(filestr, /):
_glob_specials = {
'?': '[?]',
'*': '[*]',
'[': '[[]',
']': '[]]', # probably not needed, but it won't hurt
}
if not isinstance(filestr, str):
raise TypeError(f'expected a str, got "{type(filestr)}"')
return filestr.translate(str.maketrans(_glob_specials))
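For example (hypothetical filename), each glob metacharacter is wrapped in a character class so the string matches only itself:

# glob_quote() makes a literal filename safe for glob-style matching.
assert glob_quote('video [1080p]*.mp4') == 'video [[]1080p[]][*].mp4'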
def list_of_dictionaries(arg_list, /, arg_function=lambda x: x):
assert callable(arg_function)
if isinstance(arg_list, list):
_map_func = partial(lambda f, d: f(d) if isinstance(d, dict) else d, arg_function)
return (True, list(map(_map_func, arg_list)),)
return (False, arg_list,)
def mkdir_p(arg_path, /, *, mode=0o777):
'''
Reminder: mode only affects the last directory
'''
dirpath = Path(arg_path)
return dirpath.mkdir(mode=mode, parents=True, exist_ok=True)
def multi_key_sort(iterable, specs, /, use_reversed=False, *, item=False, attr=False, key_func=None):
result = list(iterable)
if key_func is None:
# itemgetter is the default
if item or not (item or attr):
key_func = itemgetter
elif attr:
key_func = attrgetter
for key, reverse in reversed(specs):
result.sort(key=key_func(key), reverse=reverse)
if use_reversed:
return list(reversed(result))
return result
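A short sketch with illustrative data: the (key, reverse) specs are applied from last to first, relying on sort stability so the first spec ends up as the primary key:

# Sort hypothetical format dicts by height, then fps, both descending.
formats = [
    {'height': 720, 'fps': 60},
    {'height': 1080, 'fps': 30},
    {'height': 1080, 'fps': 60},
]
ordered = multi_key_sort(formats, [('height', True), ('fps', True)])
assert ordered[0] == {'height': 1080, 'fps': 60}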
def parse_database_connection_string(database_connection_string):
'''
Parses a connection string in a URL style format, such as:
@@ -167,6 +222,15 @@ def clean_emoji(s):
return emoji.replace_emoji(s)
def seconds_to_timestr(seconds):
seconds = seconds % (24 * 3600)
hour = seconds // 3600
seconds %= 3600
minutes = seconds // 60
seconds %= 60
return '{:02d}:{:02d}:{:02d}'.format(hour, minutes, seconds)
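Note that the leading modulo wraps at 24 hours, so a duration of a day or more loses its day component; a couple of hypothetical values:

# seconds_to_timestr() renders a duration as HH:MM:SS, wrapping at 24h.
assert seconds_to_timestr(3725) == '01:02:05'
assert seconds_to_timestr(90000) == '01:00:00'  # 25 hours wraps to 01:00:00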
def time_func(func):
def wrapper(*args, **kwargs):
start = time.perf_counter()

View File

@@ -1,26 +1,51 @@
#!/usr/bin/env sh
dir='/run/service'
_dir='/run/service'
svc_path() (
cd "${dir}"
cd "${_dir}" &&
realpath -e -s "$@"
)
_bundles="$(
find '/etc/s6-overlay/s6-rc.d' -mindepth 2 -maxdepth 2 \
-name 'type' \
-execdir grep -F -q -e bundle '{}' ';' \
-printf '%P\n' | \
sed -e 's,/type$,,' ;
)"
is_a_bundle() {
local bundle
for bundle in ${_bundles}
do
if [ "$1" = "${bundle}" ]
then
return 0
fi
done
return 1
}
if [ 0 -eq $# ]
then
set -- \
$( cd "${dir}" && svc_path tubesync*-worker ) \
"$( svc_path gunicorn )" \
"$( svc_path nginx )"
set -- $(/command/s6-rc list user | grep -v -e '-init$')
fi
for service in $( svc_path "$@" )
for arg in "$@"
do
printf -- 'Restarting %-28s' "${service#${dir}/}..."
_began="$( date '+%s' )"
/command/s6-svc -wr -r "${service}"
_ended="$( date '+%s' )"
printf -- '\tcompleted (in %2.1d seconds).\n' \
"$( expr "${_ended}" - "${_began}" )"
_svcs="${arg}"
if is_a_bundle "${arg}"
then
_svcs="$(/command/s6-rc list "${arg}" | grep -v -e '-init$')"
fi
for service in $(svc_path ${_svcs})
do
printf -- 'Restarting %-28s' "${service#${_dir}/}..."
_began="$( date '+%s' )"
/command/s6-svc -wr -r "${service}"
_ended="$( date '+%s' )"
printf -- '\tcompleted (in %2.1d seconds).\n' \
"$( expr "${_ended}" - "${_began}" )"
done
done
unset -v _began _ended service
unset -v _began _ended _svcs arg service
unset -v _bundles _dir

View File

@@ -165,6 +165,7 @@ class TaskQueue(models.TextChoices):
DB = 'database', _('Database')
FS = 'filesystem', _('Filesystem')
NET = 'network', _('Networking')
LIMIT = 'limited', _('Limited Networking')
class WeekDay(models.IntegerChoices):

View File

@@ -6,7 +6,7 @@
from .choices import Val, Fallback
from .utils import multi_key_sort
from common.utils import multi_key_sort
from django.conf import settings

View File

@@ -0,0 +1,58 @@
# Generated by Django 5.2.1 on 2025-05-28 09:57
import django.db.models.deletion
import django.utils.timezone
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('sync', '0034_source_target_schedule_and_more'),
]
operations = [
migrations.AlterUniqueTogether(
name='metadata',
unique_together={('media', 'site', 'key')},
),
migrations.AddField(
model_name='metadata',
name='source',
field=models.ForeignKey(
blank=True,
help_text='Source from which the video was retrieved',
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name='videos',
related_query_name='video',
to='sync.source',
),
),
migrations.AlterField(
model_name='metadata',
name='media',
field=models.OneToOneField(
blank=True,
help_text='Media the metadata belongs to',
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name='new_metadata',
to='sync.media',
),
),
migrations.AlterField(
model_name='metadata',
name='retrieved',
field=models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
help_text='Date and time the metadata was retrieved',
verbose_name='retrieved',
),
),
migrations.AlterUniqueTogether(
name='metadata',
unique_together={('media', 'site', 'key'), ('source', 'site', 'key')},
),
]

View File

@@ -1,4 +1,3 @@
from pathlib import Path
from ..choices import Val, YouTube_SourceType # noqa
@@ -11,11 +10,3 @@ def _nfo_element(nfo, label, text, /, *, attrs={}, tail='\n', char=' ', indent=2
element.tail = tail + (char * indent)
return element
def directory_and_stem(arg_path, /, all_suffixes=False):
filepath = Path(arg_path)
stem = Path(filepath.stem)
while all_suffixes and stem.suffixes and '' != stem.suffix:
stem = Path(stem.stem)
stem = str(stem)
return (filepath.parent, stem,)

View File

@@ -17,15 +17,15 @@ from common.logger import log
from common.errors import NoFormatException
from common.json import JSONEncoder
from common.utils import (
clean_filename, clean_emoji,
clean_filename, clean_emoji, directory_and_stem,
glob_quote, mkdir_p, multi_key_sort, seconds_to_timestr,
)
from ..youtube import (
get_media_info as get_youtube_media_info,
download_media as download_youtube_media,
)
from ..utils import (
seconds_to_timestr, parse_media_format, filter_response,
write_text_file, mkdir_p, glob_quote, multi_key_sort,
filter_response, parse_media_format, write_text_file,
)
from ..matching import (
get_best_combined_format,
@@ -38,7 +38,7 @@ from ..choices import (
from ._migrations import (
media_file_storage, get_media_thumb_path, get_media_file_path,
)
from ._private import _srctype_dict, _nfo_element, directory_and_stem
from ._private import _srctype_dict, _nfo_element
from .media__tasks import (
download_checklist, download_finished, wait_for_premiere,
)
@@ -566,7 +566,11 @@ class Media(models.Model):
@property
def has_metadata(self):
return self.metadata is not None
result = self.metadata is not None
if not result:
return False
value = self.get_metadata_first_value(('id', 'display_id', 'channel_id', 'uploader_id',))
return value is not None
def metadata_clear(self, /, *, save=False):
@@ -600,8 +604,10 @@
arg_dict=data,
)
md_model = self._meta.fields_map.get('new_metadata').related_model
md, created = md_model.objects.get_or_create(
media_id=self.pk,
md, created = md_model.objects.filter(
source__isnull=True,
).get_or_create(
media=self,
site=site,
key=self.key,
)
@@ -697,8 +703,7 @@ class Media(models.Model):
data = self.loaded_metadata
metadata_seconds = data.get('epoch', None)
if not metadata_seconds:
self.metadata = None
self.save(update_fields={'metadata'})
self.metadata_clear(save=True)
return False
now = timezone.now()

View File

@@ -3,8 +3,9 @@ from common.json import JSONEncoder
from common.timestamp import timestamp_to_datetime
from common.utils import django_queryset_generator as qs_gen
from django import db
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from .media import Media
from .media import Media, Source
class Metadata(db.models.Model):
@@ -17,6 +18,7 @@
verbose_name_plural = _('Metadata about Media')
unique_together = (
('media', 'site', 'key'),
('source', 'site', 'key', ),
)
get_latest_by = ["-retrieved", "-created"]
@@ -27,12 +29,22 @@
default=uuid.uuid4,
help_text=_('UUID of the metadata'),
)
source = db.models.ForeignKey(
Source,
on_delete=db.models.CASCADE,
related_name="videos",
related_query_name="video",
help_text=_('Source from which the video was retrieved'),
blank=True,
null=True,
)
media = db.models.OneToOneField(
Media,
# on_delete=models.DO_NOTHING,
on_delete=db.models.SET_NULL,
related_name='new_metadata',
help_text=_('Media the metadata belongs to'),
blank=True,
null=True,
parent_link=False,
)
@@ -62,8 +74,8 @@
)
retrieved = db.models.DateTimeField(
_('retrieved'),
auto_now_add=True,
db_index=True,
default=timezone.now,
help_text=_('Date and time the metadata was retrieved'),
)
uploaded = db.models.DateTimeField(

View File

@@ -404,7 +404,7 @@ class Source(db.models.Model):
)
elif Val(IndexSchedule.EVERY_7_DAYS) > self.index_schedule:
self.target_schedule = advance_hour(
when.replace(hour=1+when.hour),
when + timezone.timedelta(hours=1),
self.target_schedule.hour,
)

View File

@@ -4,12 +4,13 @@ from tempfile import TemporaryDirectory
from django.conf import settings
from django.db import IntegrityError
from django.db.models.signals import pre_save, post_save, pre_delete, post_delete
from django.db.transaction import on_commit
from django.db.transaction import atomic, on_commit
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from background_task.signals import task_failed
from background_task.models import Task
from common.logger import log
from common.utils import glob_quote, mkdir_p
from .models import Source, Media, Metadata
from .tasks import (delete_task_by_source, delete_task_by_media, index_source_task,
download_media_thumbnail, download_media_metadata,
@@ -17,7 +18,7 @@ from .tasks import (delete_task_by_source, delete_task_by_media, index_source_ta
download_media, download_source_images,
delete_all_media_for_source, save_all_media_for_source,
rename_media, get_media_metadata_task, get_media_download_task)
from .utils import delete_file, glob_quote, mkdir_p
from .utils import delete_file
from .filtering import filter_media
from .choices import Val, YouTube_SourceType
@@ -429,17 +430,40 @@ def media_post_delete(sender, instance, **kwargs):
# Re-use the old metadata if it exists
instance_qs = Metadata.objects.filter(
media__isnull=True,
source__isnull=True,
site=old_metadata.get(site_field) or 'Youtube',
key=skipped_media.key,
)
try:
instance_qs.update(media=skipped_media)
if instance_qs.count():
with atomic(durable=False):
# clear the link to a media instance
Metadata.objects.filter(media=skipped_media).update(media=None)
# choose the oldest metadata for our key
md = instance_qs.filter(
key=skipped_media.key,
).order_by(
'key',
'created',
).first()
# set the link to a media instance only on our selected metadata
log.info(f'Reusing old metadata for "{skipped_media.key}": {skipped_media.name}')
instance_qs.filter(uuid=md.uuid).update(media=skipped_media)
# delete any metadata that we are no longer using
instance_qs.exclude(uuid=md.uuid).delete()
except IntegrityError:
# Delete the new metadata
Metadata.objects.filter(media=skipped_media).delete()
# this probably won't happen, but try it without a transaction
try:
instance_qs.update(media=skipped_media)
except IntegrityError:
# Delete the old metadata if it still failed
# clear the link to a media instance
Metadata.objects.filter(media=skipped_media).update(media=None)
# keep one metadata
md = instance_qs.order_by('created').first()
instance_qs.filter(uuid=md.uuid).update(media=skipped_media)
except IntegrityError as e:
log.exception(f'media_post_delete: could not update selected metadata: {e}')
finally:
log.debug(f'Deleting metadata for "{skipped_media.key}": {skipped_media.pk}')
# delete the old metadata
instance_qs.delete()

View File

@@ -10,6 +10,7 @@ import random
import requests
import time
import uuid
from collections import deque as queue
from io import BytesIO
from hashlib import sha1
from pathlib import Path
@@ -19,7 +20,7 @@ from django import db
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.files.uploadedfile import SimpleUploadedFile
from django.db import DatabaseError, IntegrityError
from django.db import DatabaseError
from django.db.transaction import atomic
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
@@ -27,15 +28,15 @@ from background_task import background
from background_task.exceptions import InvalidTaskError
from background_task.models import Task, CompletedTask
from common.logger import log
from common.errors import ( NoFormatException, NoMediaException,
NoThumbnailException,
DownloadFailedException, )
from common.errors import ( BgTaskWorkerError, DownloadFailedException,
NoFormatException, NoMediaException,
NoThumbnailException, )
from common.utils import ( django_queryset_generator as qs_gen,
remove_enclosed, )
remove_enclosed, seconds_to_timestr, )
from .choices import Val, TaskQueue
from .models import Source, Media, MediaServer
from .models import Source, Media, MediaServer, Metadata
from .utils import ( get_remote_image, resize_image_to_height,
write_text_file, filter_response, seconds_to_timestr, )
write_text_file, filter_response, )
from .youtube import YouTubeError
db_vendor = db.connection.vendor
@@ -56,6 +57,7 @@ def map_task_to_instance(task):
because UUID's are incompatible with background_task's "creator" feature.
'''
TASK_MAP = {
'sync.tasks.migrate_to_metadata': Media,
'sync.tasks.index_source_task': Source,
'sync.tasks.check_source_directory_exists': Source,
'sync.tasks.download_media_thumbnail': Media,
@@ -302,6 +304,70 @@ def cleanup_removed_media(source, video_keys):
schedule_media_servers_update()
def save_db_batch(qs, objs, fields, /):
assert hasattr(qs, 'bulk_update')
assert callable(qs.bulk_update)
assert hasattr(objs, '__len__')
assert callable(objs.__len__)
assert isinstance(fields, (tuple, list, set, frozenset))
num_updated = 0
num_objs = len(objs)
with atomic(durable=False):
num_updated = qs.bulk_update(objs=objs, fields=fields)
if num_objs == num_updated:
# this covers at least: list, set, deque
if hasattr(objs, 'clear') and callable(objs.clear):
objs.clear()
return num_updated
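The intended call pattern, sketched here with illustrative names, mirrors how index_source_task below fills a bounded deque and flushes it whenever it reaches maxlen:

# Hypothetical sketch: batch Media rows and bulk-update them in groups.
from collections import deque

batch = deque(maxlen=10)
fields = frozenset(('duration', 'published', 'title'))
for media in media_rows:  # media_rows is an illustrative iterable
    batch.append(media)
    if len(batch) == batch.maxlen:
        save_db_batch(Media.objects, batch, fields)  # also clears batch
save_db_batch(Media.objects, batch, fields)  # flush any remainder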
@background(schedule=dict(priority=20, run_at=60), queue=Val(TaskQueue.DB), remove_existing_tasks=True)
def migrate_to_metadata(media_id):
try:
media = Media.objects.get(pk=media_id)
except Media.DoesNotExist as e:
# Task triggered but the media no longer exists, do nothing
log.error(f'Task migrate_to_metadata(pk={media_id}) called but no '
f'media exists with ID: {media_id}')
raise InvalidTaskError(_('no such media')) from e
try:
data = Metadata.objects.get(
media__isnull=True,
source=media.source,
key=media.key,
)
except Metadata.DoesNotExist as e:
raise InvalidTaskError(_('no indexed data to migrate to metadata')) from e
video = data.value
fields = lambda f, m: m.get_metadata_field(f)
timestamp = video.get(fields('timestamp', media), None)
for key in ('epoch', 'availability', 'extractor_key',):
field = fields(key, media)
value = video.get(field)
existing_value = media.get_metadata_first_value(key)
if value is None:
if 'epoch' == key:
value = timestamp
elif 'extractor_key' == key:
value = data.site
if value is not None:
if existing_value and ('epoch' == key or value == existing_value):
continue
media.save_to_metadata(field, value)
@background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.NET), remove_existing_tasks=False)
def wait_for_database_queue():
worker_down_path = Path('/run/service/tubesync-db-worker/down')
while Task.objects.unlocked(timezone.now()).filter(queue=Val(TaskQueue.DB)).count() > 0:
time.sleep(5)
if worker_down_path.exists() and worker_down_path.is_file():
raise BgTaskWorkerError(_('queue worker stopped'))
@background(schedule=dict(priority=20, run_at=30), queue=Val(TaskQueue.NET), remove_existing_tasks=True)
def index_source_task(source_id):
'''
@@ -322,13 +388,12 @@ def index_source_task(source_id):
# update the target schedule column
source.task_run_at_dt
# Reset any errors
# TODO: determine if this affects anything
source.has_failed = False
save_model(source)
# Index the source
videos = source.index_media()
if not videos:
# TODO: Record this error in source.has_failed ?
source.has_failed = True
save_model(source)
raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no '
f'media to index, is the source key valid? Check the '
f'source configuration is correct and that the source '
@@ -336,8 +401,31 @@
# Got some media, update the last crawl timestamp
source.last_crawl = timezone.now()
save_model(source)
wait_for_database_queue(
priority=19, # the indexing task uses 20
verbose_name=_('Waiting for database tasks to complete'),
)
wait_for_database_queue(
priority=29, # the checking task uses 30
queue=Val(TaskQueue.FS),
verbose_name=_('Delaying checking all media for database tasks'),
)
delete_task_by_source('sync.tasks.save_all_media_for_source', source.pk)
num_videos = len(videos)
log.info(f'Found {num_videos} media items for source: {source}')
tvn_format = '{:,}' + f'/{num_videos:,}'
db_batch_data = queue(list(), maxlen=50)
db_fields_data = frozenset((
'retrieved',
'site',
'value',
))
db_batch_media = queue(list(), maxlen=10)
db_fields_media = frozenset((
'duration',
'published',
'title',
))
fields = lambda f, m: m.get_metadata_field(f)
task = get_source_index_task(source_id)
if task:
@@ -346,7 +434,6 @@
valid='0123456789/,',
end=task.verbose_name.find('Index'),
)
tvn_format = '{:,}' + f'/{num_videos:,}'
vn = 0
video_keys = set()
while len(videos) > 0:
@@ -358,15 +445,17 @@
# Video has no unique key (ID), it can't be indexed
continue
video_keys.add(key)
if len(db_batch_data) == db_batch_data.maxlen:
save_db_batch(Metadata.objects, db_batch_data, db_fields_data)
if len(db_batch_media) == db_batch_media.maxlen:
save_db_batch(Media.objects, db_batch_media, db_fields_media)
update_task_status(task, tvn_format.format(vn))
# media, new_media = Media.objects.get_or_create(key=key, source=source)
try:
media = Media.objects.get(key=key, source=source)
except Media.DoesNotExist:
media = Media(key=key)
media.source = source
media.duration = float(video.get(fields('duration', media), None) or 0) or None
media.title = str(video.get(fields('title', media), ''))[:200]
media_defaults = dict()
# create a dummy instance to use its functions
media = Media(source=source, key=key)
media_defaults['duration'] = float(video.get(fields('duration', media), None) or 0) or None
media_defaults['title'] = str(video.get(fields('title', media), ''))[:200]
site = video.get(fields('ie_key', media), None)
timestamp = video.get(fields('timestamp', media), None)
try:
published_dt = media.ts_to_dt(timestamp)
@@ -374,46 +463,75 @@
pass
else:
if published_dt:
media.published = published_dt
try:
media.save()
except IntegrityError as e:
log.error(f'Index media failed: {source} / {media} with "{e}"')
else:
media_defaults['published'] = published_dt
# Retrieve or create the actual media instance
media, new_media = source.media_source.only(
'uuid',
'source',
'key',
*db_fields_media,
).get_or_create(defaults=media_defaults, source=source, key=key)
db_batch_media.append(media)
data, new_data = source.videos.defer('value').filter(
media__isnull=True,
).get_or_create(source=source, key=key)
if site:
data.site = site
data.retrieved = source.last_crawl
data.value = video
db_batch_data.append(data)
vn_fmt = _('Updating metadata from indexing results for: "{}": {}')
migrate_to_metadata(
str(media.pk),
verbose_name=vn_fmt.format(media.key, media.name),
)
if not new_media:
# update the existing media
for key, value in media_defaults.items():
setattr(media, key, value)
log.debug(f'Indexed media: {vn}: {source} / {media}')
else:
# log the new media instances
new_media_instance = (
# new_media or
media.created and
source.last_crawl and
media.created >= source.last_crawl
)
if new_media_instance:
log.info(f'Indexed new media: {source} / {media}')
log.info(f'Scheduling tasks to download thumbnail for: {media.key}')
thumbnail_fmt = 'https://i.ytimg.com/vi/{}/{}default.jpg'
vn_fmt = _('Downloading {} thumbnail for: "{}": {}')
for prefix in ('hq', 'sd', 'maxres',):
thumbnail_url = thumbnail_fmt.format(
media.key,
prefix,
)
download_media_thumbnail(
str(media.pk),
thumbnail_url,
verbose_name=vn_fmt.format(prefix, media.key, media.name),
)
log.info(f'Scheduling task to download metadata for: {media.url}')
verbose_name = _('Downloading metadata for: "{}": {}')
download_media_metadata(
str(media.pk),
verbose_name=verbose_name.format(media.key, media.name),
log.info(f'Indexed new media: {source} / {media}')
log.info(f'Scheduling tasks to download thumbnail for: {media.key}')
thumbnail_fmt = 'https://i.ytimg.com/vi/{}/{}default.jpg'
vn_fmt = _('Downloading {} thumbnail for: "{}": {}')
for num, prefix in enumerate(('hq', 'sd', 'maxres',)):
thumbnail_url = thumbnail_fmt.format(
media.key,
prefix,
)
download_media_thumbnail(
str(media.pk),
thumbnail_url,
schedule=dict(run_at=10+(300*num)),
verbose_name=vn_fmt.format(prefix, media.key, media.name),
)
log.info(f'Scheduling task to download metadata for: {media.url}')
verbose_name = _('Downloading metadata for: "{}": {}')
download_media_metadata(
str(media.pk),
schedule=dict(priority=35),
verbose_name=verbose_name.format(media.key, media.name),
)
# Reset task.verbose_name to the saved value
update_task_status(task, None)
# Update any remaining items in the batches
save_db_batch(Metadata.objects, db_batch_data, db_fields_data)
save_db_batch(Media.objects, db_batch_media, db_fields_media)
# Cleanup of media no longer available from the source
cleanup_removed_media(source, video_keys)
# Clear references to indexed data
videos = video = None
db_batch_data.clear()
db_batch_media.clear()
# Trigger any signals that we skipped with batched updates
vn_fmt = _('Checking all media for "{}"')
save_all_media_for_source(
str(source.pk),
schedule=dict(run_at=60),
verbose_name=vn_fmt.format(source.name),
)
@background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.FS))

View File

@@ -1,5 +1,6 @@
from django import template
from django.template.defaultfilters import filesizeformat
from math import ceil
register = template.Library()
@@ -23,3 +24,64 @@ def sub(value, arg):
except Exception:
return ""
@register.filter
def timedelta(value, arg=None, /, *, fmt_2=None):
if hasattr(value, 'total_seconds') and callable(value.total_seconds):
seconds_total = value.total_seconds()
elif hasattr(value, 'seconds'):
seconds_total = value.seconds + (value.days * 24 * 60 * 60)
else:
seconds_total = value
dynamic_arg = False
if arg is None:
if seconds_total < 1.0:
return f'{seconds_total:.6f} seconds'
dynamic_arg = True
arg = '{hours2}:{minutes2}:{seconds2}'
if fmt_2 is None:
fmt_2 = '{:02d}'
seconds_total = ceil(seconds_total)
seconds = seconds_total % 60
minutes_total = seconds_total // 60
minutes = minutes_total % 60
hours_total = minutes_total // 60
hours = hours_total % 24
days_total = hours_total // 24
days = days_total % 365
years_total = days_total // 365
years = years_total
if dynamic_arg:
prefix_years = prefix_days = ''
if years_total > 0:
prefix_years = '{years_total} years, '
if prefix_years and days_total > 0:
prefix_days = '{days} days, '
elif days_total > 0:
prefix_days = '{days_total} days, '
arg = prefix_years + prefix_days + arg
return arg.format(**{
'seconds': seconds,
'seconds2': fmt_2.format(seconds),
'minutes': minutes,
'minutes2': fmt_2.format(minutes),
'hours': hours,
'hours2': fmt_2.format(hours),
'days': days,
'years': years,
'seconds_total': seconds_total,
'minutes_total': minutes_total,
'hours_total': hours_total,
'days_total': days_total,
'years_total': years_total,
})
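A few hypothetical invocations of the filter (called directly as a plain function), traced through the logic above:

# timedelta() accepts a datetime.timedelta, anything with .seconds/.days,
# or a bare number of seconds.
import datetime
assert timedelta(datetime.timedelta(seconds=3725)) == '01:02:05'
assert timedelta(90061) == '1 days, 01:01:01'  # the filter does not singularize
assert timedelta(0.25) == '0.250000 seconds'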

View File

@@ -354,6 +354,7 @@ class FrontEndTestCase(TestCase):
# Add some media
test_minimal_metadata = '''
{
"channel_id":"testkey",
"thumbnail":"https://example.com/thumb.jpg",
"formats": [{
"format_id":"251",

View File

@@ -2,11 +2,11 @@ import os
import re
import math
from copy import deepcopy
from operator import attrgetter, itemgetter
from pathlib import Path
from tempfile import NamedTemporaryFile
import requests
from PIL import Image
from common.utils import list_of_dictionaries
from django.conf import settings
from urllib.parse import urlsplit, parse_qs
from django.forms import ValidationError
@@ -95,20 +95,6 @@ def resize_image_to_height(image, width, height):
return image
def glob_quote(filestr):
_glob_specials = {
'?': '[?]',
'*': '[*]',
'[': '[[]',
']': '[]]', # probably not needed, but it won't hurt
}
if not isinstance(filestr, str):
raise TypeError(f'filestr must be a str, got "{type(filestr)}"')
return filestr.translate(str.maketrans(_glob_specials))
def file_is_editable(filepath):
'''
Checks that a file exists and the file is in an allowed predefined tuple of
@@ -130,14 +116,6 @@ def file_is_editable(filepath):
return False
def mkdir_p(arg_path, mode=0o777):
'''
Reminder: mode only affects the last directory
'''
dirpath = Path(arg_path)
return dirpath.mkdir(mode=mode, parents=True, exist_ok=True)
def write_text_file(filepath, filedata):
if not isinstance(filedata, str):
raise TypeError(f'filedata must be a str, got "{type(filedata)}"')
@@ -162,30 +140,6 @@ def delete_file(filepath):
return False
def seconds_to_timestr(seconds):
seconds = seconds % (24 * 3600)
hour = seconds // 3600
seconds %= 3600
minutes = seconds // 60
seconds %= 60
return '{:02d}:{:02d}:{:02d}'.format(hour, minutes, seconds)
def multi_key_sort(iterable, specs, /, use_reversed=False, *, item=False, attr=False, key_func=None):
result = list(iterable)
if key_func is None:
# itemgetter is the default
if item or not (item or attr):
key_func = itemgetter
elif attr:
key_func = attrgetter
for key, reverse in reversed(specs):
result.sort(key=key_func(key), reverse=reverse)
if use_reversed:
return list(reversed(result))
return result
def normalize_codec(codec_str):
result = str(codec_str).upper()
parts = result.split('.')
@@ -201,17 +155,6 @@ def normalize_codec(codec_str):
return result
def list_of_dictionaries(arg_list, arg_function=lambda x: x):
assert callable(arg_function)
if isinstance(arg_list, list):
def _call_func_with_dict(arg_dict):
if isinstance(arg_dict, dict):
return arg_function(arg_dict)
return arg_dict
return (True, list(map(_call_func_with_dict, arg_list)),)
return (False, arg_list,)
def _url_keys(arg_dict, filter_func):
result = {}
if isinstance(arg_dict, dict):

View File

@@ -20,13 +20,13 @@ from django.utils._os import safe_join
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from common.timestamp import timestamp_to_datetime
from common.utils import append_uri_params
from common.utils import append_uri_params, mkdir_p, multi_key_sort
from background_task.models import Task, CompletedTask
from .models import Source, Media, MediaServer
from .forms import (ValidateSourceForm, ConfirmDeleteSourceForm, RedownloadMediaForm,
SkipMediaForm, EnableMediaForm, ResetTasksForm, ScheduleTaskForm,
ConfirmDeleteMediaServerForm, SourceForm)
from .utils import validate_url, delete_file, multi_key_sort, mkdir_p
from .utils import delete_file, validate_url
from .tasks import (map_task_to_instance, get_error_message,
get_source_completed_tasks, get_media_download_task,
delete_task_by_media, index_source_task,

View File

@@ -7,6 +7,7 @@
import os
from common.logger import log
from common.utils import mkdir_p
from copy import deepcopy
from pathlib import Path
from tempfile import TemporaryDirectory
@@ -15,7 +16,6 @@ from urllib.parse import urlsplit, parse_qs
from django.conf import settings
from .choices import Val, FileExtension
from .hooks import postprocessor_hook, progress_hook
from .utils import mkdir_p
import yt_dlp
import yt_dlp.patch.check_thumbnails
import yt_dlp.patch.fatal_http_errors

View File

@@ -1,5 +1,6 @@
from django import VERSION as DJANGO_VERSION
from pathlib import Path
from common.huey import sqlite_tasks
from common.utils import getenv
@@ -24,6 +25,7 @@ INSTALLED_APPS = [
'django.contrib.humanize',
'sass_processor',
'background_task',
'django_huey',
'common',
'sync',
]
@@ -47,6 +49,22 @@ ROOT_URLCONF = 'tubesync.urls'
FORCE_SCRIPT_NAME = None
DJANGO_HUEY = {
'default': 'network',
'queues': {
'database': sqlite_tasks('database'),
'filesystem': sqlite_tasks('filesystem'),
'limited': sqlite_tasks('limited', prefix='net'),
'network': sqlite_tasks('network'),
},
}
for django_huey_queue in DJANGO_HUEY['queues'].values():
connection = django_huey_queue.get('connection')
if connection:
filepath = Path('/.' + (connection.get('filename') or '')).resolve(strict=False)
filepath.parent.mkdir(exist_ok=True, parents=True)
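The '/.' prefix keeps the resolved path rooted at '/' regardless of what the configured filename looks like; for the absolute filenames sqlite_tasks() produces it is a no-op after resolution. A hypothetical check:

# '/.' + '/config/tasks/huey_database.db' resolves back to the same path.
from pathlib import Path
p = Path('/.' + '/config/tasks/huey_database.db').resolve(strict=False)
assert str(p) == '/config/tasks/huey_database.db'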
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',