mirror of
https://github.com/meeb/tubesync.git
synced 2025-06-22 13:06:34 +00:00
599 lines
20 KiB
Python
599 lines
20 KiB
Python
import os
|
|
import re
|
|
import uuid
|
|
from collections import deque as queue
|
|
from pathlib import Path
|
|
from django import db
|
|
from django.conf import settings
|
|
from django.core.exceptions import SuspiciousOperation
|
|
from django.core.validators import RegexValidator
|
|
from django.utils import timezone
|
|
from django.utils.text import slugify
|
|
from django.utils.translation import gettext_lazy as _
|
|
from ..choices import (Val,
|
|
SponsorBlock_Category, YouTube_SourceType, IndexSchedule,
|
|
CapChoices, Fallback, FileExtension, FilterSeconds,
|
|
SourceResolution, SourceResolutionInteger,
|
|
YouTube_VideoCodec, YouTube_AudioCodec,
|
|
)
|
|
from ..fields import CommaSepChoiceField
|
|
from ..youtube import (
|
|
get_media_info as get_youtube_media_info,
|
|
get_channel_image_info as get_youtube_channel_image_info,
|
|
)
|
|
from ._migrations import media_file_storage
|
|
from ._private import _srctype_dict
|
|
|
|
|
|
class Source(db.models.Model):
    '''
        A Source is a source of media. Currently, this is either a YouTube channel
        or a YouTube playlist.

        The class body declares the per-source settings as model fields plus a
        few class-level lookup tables keyed by source type.
    '''

    # Whether metadata / the thumbnail should be embedded into the media file.
    embed_metadata = db.models.BooleanField(
        _('embed metadata'),
        default=False,
        help_text=_('Embed metadata from source into file'),
    )
    embed_thumbnail = db.models.BooleanField(
        _('embed thumbnail'),
        default=False,
        help_text=_('Embed thumbnail into the file'),
    )

    # Fontawesome icons used for the source on the front end
    # NOTE(review): _srctype_dict presumably maps every source type to the
    # given value -- confirm in ._private.
    ICONS = _srctype_dict('<i class="fab fa-youtube"></i>')

    # Format to use to display a URL for the source
    # The templates are paired positionally with YouTube_SourceType.values,
    # so the tuple order must follow the order of that enumeration.
    URLS = dict(zip(
        YouTube_SourceType.values,
        (
            'https://www.youtube.com/c/{key}',
            'https://www.youtube.com/channel/{key}',
            'https://www.youtube.com/playlist?list={key}',
        ),
    ))

    # Format used to create indexable URLs
    # Same positional pairing as URLS; the playlist template has no {type}
    # placeholder, so the type argument is ignored for playlists.
    INDEX_URLS = dict(zip(
        YouTube_SourceType.values,
        (
            'https://www.youtube.com/c/{key}/{type}',
            'https://www.youtube.com/channel/{key}/{type}',
            'https://www.youtube.com/playlist?list={key}',
        ),
    ))

    # Callback functions to get a list of media from the source
    INDEXERS = _srctype_dict(get_youtube_media_info)

    # Field names to find the media ID used as the key when storing media
    KEY_FIELD = _srctype_dict('id')

    # Primary key. The right-hand side is evaluated before the class attribute
    # named `uuid` is bound, so `uuid.uuid4` still refers to the imported module.
    uuid = db.models.UUIDField(
        _('uuid'),
        primary_key=True,
        editable=False,
        default=uuid.uuid4,
        help_text=_('UUID of the source'),
    )
    # Set once when the row is first created (auto_now_add).
    created = db.models.DateTimeField(
        _('created'),
        auto_now_add=True,
        db_index=True,
        help_text=_('Date and time the source was created'),
    )
    # Nullable: empty until the source has been crawled.
    last_crawl = db.models.DateTimeField(
        _('last crawl'),
        db_index=True,
        null=True,
        blank=True,
        help_text=_('Date and time the source was last crawled'),
    )
    # Single-character code from YouTube_SourceType; also the lookup key for
    # the class-level tables above.
    source_type = db.models.CharField(
        _('source type'),
        max_length=1,
        db_index=True,
        choices=YouTube_SourceType.choices,
        default=YouTube_SourceType.CHANNEL,
        help_text=_('Source type'),
    )
    # key, name and directory are each unique across all sources.
    key = db.models.CharField(
        _('key'),
        max_length=100,
        db_index=True,
        unique=True,
        help_text=_('Source key, such as exact YouTube channel name or playlist ID'),
    )
    name = db.models.CharField(
        _('name'),
        max_length=100,
        db_index=True,
        unique=True,
        help_text=_('Friendly name for the source, used locally in TubeSync only'),
    )
    directory = db.models.CharField(
        _('directory'),
        max_length=100,
        db_index=True,
        unique=True,
        help_text=_('Directory name to save the media into'),
    )
    # str.format() template for media file names; the default comes from settings.
    media_format = db.models.CharField(
        _('media format'),
        max_length=200,
        default=settings.MEDIA_FORMATSTR_DEFAULT,
        help_text=_('File format to use for saving files, detailed options at bottom of page.'),
    )
    # When indexing should start; defaults to the time the row is created.
    target_schedule = db.models.DateTimeField(
        _('target schedule'),
        blank=True,
        db_index=True,
        default=timezone.now,
        help_text=_('Date and time when the task to index the source should begin'),
    )
    # Indexing interval chosen from IndexSchedule.
    index_schedule = db.models.IntegerField(
        _('index schedule'),
        choices=IndexSchedule.choices,
        db_index=True,
        default=IndexSchedule.EVERY_24_HOURS,
        help_text=_('Schedule of how often to index the source for new media'),
    )
    download_media = db.models.BooleanField(
        _('download media'),
        default=True,
        help_text=_('Download media from this source, if not selected the source will only be indexed'),
    )
    index_videos = db.models.BooleanField(
        _('index videos'),
        default=True,
        help_text=_('Index video media from this source'),
    )
    index_streams = db.models.BooleanField(
        _('index streams'),
        default=False,
        help_text=_('Index live stream media from this source'),
    )
    # Age limit chosen from CapChoices; the default is "no cap".
    download_cap = db.models.IntegerField(
        _('download cap'),
        choices=CapChoices.choices,
        default=CapChoices.CAP_NOCAP,
        help_text=_('Do not download media older than this capped date'),
    )
    delete_old_media = db.models.BooleanField(
        _('delete old media'),
        default=False,
        help_text=_('Delete old media after "days to keep" days?'),
    )
    days_to_keep = db.models.PositiveSmallIntegerField(
        _('days to keep'),
        default=14,
        help_text=_(
            'If "delete old media" is ticked, the number of days after which '
            'to automatically delete media'
        ),
    )
    # Optional title filter; an empty string means no filtering.
    filter_text = db.models.CharField(
        _('filter string'),
        max_length=200,
        default='',
        blank=True,
        help_text=_('Regex compatible filter string for video titles'),
    )
    filter_text_invert = db.models.BooleanField(
        _('invert filter text matching'),
        default=False,
        help_text=_('Invert filter string regex match, skip any matching titles when selected'),
    )
    # Optional duration filter, in seconds; may be left empty.
    filter_seconds = db.models.PositiveIntegerField(
        _('filter seconds'),
        blank=True,
        null=True,
        help_text=_('Filter Media based on Min/Max duration. Leave blank or 0 to disable filtering'),
    )
    # Stored as a boolean; the selectable options come from FilterSeconds.choices.
    filter_seconds_min = db.models.BooleanField(
        _('filter seconds min/max'),
        choices=FilterSeconds.choices,
        default=Val(FilterSeconds.MIN),
        help_text=_(
            'When Filter Seconds is > 0, do we skip on minimum (video shorter than limit) or maximum (video '
            'greater than maximum) video duration'
        ),
    )
    delete_removed_media = db.models.BooleanField(
        _('delete removed media'),
        default=False,
        help_text=_('Delete media that is no longer on this playlist'),
    )
    delete_files_on_disk = db.models.BooleanField(
        _('delete files on disk'),
        default=False,
        help_text=_('Delete files on disk when they are removed from TubeSync'),
    )
    # Desired resolution and codecs for downloads.
    source_resolution = db.models.CharField(
        _('source resolution'),
        max_length=8,
        db_index=True,
        choices=SourceResolution.choices,
        default=SourceResolution.VIDEO_1080P,
        help_text=_('Source resolution, desired video resolution to download'),
    )
    source_vcodec = db.models.CharField(
        _('source video codec'),
        max_length=8,
        db_index=True,
        choices=YouTube_VideoCodec.choices,
        default=YouTube_VideoCodec.VP9,
        help_text=_('Source video codec, desired video encoding format to download (ignored if "resolution" is audio only)'),
    )
    source_acodec = db.models.CharField(
        _('source audio codec'),
        max_length=8,
        db_index=True,
        choices=YouTube_AudioCodec.choices,
        default=YouTube_AudioCodec.OPUS,
        help_text=_('Source audio codec, desired audio encoding format to download'),
    )
    prefer_60fps = db.models.BooleanField(
        _('prefer 60fps'),
        default=True,
        help_text=_('Where possible, prefer 60fps media for this source'),
    )
    prefer_hdr = db.models.BooleanField(
        _('prefer hdr'),
        default=False,
        help_text=_('Where possible, prefer HDR media for this source'),
    )
    # Single-character code from Fallback.
    fallback = db.models.CharField(
        _('fallback'),
        max_length=1,
        db_index=True,
        choices=Fallback.choices,
        default=Fallback.NEXT_BEST_HD,
        help_text=_('What do do when media in your source resolution and codecs is not available'),
    )
    # Optional extra files written next to the media.
    copy_channel_images = db.models.BooleanField(
        _('copy channel images'),
        default=False,
        help_text=_('Copy channel banner and avatar. These may be detected and used by some media servers'),
    )
    copy_thumbnails = db.models.BooleanField(
        _('copy thumbnails'),
        default=False,
        help_text=_('Copy thumbnails with the media, these may be detected and used by some media servers'),
    )
    write_nfo = db.models.BooleanField(
        _('write nfo'),
        default=False,
        help_text=_('Write an NFO file in XML with the media info, these may be detected and used by some media servers'),
    )
    write_json = db.models.BooleanField(
        _('write json'),
        default=False,
        help_text=_('Write a JSON file with the media info, these may be detected and used by some media servers'),
    )
    has_failed = db.models.BooleanField(
        _('has failed'),
        default=False,
        help_text=_('Source has failed to index media'),
    )

    write_subtitles = db.models.BooleanField(
        _('write subtitles'),
        default=False,
        help_text=_('Download video subtitles'),
    )

    auto_subtitles = db.models.BooleanField(
        _('accept auto-generated subs'),
        default=False,
        help_text=_('Accept auto-generated subtitles'),
    )
    # Comma-separated language list checked by the regex validator below.
    # NOTE(review): the pattern is anchored only at the start, so a valid
    # prefix followed by other characters may still pass -- confirm whether
    # a trailing anchor is wanted.
    sub_langs = db.models.CharField(
        _('subs langs'),
        max_length=30,
        default='en',
        help_text=_('List of subtitles langs to download, comma-separated. Example: en,fr or all,-fr,-live_chat'),
        validators=[
            RegexValidator(
                regex=r"^(\-?[\_\.a-zA-Z-]+(,|$))+",
                message=_('Subtitle langs must be a comma-separated list of langs. example: en,fr or all,-fr,-live_chat'),
            ),
        ],
    )
    enable_sponsorblock = db.models.BooleanField(
        _('enable sponsorblock'),
        default=True,
        help_text=_('Use SponsorBlock?'),
    )
    # Project-specific multi-choice field; 'all' is both the default and the
    # special value standing for every category.
    sponsorblock_categories = CommaSepChoiceField(
        _('removed categories'),
        max_length=128,
        possible_choices=SponsorBlock_Category.choices,
        all_choice='all',
        allow_all=True,
        all_label='(All Categories)',
        default='all',
        help_text=_('Select the SponsorBlock categories that you wish to be removed from downloaded videos.'),
    )

    def __str__(self):
        # A source is displayed by its friendly name.
        return self.name

    class Meta:
        # Human-readable (translatable) names for the model.
        verbose_name = _('Source')
        verbose_name_plural = _('Sources')
|
|
|
|
@property
def icon(self):
    '''
        Icon markup registered in ICONS for this source's type, or None when
        the type has no entry.
    '''
    icons_by_type = self.ICONS
    return icons_by_type.get(self.source_type)
|
|
|
|
@property
def slugname(self):
    '''
        Slug built from the source name, truncated to 80 characters.

        Before slugifying, underscores become hyphens and both '&' and '+'
        are spelled out as 'and'.
    '''
    substitutions = str.maketrans({'_': '-', '&': 'and', '+': 'and'})
    slug = slugify(self.name.translate(substitutions))
    return slug[:80]
|
|
|
|
def deactivate(self):
    '''
        Switch off downloading and indexing for this source and save only
        the fields that were changed.
    '''
    changes = {
        'download_media': False,
        'index_streams': False,
        'index_videos': False,
        'index_schedule': IndexSchedule.NEVER,
    }
    for field_name, value in changes.items():
        setattr(self, field_name, value)
    # Restrict the save to the touched columns.
    self.save(update_fields=set(changes))
|
|
|
|
@property
def is_active(self):
    '''
        Truthy when the source has an index schedule and at least one of
        downloading, stream indexing or video indexing is enabled.

        A falsy schedule is returned as-is, otherwise the first truthy flag
        (or the last flag when all are falsy) is returned.
    '''
    schedule = self.index_schedule
    if not schedule:
        return schedule
    return self.download_media or self.index_streams or self.index_videos
|
|
|
|
@property
def is_audio(self):
    '''
        True when the configured source resolution is the audio-only value.
    '''
    audio_only = SourceResolution.AUDIO.value
    return self.source_resolution == audio_only
|
|
|
|
@property
def is_playlist(self):
    '''
        True when the source type is the playlist type.
    '''
    playlist_type = YouTube_SourceType.PLAYLIST.value
    return self.source_type == playlist_type
|
|
|
|
@property
def is_video(self):
    '''
        Logical opposite of is_audio: any source that is not audio-only.
    '''
    return False if self.is_audio else True
|
|
|
|
@property
def download_cap_date(self):
    '''
        Earliest date still inside the download cap, or False when no
        positive cap is configured. The cap is treated as a number of seconds.
    '''
    cap_seconds = self.download_cap
    if not cap_seconds > 0:
        return False
    return timezone.now() - timezone.timedelta(seconds=cap_seconds)
|
|
|
|
@property
def days_to_keep_date(self):
    '''
        Cut-off date that lies days_to_keep days in the past, or False when
        days_to_keep is not positive.
    '''
    keep_days = self.days_to_keep
    if not keep_days > 0:
        return False
    return timezone.now() - timezone.timedelta(days=keep_days)
|
|
|
|
@property
def task_run_at_dt(self):
    '''
        Work out when the next indexing task for this source should run.

        The result is also stored on self.target_schedule (in memory only,
        nothing is saved here).

        * Schedules shorter than EVERY_24_HOURS run index_schedule + 1
          seconds from now.
        * Schedules shorter than EVERY_7_DAYS run at the next occurrence,
          strictly after the current hour, of the hour of day held in
          target_schedule.
        * Otherwise a target_schedule that is still in the future is
          returned unchanged; one that has passed is moved to the next
          occurrence of its weekday and hour.
    '''
    now = timezone.now()
    # Top of the current hour; base for the hour / weekday alignment below.
    when = now.replace(minute=0, second=0, microsecond=0)

    def advance_hour(arg_dt, target_hour, /):
        # Move forward 0-23 hours until the hour of day equals target_hour.
        delta_hours = ((24 + target_hour) - arg_dt.hour) % 24
        return arg_dt + timezone.timedelta(hours=delta_hours)

    def advance_day(arg_dt, target_weekday, /):
        # Move forward 0-6 days until the weekday equals target_weekday.
        # weekday() is a method and has to be called to obtain the integer.
        delta_days = ((7 + target_weekday) - arg_dt.weekday()) % 7
        return arg_dt + timezone.timedelta(days=delta_days)

    if self.target_schedule is None:
        self.target_schedule = when
    if Val(IndexSchedule.EVERY_24_HOURS) > self.index_schedule:
        self.target_schedule = now + timezone.timedelta(
            seconds=1+self.index_schedule,
        )
    elif Val(IndexSchedule.EVERY_7_DAYS) > self.index_schedule:
        # Start from the next full hour. Adding a timedelta rolls over
        # midnight correctly, whereas replace(hour=24) raises ValueError.
        self.target_schedule = advance_hour(
            when + timezone.timedelta(hours=1),
            self.target_schedule.hour,
        )

    if now < self.target_schedule:
        return self.target_schedule

    # The stored schedule has passed: find its next weekday / hour slot.
    when = advance_hour(when, self.target_schedule.hour)
    when = advance_day(when, self.target_schedule.weekday())
    self.target_schedule = when
    return when
|
|
|
|
@property
def extension(self):
    '''
        File extension for media from this source. The same value is used by
        the downloader as the output container, so it is chosen here rather
        than left to the user (some codec / container pairs are invalid, for
        example OPUS audio in an MP4 container).

        Video sources always use mkv. Audio-only sources use m4a or ogg
        depending on the audio codec; any other audio codec raises ValueError.
    '''
    if not self.is_audio:
        return Val(FileExtension.MKV)
    acodec = self.source_acodec
    if acodec == Val(YouTube_AudioCodec.MP4A):
        return Val(FileExtension.M4A)
    if acodec == Val(YouTube_AudioCodec.OPUS):
        return Val(FileExtension.OGG)
    raise ValueError('Unable to choose audio extension, uknown acodec')
|
|
|
|
@classmethod
def create_url(cls, source_type, key):
    '''
        Fill the display URL template registered for source_type with key.
    '''
    return cls.URLS.get(source_type).format(key=key)
|
|
|
|
@classmethod
def create_index_url(cls, source_type, key, type):
    '''
        Fill the indexing URL template registered for source_type with key
        and type. Templates without a {type} placeholder ignore type.
    '''
    return cls.INDEX_URLS.get(source_type).format(key=key, type=type)
|
|
|
|
@property
def url(self):
    '''
        Display URL for this source, built from its type and key.
    '''
    model = type(self)
    return model.create_url(self.source_type, self.key)
|
|
|
|
def get_index_url(self, type):
    '''
        Indexing URL for this source and the given media type.
    '''
    build = self.__class__.create_index_url
    return build(self.source_type, self.key, type)
|
|
|
|
@property
def format_summary(self):
    '''
        One-line description of the requested format: resolution, video and
        audio codec, plus ' 60FPS' / ' HDR' markers for video sources that
        prefer them. Audio-only sources report the video codec as 'none'.
    '''
    video_codec = 'none' if self.is_audio else self.source_vcodec
    audio_codec = self.source_acodec
    markers = ''
    if self.is_video and self.prefer_60fps:
        markers += ' 60FPS'
    if self.is_video and self.prefer_hdr:
        markers += ' HDR'
    summary = f'{self.source_resolution} (video:{video_codec}, audio:{audio_codec}){markers}'
    return summary.strip()
|
|
|
|
@property
def directory_path(self):
    '''
        Full path of this source's directory: the media storage location
        joined with type_directory_path.
    '''
    storage_root = Path(media_file_storage.location)
    return storage_root.joinpath(self.type_directory_path)
|
|
|
|
@property
def type_directory_path(self):
    '''
        Directory of this source relative to the storage root. When
        SOURCE_DOWNLOAD_DIRECTORY_PREFIX is enabled the directory is nested
        under the configured audio or video download directory.
    '''
    if not settings.SOURCE_DOWNLOAD_DIRECTORY_PREFIX:
        return Path(self.directory)
    if self.is_audio:
        prefix = settings.DOWNLOAD_AUDIO_DIR
    else:
        prefix = settings.DOWNLOAD_VIDEO_DIR
    return Path(prefix) / self.directory
|
|
|
|
def make_directory(self):
    '''
        Create this source's directory, including missing parents. An
        already existing directory is not an error.
    '''
    target = self.directory_path
    return os.makedirs(target, exist_ok=True)
|
|
|
|
@property
def get_image_url(self):
    '''
        Channel image information looked up from this source's URL.
        Playlist sources are rejected with SuspiciousOperation.
    '''
    if not self.is_playlist:
        return get_youtube_channel_image_info(self.url)
    raise SuspiciousOperation('This source is a playlist so it doesn\'t have thumbnail.')
|
|
|
|
|
|
def directory_exists(self):
    '''
        True when this source's directory exists and is writable.
    '''
    if not os.path.isdir(self.directory_path):
        return False
    return os.access(self.directory_path, os.W_OK)
|
|
|
|
@property
def key_field(self):
    '''
        Name of the metadata field holding the media ID for this source's
        type, or an empty string when the type is not registered.
    '''
    fields_by_type = self.KEY_FIELD
    return fields_by_type.get(self.source_type, '')
|
|
|
|
@property
def source_resolution_height(self):
    '''
        Integer looked up in SourceResolutionInteger for the configured
        resolution; 0 when there is no entry.
    '''
    resolution = self.source_resolution
    return SourceResolutionInteger.get(resolution, 0)
|
|
|
|
@property
def can_fallback(self):
    '''
        True unless the fallback setting is the FAIL option.
    '''
    fail_value = Val(Fallback.FAIL)
    return self.fallback != fail_value
|
|
|
|
@property
def example_media_format_dict(self):
    '''
        Sample values for every placeholder a media_format string may use.
        Part of the data is real (dates, source name, codecs), the rest is
        fixed placeholder text. Used to render example file names and to
        validate media_format.
    '''
    resolution = self.source_resolution
    vcodec = self.source_vcodec
    acodec = self.source_acodec
    # Pieces of the combined "format" value, in display order; empty
    # pieces are dropped.
    format_parts = [
        part for part in (
            resolution,
            vcodec.lower() if vcodec else '',
            acodec.lower() if acodec else '',
            '60fps' if self.prefer_60fps else '',
            'hdr' if self.prefer_hdr else '',
        ) if part
    ]
    today = timezone.now()
    return dict(
        yyyymmdd=today.strftime('%Y%m%d'),
        yyyy_mm_dd=today.strftime('%Y-%m-%d'),
        yyyy=today.strftime('%Y'),
        mm=today.strftime('%m'),
        dd=today.strftime('%d'),
        source=self.slugname,
        source_full=self.name,
        uploader='Some Channel Name',
        title='some-media-title-name',
        title_full='Some Media Title Name',
        key='SoMeUnIqUiD',
        format='-'.join(format_parts),
        playlist_title='Some Playlist Title',
        video_order='01',
        ext=self.extension,
        resolution=resolution or '',
        height='720' if resolution else '',
        width='1280' if resolution else '',
        vcodec=vcodec.lower() if vcodec else '',
        acodec=self.source_acodec.lower(),
        fps='24' if resolution else '',
        hdr='hdr' if resolution else '',
    )
|
|
|
|
def get_example_media_format(self):
    '''
        Render media_format with the example values. Any failure while
        formatting (unknown placeholder, malformed template, ...) yields an
        empty string instead of an exception.
    '''
    try:
        example = self.media_format.format(**self.example_media_format_dict)
    except Exception:
        example = ''
    return example
|
|
|
|
def is_regex_match(self, media_item_title):
    '''
        Check a media title against filter_text. An empty filter matches
        everything; otherwise the filter is searched for anywhere in the
        title as a regular expression.
    '''
    pattern = self.filter_text
    if pattern:
        return re.search(pattern, media_item_title) is not None
    return True
|
|
|
|
def get_index(self, type):
    '''
        Run the indexer registered for this source's type and return the
        list of entries it reports.

        type is passed on to get_index_url to build the URL to index. When
        a download cap is active, it is handed to the indexer as a whole
        number of days. A response that is not a dict gives an empty list.

        Raises Exception when no callable indexer is registered for the
        source type.
    '''
    indexer = self.INDEXERS.get(self.source_type, None)
    if not callable(indexer):
        raise Exception(f'Source type "{self.source_type}" has no indexer')
    days = None
    if self.download_cap_date:
        # download_cap is in seconds; the indexer receives whole days.
        days = timezone.timedelta(seconds=self.download_cap).days
    response = indexer(self.get_index_url(type=type), days=days)
    if not isinstance(response, dict):
        return list()
    return response.get('entries', list())
|
|
|
|
def index_media(self):
    '''
        Index the media source returning a queue of media metadata as dicts.

        The queue is bounded by settings.MAX_ENTRIES_PROCESSING when that is
        set to a non-zero value. Videos are added first (in reversed index
        order), then streams for non-playlist sources.
    '''
    limit = getattr(settings, 'MAX_ENTRIES_PROCESSING', 0) or None
    entries = queue([], limit)
    if self.index_videos:
        entries.extend(reversed(self.get_index('videos')))

    # Playlists do something different that I have yet to figure out
    if self.is_playlist or not self.index_streams:
        return entries

    streams = self.get_index('streams')
    if entries.maxlen is not None and len(entries) != 0:
        # share the queue between streams and videos
        allowed_streams = max(
            entries.maxlen // 2,
            entries.maxlen - len(entries),
        )
        streams = streams[:allowed_streams]
    entries.extend(reversed(streams))
    return entries
|
|
|