From df40a1367a8252448b1c4201c255d7b8769793fc Mon Sep 17 00:00:00 2001
From: meeb
Date: Wed, 20 Jan 2021 17:34:19 +1100
Subject: [PATCH] sanitise youtube video titles for use in sane filenames, resolves #35

---
 tubesync/common/utils.py | 18 ++++++++++++++++++
 tubesync/sync/models.py  |  5 +++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py
index a803f514..988b4f11 100644
--- a/tubesync/common/utils.py
+++ b/tubesync/common/utils.py
@@ -14,3 +14,21 @@ def append_uri_params(uri, params):
     uri = str(uri)
     qs = urlencode(params)
     return urlunsplit(('', '', uri, qs, ''))
+
+
+def clean_filename(filename):
+    """Return filename with characters unsuitable for file names removed.
+
+    Deletes every character in to_scrub and every ASCII control character
+    (code points 0-31), then collapses each run of whitespace to a single
+    space and trims both ends. Raises ValueError if filename is not a str.
+    """
+    if not isinstance(filename, str):
+        raise ValueError(f'filename must be a str, got {type(filename)}')
+    # The backslash is doubled: a bare '\/' is an invalid escape sequence.
+    to_scrub = '<>\\/:*?"|'
+    for char in to_scrub:
+        filename = filename.replace(char, '')
+    # > 31 (not > 30) so that 0x1F is dropped like every other control char.
+    filename = ''.join([c for c in filename if ord(c) > 31])
+    return ' '.join(filename.split())
diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index 59011684..3b6955fe 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -12,6 +12,7 @@ from django.utils.text import slugify
 from django.utils import timezone
 from django.utils.translation import gettext_lazy as _
 from common.errors import NoFormatException
+from common.utils import clean_filename
 from .youtube import (get_media_info as get_youtube_media_info,
                       download_media as download_youtube_media)
 from .utils import seconds_to_timestr, parse_media_format
@@ -887,7 +888,7 @@ class Media(models.Model):
             'source': self.source.slugname,
             'source_full': self.source.name,
             'title': self.slugtitle,
-            'title_full': self.title,
+            'title_full': clean_filename(self.title),
             'key': self.key,
             'format': '-'.join(display_format['format']),
             'playlist_index': self.playlist_index,
@@ -1005,7 +1006,7 @@ class Media(models.Model):
 
     @property
     def filename(self):
-        # Otherwise, create a suitable filename from the source media_format
+        # Create a suitable filename from the source media_format
         media_format = str(self.source.media_format)
         media_details = self.format_dict
         return media_format.format(**media_details)