Merge branch 'main' into patch-4

This commit is contained in:
tcely 2025-01-15 04:35:43 -05:00 committed by GitHub
commit 53ad31d128
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 453 additions and 91 deletions

View File

@ -1,20 +1,231 @@
# syntax=docker/dockerfile:1
# check=error=true
FROM debian:bookworm-slim
ARG TARGETARCH
ARG TARGETPLATFORM
ARG FFMPEG_DATE="2025-01-10-19-43"
ARG FFMPEG_VERSION="N-118280-g5cd49e1bfd"
ARG S6_VERSION="3.2.0.2"
ARG SHA256_S6_AMD64="59289456ab1761e277bd456a95e737c06b03ede99158beb24f12b165a904f478"
ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f66f785"
ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae"
ARG FFMPEG_DATE="autobuild-2024-12-24-14-15"
ARG FFMPEG_VERSION="N-118163-g954d55c2a4"
ARG SHA256_FFMPEG_AMD64="798a7e5a0724139e6bb70df8921522b23be27028f9f551dfa83c305ec4ffaf3a"
ARG SHA256_FFMPEG_ARM64="c3e6cc0fec42cc7e3804014fbb02c1384a1a31ef13f6f9a36121f2e1216240c0"
# Alpine release used as the base of the download / extract helper stages.
# Pinned to a release branch instead of "latest" so that rebuilds resolve to
# the same package set; override with --build-arg ALPINE_VERSION=... if needed.
ARG ALPINE_VERSION="3.21"
# Naming of the FFmpeg release assets: "<prefix>...<suffix>".
ARG FFMPEG_PREFIX_FILE="ffmpeg-${FFMPEG_VERSION}"
ARG FFMPEG_SUFFIX_FILE=".tar.xz"
# Digest algorithms used when verifying the downloaded archives.
ARG FFMPEG_CHECKSUM_ALGORITHM="sha256"
ARG S6_CHECKSUM_ALGORITHM="sha256"
# Stage: download the FFmpeg build matching TARGETARCH and verify its checksums.
# The verified archive ends up in /verified/${TARGETARCH}/.
FROM alpine:${ALPINE_VERSION} AS ffmpeg-download
ARG FFMPEG_DATE
ARG FFMPEG_VERSION
ARG FFMPEG_PREFIX_FILE
ARG FFMPEG_SUFFIX_FILE
ARG SHA256_FFMPEG_AMD64
ARG SHA256_FFMPEG_ARM64
ARG FFMPEG_CHECKSUM_ALGORITHM
ARG CHECKSUM_ALGORITHM="${FFMPEG_CHECKSUM_ALGORITHM}"
ARG FFMPEG_CHECKSUM_AMD64="${SHA256_FFMPEG_AMD64}"
ARG FFMPEG_CHECKSUM_ARM64="${SHA256_FFMPEG_ARM64}"
ARG FFMPEG_FILE_SUMS="checksums.${CHECKSUM_ALGORITHM}"
ARG FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/autobuild-${FFMPEG_DATE}"
ARG DESTDIR="/downloaded"
ARG TARGETARCH
# The release's checksum list; asset file names and digests are read from it below.
ADD "${FFMPEG_URL}/${FFMPEG_FILE_SUMS}" "${DESTDIR}/"
RUN set -eu ; \
    apk --no-cache --no-progress add cmd:aria2c cmd:awk ; \
    \
    # aria2c_options URL
    #   Print the tab-indented option lines that follow a URL in the aria2c
    #   input file. The algorithm name is split (e.g. sha256 -> sha + 256) to
    #   form the checksum option, and the digest is looked up by file name
    #   in the checksum list.
    aria2c_options() { \
        algorithm="${CHECKSUM_ALGORITHM%[0-9]??}" ; \
        bytes="${CHECKSUM_ALGORITHM#${algorithm}}" ; \
        hash="$( awk -v fn="${1##*/}" '$0 ~ fn"$" { print $1; exit; }' "${DESTDIR}/${FFMPEG_FILE_SUMS}" )" ; \
        \
        printf -- '\t%s\n' \
            'allow-overwrite=true' \
            'always-resume=false' \
            'check-integrity=true' \
            "checksum=${algorithm}-${bytes}=${hash}" \
            'max-connection-per-server=2' \
        ; \
        printf -- '\n' ; \
    } ; \
    \
    # Map TARGETARCH to the architecture token used in the asset file names.
    # An unsupported value stops the build here with a clear message instead
    # of producing an empty download list that only fails later on.
    decide_arch() { \
        case "${TARGETARCH}" in \
            (amd64) printf -- 'linux64' ;; \
            (arm64) printf -- 'linuxarm64' ;; \
            (*) printf -- 'Unsupported TARGETARCH for ffmpeg: %s\n' "${TARGETARCH}" >&2 ; return 1 ;; \
        esac ; \
    } ; \
    \
    FFMPEG_ARCH="$(decide_arch)" ; \
    # Only the first two dash-separated fields of the prefix are used for matching.
    FFMPEG_PREFIX_FILE="$( printf -- '%s' "${FFMPEG_PREFIX_FILE}" | cut -d '-' -f 1,2 )" ; \
    # Build the aria2c input file: each matching asset URL followed by its options.
    for url in $(awk ' \
        $2 ~ /^[*]?'"${FFMPEG_PREFIX_FILE}"'/ && /-'"${FFMPEG_ARCH}"'-/ { $1=""; print; } \
        ' "${DESTDIR}/${FFMPEG_FILE_SUMS}") ; \
    do \
        url="${FFMPEG_URL}/${url# }" ; \
        printf -- '%s\n' "${url}" ; \
        aria2c_options "${url}" ; \
        printf -- '\n' ; \
    done > /tmp/downloads ; \
    unset -v url ; \
    \
    # Download into DESTDIR, the same directory that is verified below.
    aria2c --no-conf=true \
        --dir "${DESTDIR}" \
        --lowest-speed-limit='16K' \
        --show-console-readout=false \
        --summary-interval=0 \
        --input-file /tmp/downloads ; \
    \
    apk --no-cache --no-progress add "cmd:${CHECKSUM_ALGORITHM}sum" ; \
    \
    # The digest pinned through build arguments for this architecture, if any.
    decide_expected() { \
        case "${TARGETARCH}" in \
            (amd64) printf -- '%s' "${FFMPEG_CHECKSUM_AMD64}" ;; \
            (arm64) printf -- '%s' "${FFMPEG_CHECKSUM_ARM64}" ;; \
        esac ; \
    } ; \
    \
    FFMPEG_HASH="$(decide_expected)" ; \
    \
    cd "${DESTDIR}" ; \
    # First check against the pinned digest (when one was given), then
    # against the checksum list published with the release.
    if [ -n "${FFMPEG_HASH}" ] ; \
    then \
        printf -- '%s *%s\n' "${FFMPEG_HASH}" "${FFMPEG_PREFIX_FILE}"*-"${FFMPEG_ARCH}"-*"${FFMPEG_SUFFIX_FILE}" >> /tmp/SUMS ; \
        "${CHECKSUM_ALGORITHM}sum" --check --warn --strict /tmp/SUMS || exit ; \
    fi ; \
    "${CHECKSUM_ALGORITHM}sum" --check --warn --strict --ignore-missing "${DESTDIR}/${FFMPEG_FILE_SUMS}" ; \
    \
    # Keep only the verified archive; the download directory is removed.
    mkdir -v -p "/verified/${TARGETARCH}" ; \
    ln -v "${FFMPEG_PREFIX_FILE}"*-"${FFMPEG_ARCH}"-*"${FFMPEG_SUFFIX_FILE}" "/verified/${TARGETARCH}/" ; \
    rm -rf "${DESTDIR}" ;
# Stage: unpack the ffmpeg / ffprobe entries of the verified archive into /extracted.
FROM alpine:${ALPINE_VERSION} AS ffmpeg-extracted
COPY --from=ffmpeg-download /verified /verified
ARG TARGETARCH
ARG FFMPEG_PREFIX_FILE
ARG FFMPEG_SUFFIX_FILE
RUN set -eux ; \
    archive="/tmp/ffmpeg${FFMPEG_SUFFIX_FILE}" ; \
    mkdir -v /extracted ; \
    cd /extracted ; \
    # Give the archive a fixed name so the following commands need no glob.
    ln -s "/verified/${TARGETARCH}"/"${FFMPEG_PREFIX_FILE}"*"${FFMPEG_SUFFIX_FILE}" "${archive}" ; \
    # List the wanted members first, then extract exactly that list
    # without the two leading path components.
    tar -t -f "${archive}" | grep '/bin/\(ffmpeg\|ffprobe\)' > /tmp/files ; \
    tar -x -o -p --strip-components=2 -f "${archive}" -T /tmp/files ; \
    ls -AlR /extracted ;
# Stage: an otherwise empty image that carries only the files unpacked by the
# ffmpeg-extracted stage, placed under /usr/local/bin/ for later COPY --from=ffmpeg.
FROM scratch AS ffmpeg
COPY --from=ffmpeg-extracted /extracted /usr/local/bin/
# Stage: download the s6-overlay archives (amd64, arm64 and noarch) together
# with their published checksum files, and check every archive against the
# digest pinned in the SHA256_S6_* build arguments.
FROM alpine:${ALPINE_VERSION} AS s6-overlay-download
ARG S6_VERSION
ARG SHA256_S6_AMD64
ARG SHA256_S6_ARM64
ARG SHA256_S6_NOARCH
ARG DESTDIR="/downloaded"
ARG S6_CHECKSUM_ALGORITHM
ARG CHECKSUM_ALGORITHM="${S6_CHECKSUM_ALGORITHM}"
# Pinned digests in "<algorithm>:<hex digest>" form; the RUN steps below
# strip the "<algorithm>:" prefix with cut before checking.
ARG S6_CHECKSUM_AMD64="${CHECKSUM_ALGORITHM}:${SHA256_S6_AMD64}"
ARG S6_CHECKSUM_ARM64="${CHECKSUM_ALGORITHM}:${SHA256_S6_ARM64}"
ARG S6_CHECKSUM_NOARCH="${CHECKSUM_ALGORITHM}:${SHA256_S6_NOARCH}"
ARG S6_OVERLAY_URL="https://github.com/just-containers/s6-overlay/releases/download/v${S6_VERSION}"
# Archive names are "<prefix><arch><suffix>".
ARG S6_PREFIX_FILE="s6-overlay-"
ARG S6_SUFFIX_FILE=".tar.xz"
ARG S6_FILE_AMD64="${S6_PREFIX_FILE}x86_64${S6_SUFFIX_FILE}"
ARG S6_FILE_ARM64="${S6_PREFIX_FILE}aarch64${S6_SUFFIX_FILE}"
ARG S6_FILE_NOARCH="${S6_PREFIX_FILE}noarch${S6_SUFFIX_FILE}"
# Published checksum files ("<archive>.<algorithm>"), stored next to the archives.
ADD "${S6_OVERLAY_URL}/${S6_FILE_AMD64}.${CHECKSUM_ALGORITHM}" "${DESTDIR}/"
ADD "${S6_OVERLAY_URL}/${S6_FILE_ARM64}.${CHECKSUM_ALGORITHM}" "${DESTDIR}/"
ADD "${S6_OVERLAY_URL}/${S6_FILE_NOARCH}.${CHECKSUM_ALGORITHM}" "${DESTDIR}/"
##ADD --checksum="${S6_CHECKSUM_AMD64}" "${S6_OVERLAY_URL}/${S6_FILE_AMD64}" "${DESTDIR}/"
##ADD --checksum="${S6_CHECKSUM_ARM64}" "${S6_OVERLAY_URL}/${S6_FILE_ARM64}" "${DESTDIR}/"
##ADD --checksum="${S6_CHECKSUM_NOARCH}" "${S6_OVERLAY_URL}/${S6_FILE_NOARCH}" "${DESTDIR}/"
# --checksum wasn't recognized, so use busybox to check the sums instead
# Each archive is fetched with ADD and then checked by a RUN step that feeds
# one "<digest> *<file>" line to the checksum tool; a mismatch fails the build.
ADD "${S6_OVERLAY_URL}/${S6_FILE_AMD64}" "${DESTDIR}/"
RUN set -eu ; checksum="${S6_CHECKSUM_AMD64}" ; file="${S6_FILE_AMD64}" ; cd "${DESTDIR}/" && \
printf -- '%s *%s\n' "$(printf -- '%s' "${checksum}" | cut -d : -f 2-)" "${file}" | "${CHECKSUM_ALGORITHM}sum" -cw
ADD "${S6_OVERLAY_URL}/${S6_FILE_ARM64}" "${DESTDIR}/"
RUN set -eu ; checksum="${S6_CHECKSUM_ARM64}" ; file="${S6_FILE_ARM64}" ; cd "${DESTDIR}/" && \
printf -- '%s *%s\n' "$(printf -- '%s' "${checksum}" | cut -d : -f 2-)" "${file}" | "${CHECKSUM_ALGORITHM}sum" -cw
ADD "${S6_OVERLAY_URL}/${S6_FILE_NOARCH}" "${DESTDIR}/"
RUN set -eu ; checksum="${S6_CHECKSUM_NOARCH}" ; file="${S6_FILE_NOARCH}" ; cd "${DESTDIR}/" && \
printf -- '%s *%s\n' "$(printf -- '%s' "${checksum}" | cut -d : -f 2-)" "${file}" | "${CHECKSUM_ALGORITHM}sum" -cw
# Stage: verify the downloaded s6-overlay archives against their published
# checksum files, then unpack the noarch archive and the one matching
# TARGETARCH into /s6-overlay-rootfs.
FROM alpine:${ALPINE_VERSION} AS s6-overlay-extracted
COPY --from=s6-overlay-download /downloaded /downloaded
ARG S6_CHECKSUM_ALGORITHM
ARG CHECKSUM_ALGORITHM="${S6_CHECKSUM_ALGORITHM}"
ARG TARGETARCH
RUN set -eu ; \
    \
    # decide_arch [ARCH]
    #   Translate a Docker / uname architecture name into the one used in the
    #   s6-overlay archive names; unknown names are passed through unchanged.
    #   Without an argument the output of uname -m is used.
    decide_arch() { \
        local arg1 ; \
        arg1="${1:-$(uname -m)}" ; \
        \
        case "${arg1}" in \
            (amd64) printf -- 'x86_64' ;; \
            (arm64) printf -- 'aarch64' ;; \
            (armv7l) printf -- 'arm' ;; \
            (*) printf -- '%s' "${arg1}" ;; \
        esac ; \
        unset -v arg1 ; \
    } ; \
    \
    apk --no-cache --no-progress add "cmd:${CHECKSUM_ALGORITHM}sum" ; \
    mkdir -v /verified ; \
    cd /downloaded ; \
    # The checksum files are named "<archive>.<algorithm>"; follow the
    # CHECKSUM_ALGORITHM build argument rather than assuming sha256.
    for f in *."${CHECKSUM_ALGORITHM}" ; \
    do \
        "${CHECKSUM_ALGORITHM}sum" --check --warn --strict "${f}" || exit ; \
        ln -v "${f%.${CHECKSUM_ALGORITHM}}" /verified/ || exit ; \
    done ; \
    unset -v f ; \
    \
    S6_ARCH="$(decide_arch "${TARGETARCH}")" ; \
    set -x ; \
    mkdir -v /s6-overlay-rootfs ; \
    cd /s6-overlay-rootfs ; \
    # Archives for other architectures are skipped.
    for f in /verified/*.tar* ; \
    do \
        case "${f}" in \
            (*-noarch.tar*|*-"${S6_ARCH}".tar*) \
                tar -xpf "${f}" || exit ;; \
        esac ; \
    done ; \
    set +x ; \
    unset -v f ;
# Stage: an otherwise empty image whose root is the tree unpacked by the
# s6-overlay-extracted stage, for later COPY --from=s6-overlay.
FROM scratch AS s6-overlay
COPY --from=s6-overlay-extracted /s6-overlay-rootfs /
FROM debian:bookworm-slim AS tubesync
ARG TARGETARCH
ARG TARGETPLATFORM
ARG S6_VERSION
ARG FFMPEG_DATE
ARG FFMPEG_VERSION
ENV S6_VERSION="${S6_VERSION}" \
FFMPEG_DATE="${FFMPEG_DATE}" \
@ -29,89 +240,20 @@ ENV DEBIAN_FRONTEND="noninteractive" \
S6_CMD_WAIT_FOR_SERVICES_MAXTIME="0"
# Install third party software
COPY --from=s6-overlay / /
COPY --from=ffmpeg /usr/local/bin/ /usr/local/bin/
# Reminder: the SHELL handles all variables
RUN decide_arch() { \
case "${TARGETARCH:=amd64}" in \
(arm64) printf -- 'aarch64' ;; \
(*) printf -- '%s' "${TARGETARCH}" ;; \
esac ; \
} && \
decide_expected() { \
case "${1}" in \
(ffmpeg) case "${2}" in \
(amd64) printf -- '%s' "${SHA256_FFMPEG_AMD64}" ;; \
(arm64) printf -- '%s' "${SHA256_FFMPEG_ARM64}" ;; \
esac ;; \
(s6) case "${2}" in \
(amd64) printf -- '%s' "${SHA256_S6_AMD64}" ;; \
(arm64) printf -- '%s' "${SHA256_S6_ARM64}" ;; \
(noarch) printf -- '%s' "${SHA256_S6_NOARCH}" ;; \
esac ;; \
esac ; \
} && \
decide_url() { \
case "${1}" in \
(ffmpeg) printf -- \
'https://github.com/yt-dlp/FFmpeg-Builds/releases/download/%s/ffmpeg-%s-linux%s-gpl%s.tar.xz' \
"${FFMPEG_DATE}" \
"${FFMPEG_VERSION}" \
"$(case "${2}" in \
(amd64) printf -- '64' ;; \
(*) printf -- '%s' "${2}" ;; \
esac)" \
"$(case "${FFMPEG_VERSION%%-*}" in \
(n*) printf -- '-%s\n' "${FFMPEG_VERSION#n}" | cut -d '-' -f 1,2 ;; \
(*) printf -- '' ;; \
esac)" ;; \
(s6) printf -- \
'https://github.com/just-containers/s6-overlay/releases/download/v%s/s6-overlay-%s.tar.xz' \
"${S6_VERSION}" \
"$(case "${2}" in \
(amd64) printf -- 'x86_64' ;; \
(arm64) printf -- 'aarch64' ;; \
(*) printf -- '%s' "${2}" ;; \
esac)" ;; \
esac ; \
} && \
verify_download() { \
while [ $# -ge 2 ] ; do \
sha256sum "${2}" ; \
printf -- '%s %s\n' "${1}" "${2}" | sha256sum -c || return ; \
shift ; shift ; \
done ; \
} && \
download_expected_file() { \
local arg1 expected file url ; \
arg1="$(printf -- '%s\n' "${1}" | awk '{print toupper($0);}')" ; \
expected="$(decide_expected "${1}" "${2}")" ; \
file="${3}" ; \
url="$(decide_url "${1}" "${2}")" ; \
printf -- '%s\n' \
"Building for arch: ${2}|${ARCH}, downloading ${arg1} from: ${url}, expecting ${arg1} SHA256: ${expected}" && \
rm -rf "${file}" && \
curl --disable --output "${file}" --clobber --location --no-progress-meter --url "${url}" && \
verify_download "${expected}" "${file}" ; \
} && \
export ARCH="$(decide_arch)" && \
set -x && \
RUN set -x && \
apt-get update && \
apt-get -y --no-install-recommends install locales && \
printf -- "en_US.UTF-8 UTF-8\n" > /etc/locale.gen && \
locale-gen en_US.UTF-8 && \
# Install required distro packages
apt-get -y --no-install-recommends install curl ca-certificates file binutils xz-utils && \
# Install s6
_file="/tmp/s6-overlay-noarch.tar.xz" && \
download_expected_file s6 noarch "${_file}" && \
tar -C / -xpf "${_file}" && rm -f "${_file}" && \
_file="/tmp/s6-overlay-${ARCH}.tar.xz" && \
download_expected_file s6 "${TARGETARCH}" "${_file}" && \
tar -C / -xpf "${_file}" && rm -f "${_file}" && \
# Installed s6 (using COPY earlier)
file -L /command/s6-overlay-suexec && \
# Install ffmpeg
_file="/tmp/ffmpeg-${ARCH}.tar.xz" && \
download_expected_file ffmpeg "${TARGETARCH}" "${_file}" && \
tar -xvvpf "${_file}" --strip-components=2 --no-anchored -C /usr/local/bin/ "ffmpeg" "ffprobe" && rm -f "${_file}" && \
# Installed ffmpeg (using COPY earlier)
/usr/local/bin/ffmpeg -version && \
file /usr/local/bin/ff* && \
# Clean up
@ -157,7 +299,9 @@ ENV PIP_NO_COMPILE=1 \
WORKDIR /app
# Set up the app
RUN set -x && \
#BuildKit#RUN --mount=type=bind,source=Pipfile,target=/app/Pipfile \
RUN \
set -x && \
apt-get update && \
# Install required build packages
apt-get -y --no-install-recommends install \

View File

@ -19,7 +19,7 @@ from common.utils import clean_filename, clean_emoji
from .youtube import (get_media_info as get_youtube_media_info,
download_media as download_youtube_media,
get_channel_image_info as get_youtube_channel_image_info)
from .utils import seconds_to_timestr, parse_media_format
from .utils import seconds_to_timestr, parse_media_format, filter_response
from .matching import (get_best_combined_format, get_best_audio_format,
get_best_video_format)
from .mediaservers import PlexMediaServer
@ -589,6 +589,7 @@ class Source(models.Model):
'key': 'SoMeUnIqUiD',
'format': '-'.join(fmt),
'playlist_title': 'Some Playlist Title',
'video_order': '01',
'ext': self.extension,
'resolution': self.source_resolution if self.source_resolution else '',
'height': '720' if self.source_resolution else '',
@ -1128,6 +1129,7 @@ class Media(models.Model):
'key': self.key,
'format': '-'.join(display_format['format']),
'playlist_title': self.playlist_title,
'video_order': self.get_episode_str(True),
'ext': self.source.extension,
'resolution': display_format['resolution'],
'height': display_format['height'],
@ -1143,8 +1145,39 @@ class Media(models.Model):
def has_metadata(self):
return self.metadata is not None
@property
def reduce_data(self):
    '''
        Log how much smaller the stored metadata JSON becomes when it is
        compacted and when it is passed through filter_response(), and
        replace self.metadata with the filtered JSON when the
        SHRINK_OLD_MEDIA_METADATA setting is enabled.

        Accessed for its side effects only; it always evaluates to None
        and does not save the model.
    '''
    # Imported before the try block so that `log` is always bound when
    # the except clause below needs it.
    from common.logger import log
    from common.utils import json_serial
    # Nothing to reduce; json.loads('') would only raise and log a
    # traceback for every media item that has no metadata yet.
    if not self.metadata:
        return
    try:
        old_mdl = len(self.metadata)
        data = json.loads(self.metadata)
        compact_json = json.dumps(data, separators=(',', ':'), default=json_serial)
        filtered_data = filter_response(data, True)
        filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial)
    except Exception as e:
        # best effort: the stored metadata is left untouched
        log.exception('reduce_data: %s', e)
    else:
        # log the results of filtering / compacting on metadata size
        new_mdl = len(compact_json)
        if old_mdl > new_mdl:
            delta = old_mdl - new_mdl
            log.info(f'{self.key}: metadata compacted by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
        new_mdl = len(filtered_json)
        if old_mdl > new_mdl:
            delta = old_mdl - new_mdl
            log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
            if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False):
                self.metadata = filtered_json
@property
def loaded_metadata(self):
if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False):
self.reduce_data
try:
data = json.loads(self.metadata)
if not isinstance(data, dict):
@ -1263,8 +1296,7 @@ class Media(models.Model):
@property
def directory_path(self):
dirname = self.source.directory_path / self.filename
return dirname.parent
return self.filepath.parent
@property
def filepath(self):
@ -1373,8 +1405,7 @@ class Media(models.Model):
nfo.append(season)
# episode = number of video in the year
episode = nfo.makeelement('episode', {})
episode_number = self.calculate_episode_number()
episode.text = str(episode_number) if episode_number else ''
episode.text = self.get_episode_str()
episode.tail = '\n '
nfo.append(episode)
# ratings = media metadata youtube rating
@ -1524,6 +1555,16 @@ class Media(models.Model):
return position_counter
position_counter += 1
def get_episode_str(self, use_padding=False):
    '''
        Return the calculated episode number as a string, zero-padded to
        at least two digits when use_padding is true, or an empty string
        when there is no episode number.
    '''
    number = self.calculate_episode_number()
    if number:
        return f'{number:02}' if use_padding else str(number)
    return ''
class MediaServer(models.Model):
'''

View File

@ -26,7 +26,7 @@ from common.errors import NoMediaException, DownloadFailedException
from common.utils import json_serial
from .models import Source, Media, MediaServer
from .utils import (get_remote_image, resize_image_to_height, delete_file,
write_text_file)
write_text_file, filter_response)
from .filtering import filter_media
@ -304,7 +304,10 @@ def download_media_metadata(media_id):
return
source = media.source
metadata = media.index_metadata()
media.metadata = json.dumps(metadata, default=json_serial)
response = metadata
if getattr(settings, 'SHRINK_NEW_MEDIA_METADATA', False):
response = filter_response(metadata, True)
media.metadata = json.dumps(response, separators=(',', ':'), default=json_serial)
upload_date = media.upload_date
# Media must have a valid upload date
if upload_date:

View File

@ -73,6 +73,11 @@
<td>Playlist title of media, if it's in a playlist</td>
<td>Some Playlist</td>
</tr>
<tr>
<td>{video_order}</td>
<td>Episode order in playlist, if in playlist <sub><sup>(can cause issues if playlist is changed after adding)</sup></sub></td>
<td>01</td>
</tr>
<tr>
<td>{ext}</td>
<td>File extension</td>

View File

@ -18,6 +18,7 @@ from background_task.models import Task
from .models import Source, Media
from .tasks import cleanup_old_media
from .filtering import filter_media
from .utils import filter_response
class FrontEndTestCase(TestCase):
@ -1709,6 +1710,84 @@ class FormatMatchingTestCase(TestCase):
f'expected {expected_match_result}')
class ResponseFilteringTestCase(TestCase):
    '''
        Checks filter_response() against the stored '20230629' metadata
        sample: which keys are dropped and which are kept.
    '''

    def setUp(self):
        # Disable general logging for test case
        logging.disable(logging.CRITICAL)
        # Add a test source
        source_fields = {
            'source_type': Source.SOURCE_TYPE_YOUTUBE_CHANNEL,
            'key': 'testkey',
            'name': 'testname',
            'directory': 'testdirectory',
            'index_schedule': 3600,
            'delete_old_media': False,
            'days_to_keep': 14,
            'source_resolution': Source.SOURCE_RESOLUTION_1080P,
            'source_vcodec': Source.SOURCE_VCODEC_VP9,
            'source_acodec': Source.SOURCE_ACODEC_OPUS,
            'prefer_60fps': False,
            'prefer_hdr': False,
            'fallback': Source.FALLBACK_FAIL,
        }
        self.source = Source.objects.create(**source_fields)
        # Add some media
        self.media = Media.objects.create(
            source=self.source,
            key='mediakey',
            metadata='{}',
        )

    def test_metadata_20230629(self):
        def format_url_entries(formats):
            # (format_id, key, value) for every key containing 'url'
            return [
                (fmt['format_id'], name, fmt[name],)
                for fmt in formats
                for name in fmt.keys()
                if 'url' in name
            ]

        self.media.metadata = all_test_metadata['20230629']
        self.media.save()

        unfiltered = self.media.loaded_metadata
        filtered = filter_response(self.media.loaded_metadata)
        self.assertIn('formats', unfiltered.keys())
        self.assertIn('formats', filtered.keys())

        # filtered 'downloader_options'
        self.assertIn('downloader_options', unfiltered['formats'][10].keys())
        self.assertNotIn('downloader_options', filtered['formats'][10].keys())

        # filtered 'http_headers'
        self.assertIn('http_headers', unfiltered['formats'][0].keys())
        self.assertNotIn('http_headers', filtered['formats'][0].keys())

        # did not lose any formats
        self.assertEqual(48, len(unfiltered['formats']))
        self.assertEqual(48, len(filtered['formats']))
        self.assertEqual(len(unfiltered['formats']), len(filtered['formats']))

        # did not remove everything with url
        self.assertIn('original_url', unfiltered.keys())
        self.assertIn('original_url', filtered.keys())
        self.assertEqual(unfiltered['original_url'], filtered['original_url'])

        # did reduce the size of the metadata
        self.assertTrue(len(str(unfiltered)) > len(str(filtered)))

        # url keys of the formats, before and after filtering
        unfiltered_url_keys = format_url_entries(unfiltered['formats'])
        self.assertEqual(63, len(unfiltered_url_keys), msg=str(unfiltered_url_keys))
        filtered_url_keys = format_url_entries(filtered['formats'])
        self.assertEqual(3, len(filtered_url_keys), msg=str(filtered_url_keys))

        # no url keys are left in the automatic captions
        caption_url_keys = [
            (lang_code, caption['ext'], caption[name],)
            for lang_code, captions in filtered['automatic_captions'].items()
            for caption in captions
            for name in caption.keys()
            if 'url' in name
        ]
        self.assertEqual(0, len(caption_url_keys), msg=str(caption_url_keys))
class TasksTestCase(TestCase):
def setUp(self):

View File

@ -1,6 +1,7 @@
import os
import re
import math
from copy import deepcopy
from operator import itemgetter
from pathlib import Path
from tempfile import NamedTemporaryFile
@ -171,6 +172,95 @@ def normalize_codec(codec_str):
return result
def _url_keys(arg_dict, filter_func):
result = {}
for key in arg_dict.keys():
if 'url' in key:
result.update(
{key: filter_func(key=key, url=arg_dict[key])}
)
return result
def _drop_url_keys(arg_dict, key, filter_func):
    '''
        For every dict in the sequence stored at arg_dict[key], delete the
        keys containing 'url' for which filter_func returned exactly True.
        Does nothing when key is not present in arg_dict.
    '''
    if key not in arg_dict.keys():
        return
    for entry in arg_dict[key]:
        # _url_keys builds a separate dict, so deleting from entry is safe here
        verdicts = _url_keys(entry, filter_func)
        for url_key in verdicts:
            if verdicts[url_key] is True:
                del entry[url_key]
def filter_response(arg_dict, copy_arg=False):
    '''
        Clean up the response so as to not store useless metadata in the database.

        arg_dict is the response dict. It is modified in place and
        returned, unless copy_arg is true, in which case a deep copy is
        modified and returned instead. Raises TypeError when arg_dict
        is not a dict.
    '''
    response_dict = arg_dict
    # raise an exception for an unexpected argument type
    if not isinstance(response_dict, dict):
        raise TypeError(f'response_dict must be a dict, got "{type(response_dict)}"')

    if copy_arg:
        response_dict = deepcopy(arg_dict)

    # optimize the empty case
    if not response_dict:
        return response_dict

    def contains_any(url, markers):
        # only values that look like a URL are candidates for removal
        if not url or '://' not in url:
            return False
        return any(marker in url for marker in markers)

    # beginning of formats cleanup {{{
    # drop urls that expire, or restrict IPs
    def drop_format_url(**kwargs):
        return contains_any(
            kwargs['url'],
            ('/ip/', 'ip=', '/expire/', 'expire=',),
        )

    # these format keys are not useful to us
    useless_format_keys = (
        'downloader_options',
        'fragments',
        'http_headers',
        '__needs_testing',
        '__working',
    )
    for formats_key in ('formats', 'requested_formats',):
        _drop_url_keys(response_dict, formats_key, drop_format_url)
        if formats_key not in response_dict.keys():
            continue
        for fmt in response_dict[formats_key]:
            for useless_key in useless_format_keys:
                if useless_key in fmt.keys():
                    del fmt[useless_key]
    # end of formats cleanup }}}

    # beginning of subtitles cleanup {{{
    # drop urls that expire
    def drop_subtitles_url(**kwargs):
        return contains_any(
            kwargs['url'],
            ('/expire/', '&expire=',),
        )

    for captions_key in ('subtitles', 'automatic_captions',):
        if captions_key not in response_dict.keys():
            continue
        by_language = response_dict[captions_key]
        for lang_code in by_language:
            _drop_url_keys(by_language, lang_code, drop_subtitles_url)
    # end of subtitles cleanup }}}

    return response_dict
def parse_media_format(format_dict):
'''
This parser primarily adapts the format dict returned by youtube-dl into a