Files
yt-dlp/yt_dlp/extractor/onsen.py
doe1080 17bfaa53ed
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
Download Tests / Quick Download Tests (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.10) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.11) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.12) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.13) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.14-dev) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, pypy-3.11) (push) Has been cancelled
Download Tests / Full Download Tests (windows-latest, 3.9) (push) Has been cancelled
Download Tests / Full Download Tests (windows-latest, pypy-3.11) (push) Has been cancelled
Quick Test / Core Test (push) Has been cancelled
Quick Test / Code check (push) Has been cancelled
Release (master) / release (push) Has been cancelled
Release (master) / publish_pypi (push) Has been cancelled
[ie/onsen] Add extractor (#10971)
Closes #10902
Authored by: doe1080
2025-09-11 22:51:31 +00:00

152 lines
6.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import base64
import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
parse_qs,
str_or_none,
strftime_or_none,
update_url,
update_url_query,
url_or_none,
)
from ..utils.traversal import traverse_obj
class OnsenIE(InfoExtractor):
IE_NAME = 'onsen'
IE_DESC = 'インターネットラジオステーション<音泉>'
_BASE_URL = 'https://www.onsen.ag'
_HEADERS = {'Referer': f'{_BASE_URL}/'}
_NETRC_MACHINE = 'onsen'
_VALID_URL = r'https?://(?:(?:share|www)\.)onsen\.ag/program/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://share.onsen.ag/program/onsenking?p=90&c=MTA0NjI',
'info_dict': {
'id': '10462',
'ext': 'm4a',
'title': '第SP回',
'cast': 'count:3',
'description': 'md5:de62c80a41c4c8d84da53a1ee681ad18',
'display_id': 'MTA0NjI=',
'media_type': 'sound',
'section_start': 0,
'series': '音泉キング「下野紘」のラジオ きみはもちろん、<音泉>ファミリーだよね?',
'series_id': 'onsenking',
'tags': 'count:2',
'thumbnail': r're:https?://d3bzklg4lms4gh\.cloudfront\.net/program_info/image/default/production/.+',
'upload_date': '20220627',
'webpage_url': 'https://www.onsen.ag/program/onsenking?c=MTA0NjI=',
},
}, {
'url': 'https://share.onsen.ag/program/girls-band-cry-radio?p=370&c=MTgwMDE',
'info_dict': {
'id': '18001',
'ext': 'mp4',
'title': '第4回',
'cast': 'count:5',
'description': 'md5:bbca8a389d99c90cbbce8f383c85fedd',
'display_id': 'MTgwMDE=',
'media_type': 'movie',
'section_start': 0,
'series': 'TVアニメ『ガールズバンドクライ』WEBラジオ「ガールズバンドクライラジオにも全部ぶち込め。',
'series_id': 'girls-band-cry-radio',
'tags': 'count:3',
'thumbnail': r're:https?://d3bzklg4lms4gh\.cloudfront\.net/program_info/image/default/production/.+',
'upload_date': '20240425',
'webpage_url': 'https://www.onsen.ag/program/girls-band-cry-radio?c=MTgwMDE=',
},
'skip': 'Only available for premium supporters',
}, {
'url': 'https://www.onsen.ag/program/uma',
'info_dict': {
'id': 'uma',
'title': 'UMA YELL RADIO',
},
'playlist_mincount': 35,
}]
@staticmethod
def _get_encoded_id(program):
return base64.urlsafe_b64encode(str(program['id']).encode()).decode()
def _perform_login(self, username, password):
sign_in = self._download_json(
f'{self._BASE_URL}/web_api/signin', None, 'Logging in', headers={
'Accept': 'application/json',
'Content-Type': 'application/json',
}, data=json.dumps({
'session': {
'email': username,
'password': password,
},
}).encode(), expected_status=401)
if sign_in.get('error'):
raise ExtractorError('Invalid username or password', expected=True)
def _real_extract(self, url):
program_id = self._match_id(url)
try:
programs = self._download_json(
f'{self._BASE_URL}/web_api/programs/{program_id}', program_id)
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
raise ExtractorError('Invalid URL', expected=True)
raise
query = {k: v[-1] for k, v in parse_qs(url).items() if v}
if 'c' not in query:
entries = [
self.url_result(update_url_query(url, {'c': self._get_encoded_id(program)}), OnsenIE)
for program in traverse_obj(programs, ('contents', lambda _, v: v['id']))
]
return self.playlist_result(
entries, program_id, traverse_obj(programs, ('program_info', 'title', {clean_html})))
raw_id = base64.urlsafe_b64decode(f'{query["c"]}===').decode()
p_keys = ('contents', lambda _, v: v['id'] == int(raw_id))
program = traverse_obj(programs, (*p_keys, any))
if not program:
raise ExtractorError(
'This program is no longer available', expected=True)
m3u8_url = traverse_obj(program, ('streaming_url', {url_or_none}))
if not m3u8_url:
self.raise_login_required(
'This program is only available for premium supporters')
display_id = self._get_encoded_id(program)
date_str = self._search_regex(
rf'{program_id}0?(\d{{6}})', m3u8_url, 'date string', default=None)
return {
'display_id': display_id,
'formats': self._extract_m3u8_formats(m3u8_url, raw_id, headers=self._HEADERS),
'http_headers': self._HEADERS,
'section_start': int_or_none(query.get('t', 0)),
'upload_date': strftime_or_none(f'20{date_str}'),
'webpage_url': f'{self._BASE_URL}/program/{program_id}?c={display_id}',
**traverse_obj(program, {
'id': ('id', {int}, {str_or_none}),
'title': ('title', {clean_html}),
'media_type': ('media_type', {str}),
'thumbnail': ('poster_image_url', {url_or_none}, {update_url(query=None)}),
}),
**traverse_obj(programs, {
'cast': (('performers', (*p_keys, 'guests')), ..., 'name', {str}, filter),
'series_id': ('directory_name', {str}),
}),
**traverse_obj(programs, ('program_info', {
'description': ('description', {clean_html}, filter),
'series': ('title', {clean_html}),
'tags': ('hashtag_list', ..., {str}, filter),
})),
}