mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-08-02 18:30:27 +00:00
[ie/archive.org] Fix metadata extraction (#13880)
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
Download Tests / Quick Download Tests (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.10) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.11) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.12) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.13) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, pypy-3.11) (push) Has been cancelled
Download Tests / Full Download Tests (windows-latest, 3.9) (push) Has been cancelled
Download Tests / Full Download Tests (windows-latest, pypy-3.11) (push) Has been cancelled
Quick Test / Core Test (push) Has been cancelled
Quick Test / Code check (push) Has been cancelled
Release (master) / release (push) Has been cancelled
Release (master) / publish_pypi (push) Has been cancelled
Release (nightly) / check_nightly (push) Has been cancelled
Release (nightly) / release (push) Has been cancelled
Release (nightly) / publish_pypi (push) Has been cancelled
Some checks failed
CodeQL / Analyze (python) (push) Has been cancelled
Download Tests / Quick Download Tests (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.10) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.11) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.12) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, 3.13) (push) Has been cancelled
Download Tests / Full Download Tests (ubuntu-latest, pypy-3.11) (push) Has been cancelled
Download Tests / Full Download Tests (windows-latest, 3.9) (push) Has been cancelled
Download Tests / Full Download Tests (windows-latest, pypy-3.11) (push) Has been cancelled
Quick Test / Core Test (push) Has been cancelled
Quick Test / Code check (push) Has been cancelled
Release (master) / release (push) Has been cancelled
Release (master) / publish_pypi (push) Has been cancelled
Release (nightly) / check_nightly (push) Has been cancelled
Release (nightly) / release (push) Has been cancelled
Release (nightly) / publish_pypi (push) Has been cancelled
Closes #13881
Authored by: bashonly
This commit is contained in:
@@ -33,7 +33,6 @@ from ..utils import (
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
@@ -232,6 +231,23 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'release_date': '19950402',
|
||||
'timestamp': 1084927901,
|
||||
},
|
||||
}, {
|
||||
# metadata['metadata']['description'] is a list of strings instead of str
|
||||
'url': 'https://archive.org/details/pra-KZ1908.02',
|
||||
'info_dict': {
|
||||
'id': 'pra-KZ1908.02',
|
||||
'ext': 'mp3',
|
||||
'display_id': 'KZ1908.02_01.wav',
|
||||
'title': 'Crips and Bloods speak about gang life',
|
||||
'description': 'md5:2b56b35ff021311e3554b47a285e70b3',
|
||||
'uploader': 'jake@archive.org',
|
||||
'duration': 1733.74,
|
||||
'track': 'KZ1908.02 01',
|
||||
'track_number': 1,
|
||||
'timestamp': 1336026026,
|
||||
'upload_date': '20120503',
|
||||
'release_year': 1992,
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -274,34 +290,40 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
m = metadata['metadata']
|
||||
identifier = m['identifier']
|
||||
|
||||
info = {
|
||||
info = traverse_obj(m, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any),
|
||||
'uploader': (('uploader', 'adder'), {str}, any),
|
||||
'creators': ('creator', (None, ...), {str}, filter, all, filter),
|
||||
'license': ('licenseurl', {url_or_none}),
|
||||
'release_date': ('date', {unified_strdate}),
|
||||
'timestamp': (('publicdate', 'addeddate'), {unified_timestamp}, any),
|
||||
'location': ('venue', {str}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
})
|
||||
info.update({
|
||||
'id': identifier,
|
||||
'title': m['title'],
|
||||
'description': clean_html(m.get('description')),
|
||||
'uploader': dict_get(m, ['uploader', 'adder']),
|
||||
'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
|
||||
'license': m.get('licenseurl'),
|
||||
'release_date': unified_strdate(m.get('date')),
|
||||
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
|
||||
'webpage_url': f'https://archive.org/details/{identifier}',
|
||||
'location': m.get('venue'),
|
||||
'release_year': int_or_none(m.get('year'))}
|
||||
})
|
||||
|
||||
for f in metadata['files']:
|
||||
if f['name'] in entries:
|
||||
entries[f['name']] = merge_dicts(entries[f['name']], {
|
||||
'id': identifier + '/' + f['name'],
|
||||
'title': f.get('title') or f['name'],
|
||||
'display_id': f['name'],
|
||||
'description': clean_html(f.get('description')),
|
||||
'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
|
||||
'duration': parse_duration(f.get('length')),
|
||||
'track_number': int_or_none(f.get('track')),
|
||||
'album': f.get('album'),
|
||||
'discnumber': int_or_none(f.get('disc')),
|
||||
'release_year': int_or_none(f.get('year'))})
|
||||
**traverse_obj(f, {
|
||||
'title': (('title', 'name'), {str}, any),
|
||||
'display_id': ('name', {str}),
|
||||
'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any),
|
||||
'creators': ('creator', (None, ...), {str}, filter, all, filter),
|
||||
'duration': ('length', {parse_duration}),
|
||||
'track_number': ('track', {int_or_none}),
|
||||
'album': ('album', {str}),
|
||||
'discnumber': ('disc', {int_or_none}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
entry = entries[f['name']]
|
||||
elif traverse_obj(f, 'original', expected_type=str) in entries:
|
||||
elif traverse_obj(f, ('original', {str})) in entries:
|
||||
entry = entries[f['original']]
|
||||
else:
|
||||
continue
|
||||
|
Reference in New Issue
Block a user