Mirror of https://github.com/yt-dlp/yt-dlp.git (last synced 2025-09-23 23:30:11 +00:00)
[ie/googledrive] Fix subtitles extraction (#14139)
Authored by: zakaryan2004
This commit is contained in:
@@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
get_element_html_by_id,
|
get_element_html_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
parse_qs,
|
||||||
try_get,
|
try_get,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
@@ -111,14 +112,18 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
self._caption_formats_ext.append(f.attrib['fmt_code'])
|
self._caption_formats_ext.append(f.attrib['fmt_code'])
|
||||||
|
|
||||||
def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
|
def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
|
||||||
origin_lang_code=None):
|
origin_lang_code=None, origin_lang_name=None):
|
||||||
if not subtitles_id or not caption_type:
|
if not subtitles_id or not caption_type:
|
||||||
return
|
return
|
||||||
captions = {}
|
captions = {}
|
||||||
for caption_entry in self._captions_xml.findall(
|
for caption_entry in self._captions_xml.findall(
|
||||||
self._CAPTIONS_ENTRY_TAG[caption_type]):
|
self._CAPTIONS_ENTRY_TAG[caption_type]):
|
||||||
caption_lang_code = caption_entry.attrib.get('lang_code')
|
caption_lang_code = caption_entry.attrib.get('lang_code')
|
||||||
if not caption_lang_code:
|
caption_name = caption_entry.attrib.get('name') or origin_lang_name
|
||||||
|
if not caption_lang_code or not caption_name:
|
||||||
|
self.report_warning(f'Missing necessary caption metadata. '
|
||||||
|
f'Need lang_code and name attributes. '
|
||||||
|
f'Found: {caption_entry.attrib}')
|
||||||
continue
|
continue
|
||||||
caption_format_data = []
|
caption_format_data = []
|
||||||
for caption_format in self._caption_formats_ext:
|
for caption_format in self._caption_formats_ext:
|
||||||
@@ -129,7 +134,7 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
'lang': (caption_lang_code if origin_lang_code is None
|
'lang': (caption_lang_code if origin_lang_code is None
|
||||||
else origin_lang_code),
|
else origin_lang_code),
|
||||||
'type': 'track',
|
'type': 'track',
|
||||||
'name': '',
|
'name': caption_name,
|
||||||
'kind': '',
|
'kind': '',
|
||||||
}
|
}
|
||||||
if origin_lang_code is not None:
|
if origin_lang_code is not None:
|
||||||
@@ -155,14 +160,15 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
self._download_subtitles_xml(video_id, subtitles_id, hl)
|
self._download_subtitles_xml(video_id, subtitles_id, hl)
|
||||||
if not self._captions_xml:
|
if not self._captions_xml:
|
||||||
return
|
return
|
||||||
track = self._captions_xml.find('track')
|
track = next((t for t in self._captions_xml.findall('track') if t.attrib.get('cantran') == 'true'), None)
|
||||||
if track is None:
|
if track is None:
|
||||||
return
|
return
|
||||||
origin_lang_code = track.attrib.get('lang_code')
|
origin_lang_code = track.attrib.get('lang_code')
|
||||||
if not origin_lang_code:
|
origin_lang_name = track.attrib.get('name')
|
||||||
|
if not origin_lang_code or not origin_lang_name:
|
||||||
return
|
return
|
||||||
return self._get_captions_by_type(
|
return self._get_captions_by_type(
|
||||||
video_id, subtitles_id, 'automatic_captions', origin_lang_code)
|
video_id, subtitles_id, 'automatic_captions', origin_lang_code, origin_lang_name)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -268,10 +274,8 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
subtitles_id = None
|
subtitles_id = None
|
||||||
ttsurl = get_value('ttsurl')
|
ttsurl = get_value('ttsurl')
|
||||||
if ttsurl:
|
if ttsurl:
|
||||||
# the video Id for subtitles will be the last value in the ttsurl
|
# the subtitles ID is the vid param of the ttsurl query
|
||||||
# query string
|
subtitles_id = parse_qs(ttsurl).get('vid', [None])[-1]
|
||||||
subtitles_id = ttsurl.encode().decode(
|
|
||||||
'unicode_escape').split('=')[-1]
|
|
||||||
|
|
||||||
self.cookiejar.clear(domain='.google.com', path='/', name='NID')
|
self.cookiejar.clear(domain='.google.com', path='/', name='NID')
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user