mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:40:45 +00:00 
			
		
		
		
	[ie/cda] Fix age-gated web extraction (#9939)
Closes #5980, Closes #6638
Authored by: Podiumnoche, Szpachlarz, dirkf, emqi
This commit is contained in:
		| @@ -16,7 +16,6 @@ from ..utils import ( | ||||
|     merge_dicts, | ||||
|     multipart_encode, | ||||
|     parse_duration, | ||||
|     random_birthday, | ||||
|     traverse_obj, | ||||
|     try_call, | ||||
|     try_get, | ||||
| @@ -63,38 +62,57 @@ class CDAIE(InfoExtractor): | ||||
|             'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'uploader': 'crash404', | ||||
|             'view_count': int, | ||||
|             'average_rating': float, | ||||
|             'duration': 137, | ||||
|             'age_limit': 0, | ||||
|             'upload_date': '20160220', | ||||
|             'timestamp': 1455968218, | ||||
|         } | ||||
|     }, { | ||||
|         # Age-restricted | ||||
|         'url': 'http://www.cda.pl/video/1273454c4', | ||||
|         # Age-restricted with vfilm redirection | ||||
|         'url': 'https://www.cda.pl/video/8753244c4', | ||||
|         'md5': 'd8eeb83d63611289507010d3df3bb8b3', | ||||
|         'info_dict': { | ||||
|             'id': '1273454c4', | ||||
|             'id': '8753244c4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Bronson (2008) napisy HD 1080p', | ||||
|             'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c', | ||||
|             'title': '[18+] Bez Filtra: Rezerwowe Psy czyli...  najwulgarniejsza polska gra?', | ||||
|             'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e', | ||||
|             'height': 1080, | ||||
|             'uploader': 'boniek61', | ||||
|             'uploader': 'arhn eu', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 5554, | ||||
|             'duration': 991, | ||||
|             'age_limit': 18, | ||||
|             'view_count': int, | ||||
|             'average_rating': float, | ||||
|         }, | ||||
|             'timestamp': 1633888264, | ||||
|             'upload_date': '20211010', | ||||
|         } | ||||
|     }, { | ||||
|         # Age-restricted without vfilm redirection | ||||
|         'url': 'https://www.cda.pl/video/17028157b8', | ||||
|         'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992', | ||||
|         'info_dict': { | ||||
|             'id': '17028157b8', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'STENDUPY MICHAŁ OGIŃSKI', | ||||
|             'description': 'md5:5851f3272bfc31f762d616040a1d609a', | ||||
|             'height': 480, | ||||
|             'uploader': 'oginski', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 18855, | ||||
|             'age_limit': 18, | ||||
|             'average_rating': float, | ||||
|             'timestamp': 1699705901, | ||||
|             'upload_date': '20231111', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://ebd.cda.pl/0x0/5749950c', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
| 
 | ||||
|     def _download_age_confirm_page(self, url, video_id, *args, **kwargs): | ||||
|         form_data = random_birthday('rok', 'miesiac', 'dzien') | ||||
|         form_data.update({'return': url, 'module': 'video', 'module_id': video_id}) | ||||
|         data, content_type = multipart_encode(form_data) | ||||
|         data, content_type = multipart_encode({'age_confirm': ''}) | ||||
|         return self._download_webpage( | ||||
|             urljoin(url, '/a/validatebirth'), video_id, *args, | ||||
|             url, video_id, *args, | ||||
|             data=data, headers={ | ||||
|                 'Referer': url, | ||||
|                 'Content-Type': content_type, | ||||
| @@ -164,7 +182,7 @@ class CDAIE(InfoExtractor): | ||||
|         if 'Authorization' in self._API_HEADERS: | ||||
|             return self._api_extract(video_id) | ||||
|         else: | ||||
|             return self._web_extract(video_id, url) | ||||
|             return self._web_extract(video_id) | ||||
| 
 | ||||
|     def _api_extract(self, video_id): | ||||
|         meta = self._download_json( | ||||
| @@ -197,9 +215,9 @@ class CDAIE(InfoExtractor): | ||||
|             'view_count': meta.get('views'), | ||||
|         } | ||||
| 
 | ||||
|     def _web_extract(self, video_id, url): | ||||
|     def _web_extract(self, video_id): | ||||
|         self._set_cookie('cda.pl', 'cda.player', 'html5') | ||||
|         webpage = self._download_webpage( | ||||
|         webpage, urlh = self._download_webpage_handle( | ||||
|             f'{self._BASE_URL}/video/{video_id}/vfilm', video_id) | ||||
| 
 | ||||
|         if 'Ten film jest dostępny dla użytkowników premium' in webpage: | ||||
| @@ -209,10 +227,10 @@ class CDAIE(InfoExtractor): | ||||
|             self.raise_geo_restricted() | ||||
| 
 | ||||
|         need_confirm_age = False | ||||
|         if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")', | ||||
|         if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")', | ||||
|                                    webpage, 'birthday validate form', default=None): | ||||
|             webpage = self._download_age_confirm_page( | ||||
|                 url, video_id, note='Confirming age') | ||||
|                 urlh.url, video_id, note='Confirming age') | ||||
|             need_confirm_age = True | ||||
| 
 | ||||
|         formats = [] | ||||
| @@ -222,9 +240,6 @@ class CDAIE(InfoExtractor): | ||||
|             (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*? | ||||
|             <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3> | ||||
|         ''', webpage, 'uploader', default=None, group='uploader') | ||||
|         view_count = self._search_regex( | ||||
|         r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage, | ||||
|             'view_count', default=None) | ||||
|         average_rating = self._search_regex( | ||||
|             (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)', | ||||
|              r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False, | ||||
| @@ -235,7 +250,6 @@ class CDAIE(InfoExtractor): | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'uploader': uploader, | ||||
|             'view_count': int_or_none(view_count), | ||||
|             'average_rating': float_or_none(average_rating), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'formats': formats, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Podiumnoche
					Podiumnoche