mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-08-08 05:02:29 +00:00
[utils] Fix and improve `find_element` and `find_elements` (#11443)
Fix d710a6ca7c
Authored by: bashonly, Grub4K
Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
This commit is contained in:
@@ -20,6 +20,7 @@ from ._utils import (
|
||||
get_elements_html_by_class,
|
||||
get_elements_html_by_attribute,
|
||||
get_elements_by_attribute,
|
||||
get_element_by_class,
|
||||
get_element_html_by_attribute,
|
||||
get_element_by_attribute,
|
||||
get_element_html_by_id,
|
||||
@@ -373,7 +374,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
|
||||
|
||||
|
||||
@typing.overload
|
||||
def find_element(*, attr: str, value: str, tag: str | None = None, html=False): ...
|
||||
def find_element(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
|
||||
|
||||
|
||||
@typing.overload
|
||||
@@ -381,14 +382,14 @@ def find_element(*, cls: str, html=False): ...
|
||||
|
||||
|
||||
@typing.overload
|
||||
def find_element(*, id: str, tag: str | None = None, html=False): ...
|
||||
def find_element(*, id: str, tag: str | None = None, html=False, regex=False): ...
|
||||
|
||||
|
||||
@typing.overload
|
||||
def find_element(*, tag: str, html=False): ...
|
||||
def find_element(*, tag: str, html=False, regex=False): ...
|
||||
|
||||
|
||||
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
|
||||
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False, regex=False):
|
||||
# deliberately using `id=` and `cls=` for ease of readability
|
||||
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
|
||||
ANY_TAG = r'[\w:.-]+'
|
||||
@@ -397,17 +398,18 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
|
||||
assert not cls, 'Cannot match both attr and cls'
|
||||
assert not id, 'Cannot match both attr and id'
|
||||
func = get_element_html_by_attribute if html else get_element_by_attribute
|
||||
return functools.partial(func, attr, value, tag=tag or ANY_TAG)
|
||||
return functools.partial(func, attr, value, tag=tag or ANY_TAG, escape_value=not regex)
|
||||
|
||||
elif cls:
|
||||
assert not id, 'Cannot match both cls and id'
|
||||
assert tag is None, 'Cannot match both cls and tag'
|
||||
func = get_element_html_by_class if html else get_elements_by_class
|
||||
assert not regex, 'Cannot use regex with cls'
|
||||
func = get_element_html_by_class if html else get_element_by_class
|
||||
return functools.partial(func, cls)
|
||||
|
||||
elif id:
|
||||
func = get_element_html_by_id if html else get_element_by_id
|
||||
return functools.partial(func, id, tag=tag or ANY_TAG)
|
||||
return functools.partial(func, id, tag=tag or ANY_TAG, escape_value=not regex)
|
||||
|
||||
index = int(bool(html))
|
||||
return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
|
||||
@@ -418,19 +420,20 @@ def find_elements(*, cls: str, html=False): ...
|
||||
|
||||
|
||||
@typing.overload
|
||||
def find_elements(*, attr: str, value: str, tag: str | None = None, html=False): ...
|
||||
def find_elements(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
|
||||
|
||||
|
||||
def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
|
||||
def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False, regex=False):
|
||||
# deliberately using `cls=` for ease of readability
|
||||
assert cls or (attr and value), 'One of cls or (attr AND value) is required'
|
||||
|
||||
if attr and value:
|
||||
assert not cls, 'Cannot match both attr and cls'
|
||||
func = get_elements_html_by_attribute if html else get_elements_by_attribute
|
||||
return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+')
|
||||
return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+', escape_value=not regex)
|
||||
|
||||
assert not tag, 'Cannot match both cls and tag'
|
||||
assert not regex, 'Cannot use regex with cls'
|
||||
func = get_elements_html_by_class if html else get_elements_by_class
|
||||
return functools.partial(func, cls)
|
||||
|
||||
|
Reference in New Issue
Block a user