[utils] Fix and improve find_element and find_elements (#11443)

Fix d710a6ca7c

Authored by: bashonly, Grub4K

Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
This commit is contained in:
bashonly
2024-11-03 18:19:45 +00:00
committed by GitHub
parent 5c7a5aaab2
commit b103aca24d
2 changed files with 67 additions and 10 deletions

View File

@@ -20,6 +20,7 @@ from ._utils import (
get_elements_html_by_class,
get_elements_html_by_attribute,
get_elements_by_attribute,
get_element_by_class,
get_element_html_by_attribute,
get_element_by_attribute,
get_element_html_by_id,
@@ -373,7 +374,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
@typing.overload
def find_element(*, attr: str, value: str, tag: str | None = None, html=False): ...
def find_element(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
@typing.overload
@@ -381,14 +382,14 @@ def find_element(*, cls: str, html=False): ...
@typing.overload
def find_element(*, id: str, tag: str | None = None, html=False): ...
def find_element(*, id: str, tag: str | None = None, html=False, regex=False): ...
@typing.overload
def find_element(*, tag: str, html=False): ...
def find_element(*, tag: str, html=False, regex=False): ...
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False, regex=False):
# deliberately using `id=` and `cls=` for ease of readability
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
ANY_TAG = r'[\w:.-]+'
@@ -397,17 +398,18 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
assert not cls, 'Cannot match both attr and cls'
assert not id, 'Cannot match both attr and id'
func = get_element_html_by_attribute if html else get_element_by_attribute
return functools.partial(func, attr, value, tag=tag or ANY_TAG)
return functools.partial(func, attr, value, tag=tag or ANY_TAG, escape_value=not regex)
elif cls:
assert not id, 'Cannot match both cls and id'
assert tag is None, 'Cannot match both cls and tag'
func = get_element_html_by_class if html else get_elements_by_class
assert not regex, 'Cannot use regex with cls'
func = get_element_html_by_class if html else get_element_by_class
return functools.partial(func, cls)
elif id:
func = get_element_html_by_id if html else get_element_by_id
return functools.partial(func, id, tag=tag or ANY_TAG)
return functools.partial(func, id, tag=tag or ANY_TAG, escape_value=not regex)
index = int(bool(html))
return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
@@ -418,19 +420,20 @@ def find_elements(*, cls: str, html=False): ...
@typing.overload
def find_elements(*, attr: str, value: str, tag: str | None = None, html=False): ...
def find_elements(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False, regex=False):
# deliberately using `cls=` for ease of readability
assert cls or (attr and value), 'One of cls or (attr AND value) is required'
if attr and value:
assert not cls, 'Cannot match both attr and cls'
func = get_elements_html_by_attribute if html else get_elements_by_attribute
return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+')
return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+', escape_value=not regex)
assert not tag, 'Cannot match both cls and tag'
assert not regex, 'Cannot use regex with cls'
func = get_elements_html_by_class if html else get_elements_by_class
return functools.partial(func, cls)