[extractor/wordpress:mb.miniAudioPlayer] Add embed extractor (#5087)

Closes https://github.com/yt-dlp/yt-dlp/issues/4994

Authored by: coletdjnz
This commit is contained in:
Matthew
2022-10-09 18:55:26 +13:00
committed by GitHub
parent 1d55ebabc9
commit 4c9a1a3ba5
4 changed files with 94 additions and 3 deletions

View File

@@ -408,7 +408,7 @@ def get_elements_html_by_attribute(*args, **kwargs):
return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
-def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
+def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
"""
Return the text (content) and the html (whole) of the tag with the specified
attribute in the passed HTML document
@@ -419,7 +419,7 @@ def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value
value = re.escape(value) if escape_value else value
partial_element_re = rf'''(?x)
-<(?P<tag>[a-zA-Z0-9:._-]+)
+<(?P<tag>{tag})
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
\s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
'''