[RaiplayRadio] Add extractors (#780)

Original PR: https://github.com/ytdl-org/youtube-dl/pull/21837
Authored by: frafra
This commit is contained in:
Francesco Frassinelli
2021-11-05 17:54:56 +01:00
committed by GitHub
parent aeb2a9ad27
commit 73673ccff3
3 changed files with 112 additions and 0 deletions

View File

@@ -2006,6 +2006,23 @@ class HTMLAttributeParser(compat_HTMLParser):
self.attrs = dict(attrs)
class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list

    After markup has been fed to the parser, `items` holds one dict of
    attributes for every <li> start tag encountered at nesting level 0,
    in document order. <li> elements nested inside other elements
    (including other <li> elements) are not collected.
    """

    # Void elements never have an end tag, so they must not take part in the
    # depth bookkeeping; otherwise a single <br> or <img> inside an item
    # would leave the depth permanently above 0 and every following
    # top-level <li> would be skipped. The parser reports tag names in
    # lower case, so a plain membership test is sufficient.
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ))

    def __init__(self):
        compat_HTMLParser.__init__(self)
        # Attribute dicts of the top-level <li> elements seen so far
        self.items = []
        # Number of currently open (non-void) elements
        self._level = 0

    def handle_starttag(self, tag, attrs):
        """Record the attributes of a top-level <li> and track the depth"""
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        if tag not in self._VOID_ELEMENTS:
            self._level += 1

    def handle_endtag(self, tag):
        """Leave the current nesting level"""
        # Self-closing syntax (<br/>) is dispatched by the base class as a
        # start tag followed by an end tag, so void elements have to be
        # ignored here as well to keep the counter balanced.
        if tag not in self._VOID_ELEMENTS:
            self._level -= 1
def extract_attributes(html_element):
"""Given a string for an HTML element such as
<el
@@ -2032,6 +2049,15 @@ def extract_attributes(html_element):
return parser.attrs
def parse_list(webpage):
    """Given a string containing a series of HTML <li> elements,
    return a list with one dictionary of attributes per element,
    as collected by HTMLListAttrsParser"""
    list_parser = HTMLListAttrsParser()
    list_parser.feed(webpage)
    # close() makes the parser process any data still buffered
    list_parser.close()
    collected = list_parser.items
    return collected
def clean_html(html):
"""Clean an HTML snippet into a readable string"""