[extractor/sbs] Overhaul extractor for new API (#6839)

Closes #6543
Authored by: vidiot720, dirkf, bashonly
This commit is contained in:
vidiot720
2023-04-19 09:46:57 +10:00
committed by GitHub
parent ab29e47029
commit 6a765f135c
2 changed files with 86 additions and 27 deletions

View File

@@ -4093,6 +4093,10 @@ def dfxp2srt(dfxp_data):
def close(self):
    """Return ``self._out`` with leading and trailing whitespace removed."""
    collected = self._out
    return collected.strip()
# Some UTF-8 encoded files are wrongly marked as UTF-16; rewrite the marker to UTF-8.
# See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
# No false positives are expected, since only UTF-8 text is being replaced.
wrong_encoding_marker = b"encoding='UTF-16'"
fixed_encoding_marker = b"encoding='UTF-8'"
dfxp_data = dfxp_data.replace(wrong_encoding_marker, fixed_encoding_marker)
def parse_node(node):
target = TTMLPElementParser()
parser = xml.etree.ElementTree.XMLParser(target=target)