mirror of
https://github.com/LonamiWebs/Telethon.git
synced 2025-06-17 02:26:40 +00:00
Add support for unparsing nested entities into HTML (#1209)
This commit is contained in:
parent
962949008f
commit
8b28f4ffbf
@ -5,13 +5,15 @@ import struct
|
|||||||
from collections import deque
|
from collections import deque
|
||||||
from html import escape, unescape
|
from html import escape, unescape
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
from typing import Iterable, Optional, Tuple, List
|
||||||
|
|
||||||
from .. import helpers
|
from .. import helpers
|
||||||
from ..tl.types import (
|
from ..tl.types import (
|
||||||
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
|
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
|
||||||
MessageEntityPre, MessageEntityEmail, MessageEntityUrl,
|
MessageEntityPre, MessageEntityEmail, MessageEntityUrl,
|
||||||
MessageEntityTextUrl, MessageEntityMentionName,
|
MessageEntityTextUrl, MessageEntityMentionName,
|
||||||
MessageEntityUnderline, MessageEntityStrike, MessageEntityBlockquote
|
MessageEntityUnderline, MessageEntityStrike, MessageEntityBlockquote,
|
||||||
|
TypeMessageEntity
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -121,7 +123,7 @@ class HTMLToTelegramParser(HTMLParser):
|
|||||||
self.entities.append(entity)
|
self.entities.append(entity)
|
||||||
|
|
||||||
|
|
||||||
def parse(html):
|
def parse(html: str) -> Tuple[str, List[TypeMessageEntity]]:
|
||||||
"""
|
"""
|
||||||
Parses the given HTML message and returns its stripped representation
|
Parses the given HTML message and returns its stripped representation
|
||||||
plus a list of the MessageEntity's that were found.
|
plus a list of the MessageEntity's that were found.
|
||||||
@ -138,7 +140,8 @@ def parse(html):
|
|||||||
return _del_surrogate(text), parser.entities
|
return _del_surrogate(text), parser.entities
|
||||||
|
|
||||||
|
|
||||||
def unparse(text, entities):
|
def unparse(text: str, entities: Iterable[TypeMessageEntity], _offset: int = 0,
|
||||||
|
_length: Optional[int] = None) -> str:
|
||||||
"""
|
"""
|
||||||
Performs the reverse operation to .parse(), effectively returning HTML
|
Performs the reverse operation to .parse(), effectively returning HTML
|
||||||
given a normal text and its MessageEntity's.
|
given a normal text and its MessageEntity's.
|
||||||
@ -147,20 +150,29 @@ def unparse(text, entities):
|
|||||||
:param entities: the MessageEntity's applied to the text.
|
:param entities: the MessageEntity's applied to the text.
|
||||||
:return: a HTML representation of the combination of both inputs.
|
:return: a HTML representation of the combination of both inputs.
|
||||||
"""
|
"""
|
||||||
if not text or not entities:
|
if not text:
|
||||||
return text
|
return text
|
||||||
|
elif not entities:
|
||||||
|
return escape(text)
|
||||||
|
|
||||||
text = _add_surrogate(text)
|
text = _add_surrogate(text)
|
||||||
|
if _length is None:
|
||||||
|
_length = len(text)
|
||||||
html = []
|
html = []
|
||||||
last_offset = 0
|
last_offset = 0
|
||||||
for entity in entities:
|
for i, entity in enumerate(entities):
|
||||||
if entity.offset > last_offset:
|
if entity.offset > _offset + _length:
|
||||||
html.append(escape(text[last_offset:entity.offset]))
|
break
|
||||||
elif entity.offset < last_offset:
|
relative_offset = entity.offset - _offset
|
||||||
|
if relative_offset > last_offset:
|
||||||
|
html.append(escape(text[last_offset:relative_offset]))
|
||||||
|
elif relative_offset < last_offset:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
skip_entity = False
|
skip_entity = False
|
||||||
entity_text = escape(text[entity.offset:entity.offset + entity.length])
|
entity_text = unparse(text=text[relative_offset:relative_offset + entity.length],
|
||||||
|
entities=entities[i + 1:],
|
||||||
|
_offset=entity.offset, _length=entity.length)
|
||||||
entity_type = type(entity)
|
entity_type = type(entity)
|
||||||
|
|
||||||
if entity_type == MessageEntityBold:
|
if entity_type == MessageEntityBold:
|
||||||
@ -198,6 +210,6 @@ def unparse(text, entities):
|
|||||||
.format(entity.user_id, entity_text))
|
.format(entity.user_id, entity_text))
|
||||||
else:
|
else:
|
||||||
skip_entity = True
|
skip_entity = True
|
||||||
last_offset = entity.offset + (0 if skip_entity else entity.length)
|
last_offset = relative_offset + (0 if skip_entity else entity.length)
|
||||||
html.append(text[last_offset:])
|
html.append(escape(text[last_offset:]))
|
||||||
return _del_surrogate(''.join(html))
|
return _del_surrogate(''.join(html))
|
||||||
|
Loading…
Reference in New Issue
Block a user