diff --git a/telethon/extensions/html.py b/telethon/extensions/html.py index 1ea81029..99f10dee 100644 --- a/telethon/extensions/html.py +++ b/telethon/extensions/html.py @@ -5,13 +5,15 @@ import struct from collections import deque from html import escape, unescape from html.parser import HTMLParser +from typing import Iterable, Optional, Tuple, List from .. import helpers from ..tl.types import ( MessageEntityBold, MessageEntityItalic, MessageEntityCode, MessageEntityPre, MessageEntityEmail, MessageEntityUrl, MessageEntityTextUrl, MessageEntityMentionName, - MessageEntityUnderline, MessageEntityStrike, MessageEntityBlockquote + MessageEntityUnderline, MessageEntityStrike, MessageEntityBlockquote, + TypeMessageEntity ) @@ -121,7 +123,7 @@ class HTMLToTelegramParser(HTMLParser): self.entities.append(entity) -def parse(html): +def parse(html: str) -> Tuple[str, List[TypeMessageEntity]]: """ Parses the given HTML message and returns its stripped representation plus a list of the MessageEntity's that were found. @@ -138,7 +140,8 @@ def parse(html): return _del_surrogate(text), parser.entities -def unparse(text, entities): +def unparse(text: str, entities: Iterable[TypeMessageEntity], _offset: int = 0, + _length: Optional[int] = None) -> str: """ Performs the reverse operation to .parse(), effectively returning HTML given a normal text and its MessageEntity's. @@ -147,20 +150,29 @@ def unparse(text, entities): :param entities: the MessageEntity's applied to the text. :return: a HTML representation of the combination of both inputs. """ - if not text or not entities: + if not text: return text + elif not entities: + return escape(text) text = _add_surrogate(text) + if _length is None: + _length = len(text) html = [] last_offset = 0 - for entity in entities: - if entity.offset > last_offset: - html.append(escape(text[last_offset:entity.offset])) - elif entity.offset < last_offset: + for i, entity in enumerate(entities): + if entity.offset > _offset + _length: + break + relative_offset = entity.offset - _offset + if relative_offset > last_offset: + html.append(escape(text[last_offset:relative_offset])) + elif relative_offset < last_offset: continue skip_entity = False - entity_text = escape(text[entity.offset:entity.offset + entity.length]) + entity_text = unparse(text=text[relative_offset:relative_offset + entity.length], + entities=entities[i + 1:], + _offset=entity.offset, _length=entity.length) entity_type = type(entity) if entity_type == MessageEntityBold: @@ -198,6 +210,6 @@ def unparse(text, entities): .format(entity.user_id, entity_text)) else: skip_entity = True - last_offset = entity.offset + (0 if skip_entity else entity.length) - html.append(text[last_offset:]) + last_offset = relative_offset + (0 if skip_entity else entity.length) + html.append(escape(text[last_offset:])) return _del_surrogate(''.join(html))