diff --git a/client/src/telethon/_impl/client/parsers/html.py b/client/src/telethon/_impl/client/parsers/html.py index 5fbaddd7..aa5ae110 100644 --- a/client/src/telethon/_impl/client/parsers/html.py +++ b/client/src/telethon/_impl/client/parsers/html.py @@ -30,7 +30,7 @@ from ...tl.types import ( MessageEntityUnderline, MessageEntityUrl, ) -from .strings import add_surrogate, del_surrogate, strip_text, within_surrogate +from .strings import add_surrogate, del_surrogate, within_surrogate class HTMLToTelegramParser(HTMLParser): @@ -141,8 +141,7 @@ def parse(html: str) -> Tuple[str, List[MessageEntity]]: parser = HTMLToTelegramParser() parser.feed(add_surrogate(html)) - text = strip_text(parser.text, parser.entities) - return del_surrogate(text), parser.entities + return del_surrogate(parser.text), parser.entities ENTITY_TO_FORMATTER: Dict[ diff --git a/client/src/telethon/_impl/client/parsers/strings.py b/client/src/telethon/_impl/client/parsers/strings.py index b70f8dfa..0fd69059 100644 --- a/client/src/telethon/_impl/client/parsers/strings.py +++ b/client/src/telethon/_impl/client/parsers/strings.py @@ -1,7 +1,5 @@ import struct -from typing import List, Optional - -from ...tl.abcs import MessageEntity +from typing import Optional def add_surrogate(text: str) -> str: @@ -33,51 +31,3 @@ def within_surrogate(text: str, index: int, *, length: Optional[int] = None) -> and "\ud800" <= text[index - 1] <= "\udfff" # previous is and "\ud800" <= text[index] <= "\udfff" # current is ) - - -def strip_text(text: str, entities: List[MessageEntity]) -> str: - """ - Strips whitespace from the given text modifying the provided entities. - - This assumes that there are no overlapping entities, that their length - is greater or equal to one, and that their length is not out of bounds. - """ - if not entities: - return text.strip() - - assert all(isinstance(getattr(e, "offset"), int) for e in entities) - - while text and text[-1].isspace(): - e = entities[-1] - offset, length = getattr(e, "offset", None), getattr(e, "length", None) - assert isinstance(offset, int) and isinstance(length, int) - - if offset + length == len(text): - if length == 1: - del entities[-1] - if not entities: - return text.strip() - else: - length -= 1 - text = text[:-1] - - while text and text[0].isspace(): - for i in reversed(range(len(entities))): - e = entities[i] - offset, length = getattr(e, "offset", None), getattr(e, "length", None) - assert isinstance(offset, int) and isinstance(length, int) - - if offset != 0: - setattr(e, "offset", offset - 1) - continue - - if length == 1: - del entities[0] - if not entities: - return text.lstrip() - else: - setattr(e, "length", length - 1) - - text = text[1:] - - return text