Strip outgoing message text locally while respecting entities

This commit is contained in:
Lonami Exo
2018-11-19 10:15:56 +01:00
parent d854babf22
commit aaee092a46
3 changed files with 42 additions and 4 deletions

View File

@@ -6,11 +6,12 @@ from collections import deque
from html import escape, unescape
from html.parser import HTMLParser
from .. import helpers
from ..tl.types import (
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
MessageEntityPre, MessageEntityEmail, MessageEntityUrl,
MessageEntityTextUrl, MessageEntityMentionName
)
)
# Helpers from markdown.py
@@ -126,7 +127,8 @@ def parse(html):
parser = HTMLToTelegramParser()
parser.feed(_add_surrogate(html))
return _del_surrogate(parser.text), parser.entities
text = helpers.strip_text(parser.text, parser.entities)
return _del_surrogate(text), parser.entities
def unparse(text, entities):

View File

@@ -5,12 +5,12 @@ since they seem to count as two characters and it's a bit strange.
"""
import re
from ..helpers import add_surrogate, del_surrogate
from ..helpers import add_surrogate, del_surrogate, strip_text
from ..tl import TLObject
from ..tl.types import (
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
MessageEntityPre, MessageEntityTextUrl, MessageEntityMentionName
)
)
DEFAULT_DELIMITERS = {
'**': MessageEntityBold,
@@ -125,6 +125,7 @@ def parse(message, delimiters=None, url_re=None):
+ message[current.offset:]
)
message = strip_text(message, result)
return del_surrogate(message), result