mirror of
https://github.com/LonamiWebs/Telethon.git
synced 2025-06-19 19:46:41 +00:00
Fix UnicodeDecodeError with malformed input on unparse text
This commit is contained in:
parent
619e4dc2d6
commit
229969192a
@@ -169,6 +169,28 @@ def unparse(text, entities, delimiters=None, url_fmt=None):
             elif isinstance(entity, MessageEntityMentionName):
                 url = 'tg://user?id={}'.format(entity.user_id)
             if url:
+                # It's possible that entities are malformed and end up in the
+                # middle of some character, like emoji, by using malformed
+                # clients or bots. Try decoding the current one to check if
+                # this is the case, and if it is, advance the entity.
+                while e <= len(text):
+                    try:
+                        del_surrogate(text[s:e])
+                        break
+                    except UnicodeDecodeError:
+                        e += 1
+                else:
+                    # Out of bounds, no luck going forward
+                    while e > s:
+                        try:
+                            del_surrogate(text[s:e])
+                            break
+                        except UnicodeDecodeError:
+                            e -= 1
+                    else:
+                        # No luck going backwards either, ignore entity
+                        continue
+
                 text = (
                     text[:s] +
                     add_surrogate(url_fmt.format(text[s:e], url)) +
Loading…
Reference in New Issue
Block a user