Fix unparsing text with malformed message entities

This commit is contained in:
Lonami Exo
2019-12-19 15:48:27 +01:00
parent ccbc1c669c
commit f3111f93b2
2 changed files with 27 additions and 3 deletions

View File

@@ -184,6 +184,14 @@ def unparse(text, entities, delimiters=None, url_fmt=None):
insert_at.sort(key=lambda t: t[0])
while insert_at:
at, what = insert_at.pop()
# If we are in the middle of a surrogate pair, nudge the position by +1.
# Otherwise we would end up with malformed text and fail to encode.
# Example of bad input: "Hi \ud83d\ude1c"
# https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
if '\ud800' <= text[at] <= '\udfff':
at += 1
text = text[:at] + what + text[at:]
return del_surrogate(text)