diff options
author | Aleksander Morgado <aleksander@aleksander.es> | 2020-08-20 12:18:05 +0200 |
---|---|---|
committer | Aleksander Morgado <aleksander@aleksander.es> | 2020-08-20 18:15:37 +0200 |
commit | 599f545c0d905505516c6546ff77caced2aa14f1 (patch) | |
tree | a06b77c6dce87ec3aca97ea3c88037c6e7653d67 /src/mm-sms-part.h | |
parent | 81162df15dc9a409d0979ff8d472a026f31ed883 (diff) |
sms-part-3gpp: allow sending UTF-16 as if it were UCS-2
Although 3GPP TS 23.038 specifies that Unicode SMS messages are
encoded in UCS-2, UTF-16 encoding is commonly used instead on many
modern platforms to allow encoding code points that fall outside the
Basic Multilingual Plane (BMP), such as Emoji.
Update the logic to always use UTF-16 instead of UCS-2 when creating
or parsing PDUs (even if we always report as sending or receiving
UCS-2). For all purposes, UCS-2 is considered a subset of UTF-16
(assuming that code points outside the [U+0000,U+D7FF] and
[U+E000,U+FFFF] ranges are not applicable in UCS-2).
Fixes https://gitlab.freedesktop.org/mobile-broadband/ModemManager/-/issues/250
Diffstat (limited to 'src/mm-sms-part.h')
-rw-r--r-- | src/mm-sms-part.h | 18 |
1 files changed, 17 insertions, 1 deletions
```diff
diff --git a/src/mm-sms-part.h b/src/mm-sms-part.h
index 92f39b11..2ee7f308 100644
--- a/src/mm-sms-part.h
+++ b/src/mm-sms-part.h
@@ -20,11 +20,27 @@
 #include <glib.h>
 #include <ModemManager.h>
 
+/* Despite 3GPP TS 23.038 specifies that Unicode SMS messages are
+ * encoded in UCS-2, UTF-16 encoding is commonly used instead on many
+ * modern platforms to allow encoding code points that fall outside the
+ * Basic Multilingual Plane (BMP), such as Emoji. Most of the UCS-2
+ * code points are identical to their equivalent UTF-16 code points.
+ * In UTF-16, non-BMP code points are encoded in a pair of surrogate
+ * code points (i.e. a high surrogate in 0xD800..0xDBFF, followed by a
+ * low surrogate in 0xDC00..0xDFFF). An isolated surrogate code point
+ * has no general interpretation in UTF-16, but could be a valid
+ * (though unmapped) code point in UCS-2.
+ *
+ * The current implementation in ModemManager just assumes that whenever
+ * possible (i.e. when parsing received PDUs or when creating submit
+ * PDUs) UTF-16 will be used instead of plain UCS-2 (even if the PDUs
+ * report the encoding as UCS-2).
+ */
 typedef enum { /*< underscore_name=mm_sms_encoding >*/
     MM_SMS_ENCODING_UNKNOWN = 0x0,
     MM_SMS_ENCODING_GSM7,
     MM_SMS_ENCODING_8BIT,
-    MM_SMS_ENCODING_UCS2
+    MM_SMS_ENCODING_UCS2,
 } MMSmsEncoding;
 
 typedef struct _MMSmsPart MMSmsPart;
```