diff options
author | Aleksander Morgado <aleksander@aleksander.es> | 2020-08-20 12:18:05 +0200 |
---|---|---|
committer | Aleksander Morgado <aleksander@aleksander.es> | 2020-08-20 18:15:37 +0200 |
commit | 599f545c0d905505516c6546ff77caced2aa14f1 (patch) | |
tree | a06b77c6dce87ec3aca97ea3c88037c6e7653d67 /src/tests/test-sms-part-3gpp.c | |
parent | 81162df15dc9a409d0979ff8d472a026f31ed883 (diff) |
sms-part-3gpp: allow sending UTF-16 as if it were UCS-2
Despite 3GPP TS 23.038 specifies that Unicode SMS messages are
encoded in UCS-2, UTF-16 encoding is commonly used instead on many
modern platforms to allow encoding code points that fall outside the
Basic Multilingual Plane (BMP), such as Emoji.
Update the logic to always use UTF-16 instead of UCS-2 when creating
or parsing PDUs (even if we always report as sending or receiving
UCS-2). For all purposes, UCS-2 is considered a subset of UTF-16
(assuming that code points out of the [U+0000,U+D7FF] and
[U+E000,U+FFFF] ranges are not applicable in UCS-2).
Fixes https://gitlab.freedesktop.org/mobile-broadband/ModemManager/-/issues/250
Diffstat (limited to 'src/tests/test-sms-part-3gpp.c')
-rw-r--r-- | src/tests/test-sms-part-3gpp.c | 78 |
1 files changed, 66 insertions, 12 deletions
diff --git a/src/tests/test-sms-part-3gpp.c b/src/tests/test-sms-part-3gpp.c index c3d59d87..db6aa7a0 100644 --- a/src/tests/test-sms-part-3gpp.c +++ b/src/tests/test-sms-part-3gpp.c @@ -553,8 +553,7 @@ common_test_create_pdu (const gchar *smsc, g_assert_no_error (error); g_assert (pdu != NULL); - g_assert_cmpuint (len, ==, expected_size); - g_assert_cmpint (memcmp (pdu, expected, len), ==, 0); + g_assert_cmpmem (pdu, len, expected, expected_size); g_assert_cmpint (msgstart, ==, expected_msgstart); g_free (pdu); @@ -735,7 +734,7 @@ common_test_text_split (const gchar *text, } static void -test_text_split_short (void) +test_text_split_short_gsm7 (void) { const gchar *text = "Hello"; const gchar *expected [] = { @@ -749,7 +748,7 @@ test_text_split_short (void) static void test_text_split_short_ucs2 (void) { - const gchar *text = "你好"; + const gchar *text = "你好"; /* (UTF-8) e4 bd a0 e5 a5 bd */ const gchar *expected [] = { "你好", NULL @@ -759,7 +758,19 @@ test_text_split_short_ucs2 (void) } static void -test_text_split_max_single_pdu (void) +test_text_split_short_utf16 (void) +{ + const gchar *text = "😉"; /* U+1F609, winking face */ + const gchar *expected [] = { + "😉", + NULL + }; + + common_test_text_split (text, expected, MM_SMS_ENCODING_UCS2); +} + +static void +test_text_split_max_single_pdu_gsm7 (void) { const gchar *text = "0123456789012345678901234567890123456789" @@ -798,7 +809,23 @@ test_text_split_max_single_pdu_ucs2 (void) } static void -test_text_split_two_pdu (void) +test_text_split_max_single_pdu_utf16 (void) +{ + /* NOTE: this string contains 35 Bhaiksuki characters, each of + * them requiring 4 bytes both in UTF-8 and in UTF-16 (140 bytes + * in total). */ + const gchar *text = + "𑰀𑰁𑰂𑰃𑰄𑰅𑰆𑰇𑰈𑰊𑰋𑰌𑰍𑰎𑰏𑰐𑰑𑰒𑰓𑰔𑰕𑰖𑰗𑰘𑰙𑰚𑰛𑰜𑰝𑰞𑰟𑰠𑰡𑰢𑰣"; + const gchar *expected [] = { + "𑰀𑰁𑰂𑰃𑰄𑰅𑰆𑰇𑰈𑰊𑰋𑰌𑰍𑰎𑰏𑰐𑰑𑰒𑰓𑰔𑰕𑰖𑰗𑰘𑰙𑰚𑰛𑰜𑰝𑰞𑰟𑰠𑰡𑰢𑰣", + NULL + }; + + common_test_text_split (text, expected, MM_SMS_ENCODING_UCS2); +} + +static void +test_text_split_two_pdu_gsm7 (void) { const gchar *text = "0123456789012345678901234567890123456789" @@ -839,6 +866,30 @@ test_text_split_two_pdu_ucs2 (void) common_test_text_split (text, expected, MM_SMS_ENCODING_UCS2); } +static void +test_text_split_two_pdu_utf16 (void) +{ + /* NOTE: this string contains 35 Bhaiksuki characters, each of + * them requiring 4 bytes both in UTF-8 and in UTF-16 (140 bytes + * in total) plus one ASCII char (encoded with 1 byte in UTF-8 and + * 2 bytes in UTF-16), making it a total of 142 bytes when in + * UTF-16 (so not fitting in one single PDU) + * + * When split in chunks, the last chunk will hold 2 Bhaiksuki + * characters plus the last ASCII one (9 bytes in UTF-16) so that + * the first chunk contains the leading 33 Bhaiksuki characters + * (132 characters, less than 134) */ + const gchar *text = + "𑰀𑰁𑰂𑰃𑰄𑰅𑰆𑰇𑰈𑰊𑰋𑰌𑰍𑰎𑰏𑰐𑰑𑰒𑰓𑰔𑰕𑰖𑰗𑰘𑰙𑰚𑰛𑰜𑰝𑰞𑰟𑰠𑰡𑰢𑰣a"; + const gchar *expected [] = { + "𑰀𑰁𑰂𑰃𑰄𑰅𑰆𑰇𑰈𑰊𑰋𑰌𑰍𑰎𑰏𑰐𑰑𑰒𑰓𑰔𑰕𑰖𑰗𑰘𑰙𑰚𑰛𑰜𑰝𑰞𑰟𑰠𑰡", + "𑰢𑰣a", + NULL + }; + + common_test_text_split (text, expected, MM_SMS_ENCODING_UCS2); +} + /************************************************************/ int main (int argc, char **argv) @@ -874,12 +925,15 @@ int main (int argc, char **argv) g_test_add_func ("/MM/SMS/3GPP/PDU-Creator/GSM-3", test_create_pdu_gsm_3); g_test_add_func ("/MM/SMS/3GPP/PDU-Creator/GSM-no-validity", test_create_pdu_gsm_no_validity); - g_test_add_func ("/MM/SMS/3GPP/Text-Split/short", test_text_split_short); - g_test_add_func ("/MM/SMS/3GPP/Text-Split/short-UCS2", test_text_split_short_ucs2); - g_test_add_func ("/MM/SMS/3GPP/Text-Split/max-single-pdu", test_text_split_max_single_pdu); - g_test_add_func ("/MM/SMS/3GPP/Text-Split/max-single-pdu-UCS2", test_text_split_max_single_pdu_ucs2); - g_test_add_func ("/MM/SMS/3GPP/Text-Split/two-pdu", test_text_split_two_pdu); - g_test_add_func ("/MM/SMS/3GPP/Text-Split/two-pdu-UCS2", test_text_split_two_pdu_ucs2); + g_test_add_func ("/MM/SMS/3GPP/Text-Split/gsm7/short", test_text_split_short_gsm7); + g_test_add_func ("/MM/SMS/3GPP/Text-Split/ucs2/short", test_text_split_short_ucs2); + g_test_add_func ("/MM/SMS/3GPP/Text-Split/utf16/short", test_text_split_short_utf16); + g_test_add_func ("/MM/SMS/3GPP/Text-Split/gsm7/max-single-pdu", test_text_split_max_single_pdu_gsm7); + g_test_add_func ("/MM/SMS/3GPP/Text-Split/ucs2/max-single-pdu", test_text_split_max_single_pdu_ucs2); + g_test_add_func ("/MM/SMS/3GPP/Text-Split/utf16/max-single-pdu", test_text_split_max_single_pdu_utf16); + g_test_add_func ("/MM/SMS/3GPP/Text-Split/gsm7/two-pdu", test_text_split_two_pdu_gsm7); + g_test_add_func ("/MM/SMS/3GPP/Text-Split/ucs2/two-pdu", test_text_split_two_pdu_ucs2); + g_test_add_func ("/MM/SMS/3GPP/Text-Split/utf16/two-pdu", test_text_split_two_pdu_utf16); return g_test_run (); } |