sms-part-3gpp: allow sending UTF-16 as if it were UCS-2

Despite 3GPP TS 23.038 specifies that Unicode SMS messages are encoded in UCS-2, UTF-16 encoding is commonly used instead on many modern platforms to allow encoding code points that fall outside the Basic Multilingual Plane (BMP), such as Emoji. Update the logic to always use UTF-16 instead of UCS-2 when creating or parsing PDUs (even if we always report as sending or receiving UCS-2). For all purposes, UCS-2 is considered a subset of UTF-16 (assuming that code points out of the [U+0000,U+D7FF] and [U+E000,U+FFFF] ranges are not applicable in UCS-2). Fixes https://gitlab.freedesktop.org/mobile-broadband/ModemManager/-/issues/250
author: Aleksander Morgado <aleksander@aleksander.es> 2020-08-20 12:18:05 +0200
committer: Aleksander Morgado <aleksander@aleksander.es> 2020-08-20 18:15:37 +0200
commit: 599f545c0d905505516c6546ff77caced2aa14f1 (patch)
tree: a06b77c6dce87ec3aca97ea3c88037c6e7653d67 /src/tests/test-sms-part-3gpp.c
parent: 81162df15dc9a409d0979ff8d472a026f31ed883 (diff)
1 files changed, 66 insertions, 12 deletions
diff --git a/src/tests/test-sms-part-3gpp.c b/src/tests/test-sms-part-3gpp.c
index c3d59d87..db6aa7a0 100644
--- a/src/tests/test-sms-part-3gpp.c
+++ b/src/tests/test-sms-part-3gpp.c
@@ -553,8 +553,7 @@ common_test_create_pdu (const gchar *smsc,
 
     g_assert_no_error (error);
     g_assert (pdu != NULL);
-    g_assert_cmpuint (len, ==, expected_size);
-    g_assert_cmpint (memcmp (pdu, expected, len), ==, 0);
+    g_assert_cmpmem (pdu, len, expected, expected_size);
     g_assert_cmpint (msgstart, ==, expected_msgstart);
 
     g_free (pdu);
@@ -735,7 +734,7 @@ common_test_text_split (const gchar *text,
 }
 
 static void
-test_text_split_short (void)
+test_text_split_short_gsm7 (void)
 {
     const gchar *text = "Hello";
     const gchar *expected [] = {
@@ -749,7 +748,7 @@ test_text_split_short (void)
 static void
 test_text_split_short_ucs2 (void)
 {
-    const gchar *text = "你好";
+    const gchar *text = "你好"; /* (UTF-8) e4 bd a0 e5 a5 bd */
     const gchar *expected [] = {
         "你好",
         NULL
@@ -759,7 +758,19 @@ test_text_split_short_ucs2 (void)
 }
 
 static void
-test_text_split_max_single_pdu (void)
+test_text_split_short_utf16 (void)
+{
+    const gchar *text = "😉"; /* U+1F609, winking face */
+    const gchar *expected [] = {
+        "😉",
+        NULL
+    };
+
+    common_test_text_split (text, expected, MM_SMS_ENCODING_UCS2);
+}
+
+static void
+test_text_split_max_single_pdu_gsm7 (void)
 {
     const gchar *text =
         "0123456789012345678901234567890123456789"
@@ -798,7 +809,23 @@ test_text_split_max_single_pdu_ucs2 (void)
 }
 
 static void
-test_text_split_two_pdu (void)
+test_text_split_max_single_pdu_utf16 (void)
+{
+    /* NOTE: this string contains 35 Bhaiksuki characters, each of
+     * them requiring 4 bytes both in UTF-8 and in UTF-16 (140 bytes
+     * in total). */
+    const gchar *text =
+        "𑰀𑰁𑰂𑰃𑰄𑰅𑰆𑰇𑰈𑰊𑰋𑰌𑰍𑰎𑰏𑰐𑰑𑰒𑰓𑰔𑰕𑰖𑰗𑰘𑰙𑰚𑰛𑰜𑰝𑰞𑰟𑰠𑰡𑰢𑰣";
+    const gchar *expected [] = {
+        "𑰀𑰁𑰂𑰃𑰄𑰅𑰆𑰇𑰈𑰊𑰋𑰌𑰍𑰎𑰏𑰐𑰑𑰒𑰓𑰔𑰕𑰖𑰗𑰘𑰙𑰚𑰛𑰜𑰝𑰞𑰟𑰠𑰡𑰢𑰣",
+        NULL
+    };
+
+    common_test_text_split (text, expected, MM_SMS_ENCODING_UCS2);
+}
+
+static void
+test_text_split_two_pdu_gsm7 (void)
 {
     const gchar *text =
         "0123456789012345678901234567890123456789"
@@ -839,6 +866,30 @@ test_text_split_two_pdu_ucs2 (void)
     common_test_text_split (text, expected, MM_SMS_ENCODING_UCS2);
 }
 
+static void
+test_text_split_two_pdu_utf16 (void)
+{
+    /* NOTE: this string contains 35 Bhaiksuki characters, each of
+     * them requiring 4 bytes both in UTF-8 and in UTF-16 (140 bytes
+     * in total) plus one ASCII char (encoded with 1 byte in UTF-8 and
+     * 2 bytes in UTF-16), making it a total of 142 bytes when in
+     * UTF-16 (so not fitting in one single PDU)
+     *
+     * When split in chunks, the last chunk will hold 2 Bhaiksuki
+     * characters plus the last ASCII one (9 bytes in UTF-16) so that
+     * the first chunk contains the leading 33 Bhaiksuki characters
+     * (132 characters, less than 134) */
+    const gchar *text =
+        "𑰀𑰁𑰂𑰃𑰄𑰅𑰆𑰇𑰈𑰊𑰋𑰌𑰍𑰎𑰏𑰐𑰑𑰒𑰓𑰔𑰕𑰖𑰗𑰘𑰙𑰚𑰛𑰜𑰝𑰞𑰟𑰠𑰡𑰢𑰣a";
+    const gchar *expected [] = {
+        "𑰀𑰁𑰂𑰃𑰄𑰅𑰆𑰇𑰈𑰊𑰋𑰌𑰍𑰎𑰏𑰐𑰑𑰒𑰓𑰔𑰕𑰖𑰗𑰘𑰙𑰚𑰛𑰜𑰝𑰞𑰟𑰠𑰡",
+        "𑰢𑰣a",
+        NULL
+    };
+
+    common_test_text_split (text, expected, MM_SMS_ENCODING_UCS2);
+}
+
 /************************************************************/
 
 int main (int argc, char **argv)
@@ -874,12 +925,15 @@ int main (int argc, char **argv)
     g_test_add_func ("/MM/SMS/3GPP/PDU-Creator/GSM-3", test_create_pdu_gsm_3);
     g_test_add_func ("/MM/SMS/3GPP/PDU-Creator/GSM-no-validity", test_create_pdu_gsm_no_validity);
 
-    g_test_add_func ("/MM/SMS/3GPP/Text-Split/short", test_text_split_short);
-    g_test_add_func ("/MM/SMS/3GPP/Text-Split/short-UCS2", test_text_split_short_ucs2);
-    g_test_add_func ("/MM/SMS/3GPP/Text-Split/max-single-pdu", test_text_split_max_single_pdu);
-    g_test_add_func ("/MM/SMS/3GPP/Text-Split/max-single-pdu-UCS2", test_text_split_max_single_pdu_ucs2);
-    g_test_add_func ("/MM/SMS/3GPP/Text-Split/two-pdu", test_text_split_two_pdu);
-    g_test_add_func ("/MM/SMS/3GPP/Text-Split/two-pdu-UCS2", test_text_split_two_pdu_ucs2);
+    g_test_add_func ("/MM/SMS/3GPP/Text-Split/gsm7/short",           test_text_split_short_gsm7);
+    g_test_add_func ("/MM/SMS/3GPP/Text-Split/ucs2/short",           test_text_split_short_ucs2);
+    g_test_add_func ("/MM/SMS/3GPP/Text-Split/utf16/short",          test_text_split_short_utf16);
+    g_test_add_func ("/MM/SMS/3GPP/Text-Split/gsm7/max-single-pdu",  test_text_split_max_single_pdu_gsm7);
+    g_test_add_func ("/MM/SMS/3GPP/Text-Split/ucs2/max-single-pdu",  test_text_split_max_single_pdu_ucs2);
+    g_test_add_func ("/MM/SMS/3GPP/Text-Split/utf16/max-single-pdu", test_text_split_max_single_pdu_utf16);
+    g_test_add_func ("/MM/SMS/3GPP/Text-Split/gsm7/two-pdu",         test_text_split_two_pdu_gsm7);
+    g_test_add_func ("/MM/SMS/3GPP/Text-Split/ucs2/two-pdu",         test_text_split_two_pdu_ucs2);
+    g_test_add_func ("/MM/SMS/3GPP/Text-Split/utf16/two-pdu",        test_text_split_two_pdu_utf16);
 
     return g_test_run ();
 }
author	Aleksander Morgado <aleksander@aleksander.es>	2020-08-20 12:18:05 +0200
committer	Aleksander Morgado <aleksander@aleksander.es>	2020-08-20 18:15:37 +0200
commit	599f545c0d905505516c6546ff77caced2aa14f1 (patch)
tree	a06b77c6dce87ec3aca97ea3c88037c6e7653d67 /src/tests/test-sms-part-3gpp.c
parent	81162df15dc9a409d0979ff8d472a026f31ed883 (diff)