sms-part: new util to split input text string into chunks to fit in PDUs

author: Aleksander Morgado <aleksander@lanedo.com> 2012-09-07 17:00:03 +0200
committer: Aleksander Morgado <aleksander@lanedo.com> 2012-09-14 07:05:22 +0200
commit: e7b094ea3c67209f305b3f8b9cb9eb8ed89d52b5 (patch)
tree: 8527127cb82f0212f8ed44fb1d0f3bf5c7dcfe7c /src
parent: 6b575cece08f026710d4421c11dfba8051036dc4 (diff)
3 files changed, 247 insertions, 0 deletions
diff --git a/src/mm-sms-part.c b/src/mm-sms-part.c
index f37434d7..f8493bb5 100644
--- a/src/mm-sms-part.c
+++ b/src/mm-sms-part.c
@@ -855,3 +855,177 @@ error:
     g_free (pdu);
     return NULL;
 }
+
+/* Returns how many septets a character takes once encoded in GSM-7.
+ *
+ * Only meant for characters already known to be representable in GSM-7: the
+ * ones in the extension table of 3GPP TS 23.038 are sent as an escape septet
+ * plus a second septet, every other character takes a single septet.
+ */
+static guint
+gsm7_char_septets (gunichar c)
+{
+    switch (c) {
+    case 0x000C: /* form feed */
+    case '[':
+    case '\\':
+    case ']':
+    case '^':
+    case '{':
+    case '|':
+    case '}':
+    case '~':
+    case 0x20AC: /* euro sign */
+        return 2;
+    default:
+        return 1;
+    }
+}
+
+/* Splits the UTF-8 string @text into the chunks to send as separate SMS parts.
+ *
+ * @text: UTF-8 text to split; NULL or invalid UTF-8 makes the call return NULL.
+ * @encoding: output location, set to MM_SMS_ENCODING_GSM7 when every character
+ *  was reported as supported by the GSM charset, MM_SMS_ENCODING_UCS2 otherwise.
+ *
+ * Returns: a newly allocated NULL-terminated array of newly allocated UTF-8
+ *  strings, one per part, which the caller should release with g_strfreev();
+ *  or NULL on error.
+ */
+gchar **
+mm_sms_part_util_split_text (const gchar *text,
+                             MMSmsEncoding *encoding)
+{
+    guint gsm_unsupported = 0;
+    gchar **out;
+    guint n_chunks;
+    guint i;
+    guint j;
+
+    if (!text)
+        return NULL;
+
+    g_return_val_if_fail (encoding != NULL, NULL);
+
+    /* Chunks are cut walking the string character by character, which is
+     * only safe on valid UTF-8 */
+    if (!g_utf8_validate (text, -1, NULL))
+        return NULL;
+
+    /* Some info about the rules for splitting.
+     *
+     * The User Data can be up to 140 bytes in the SMS part:
+     *  0) If we only need one chunk, it can be of up to 140 bytes.
+     *     If we need more than one chunk, these have to be of 140 - 6 = 134
+     *     bytes each, as we need place for the UDH header.
+     *  1) If we're using GSM7 encoding, this gives us up to 160 septets,
+     *     as we can pack 160 septets of 7 bits each into 140 bytes.
+     *      160 * 7 = 140 * 8 = 1120.
+     *     If we only have 134 bytes allowed, that would mean that we can pack
+     *     up to 153 septets:
+     *      134 * 8 = 1072; 1072/7=153.14
+     *     Note that a character is not always one septet (see
+     *     gsm7_char_septets()), and that it may take several bytes in UTF-8.
+     *  2) If we're using UCS2 encoding, we can pack up to 70 characters in
+     *     140 bytes (each with 2 bytes), or up to 67 characters in 134 bytes.
+     *
+     * This method does the split of the input string into N strings, so that
+     * each of the strings can be placed in a SMS part.
+     */
+
+    /* Check if we can do GSM encoding */
+    mm_charset_get_encoded_len (text,
+                                MM_MODEM_CHARSET_GSM,
+                                &gsm_unsupported);
+    if (gsm_unsupported > 0) {
+        /* If cannot do it in GSM encoding, do it in UCS-2 */
+        GByteArray *array;
+
+        *encoding = MM_SMS_ENCODING_UCS2;
+
+        /* Guess more or less the size of the output array to avoid multiple
+         * allocations */
+        array = g_byte_array_sized_new (strlen (text) * 2);
+        if (!mm_modem_charset_byte_array_append (array,
+                                                 text,
+                                                 FALSE,
+                                                 MM_MODEM_CHARSET_UCS2)) {
+            g_byte_array_unref (array);
+            return NULL;
+        }
+
+        /* Our bytearray has it in UCS-2 now.
+         * UCS-2 is a fixed-size encoding, which means that the text has exactly
+         * 2 bytes for each unicode point. We can now split this array into
+         * chunks of 67 UCS-2 characters (134 bytes).
+         *
+         * Note that UCS-2 covers unicode points between U+0000 and U+FFFF, which
+         * means that there is no direct relationship between the size of the
+         * input text in UTF-8 and the size of the text in UCS-2. A 3-byte UTF-8
+         * encoded character will still be represented with 2 bytes in UCS-2.
+         */
+        if (array->len <= 140) {
+            out = g_new (gchar *, 2);
+            out[0] = g_strdup (text);
+            out[1] = NULL;
+        } else {
+            n_chunks = array->len / 134;
+            if (array->len % 134 != 0)
+                n_chunks++;
+
+            out = g_new0 (gchar *, n_chunks + 1);
+            for (i = 0, j = 0; i < n_chunks; i++, j += 134) {
+                out[i] = sms_decode_text (&array->data[j],
+                                          MIN (array->len - j, 134),
+                                          MM_SMS_ENCODING_UCS2,
+                                          0);
+                if (!out[i]) {
+                    /* A NULL entry would end the array early and silently
+                     * drop the remaining text: report an error instead */
+                    g_strfreev (out);
+                    out = NULL;
+                    break;
+                }
+            }
+        }
+        g_byte_array_unref (array);
+    } else {
+        /* Do it with GSM encoding */
+        const gchar *p;
+        guint total_septets = 0;
+
+        *encoding = MM_SMS_ENCODING_GSM7;
+
+        for (p = text; *p; p = g_utf8_next_char (p))
+            total_septets += gsm7_char_septets (g_utf8_get_char (p));
+
+        if (total_septets <= 160) {
+            out = g_new (gchar *, 2);
+            out[0] = g_strdup (text);
+            out[1] = NULL;
+        } else {
+            GPtrArray *chunks;
+            const gchar *chunk_start = text;
+            guint chunk_septets = 0;
+
+            chunks = g_ptr_array_new ();
+            for (p = text; *p; p = g_utf8_next_char (p)) {
+                guint septets = gsm7_char_septets (g_utf8_get_char (p));
+
+                /* Always cut between characters, so that neither a multibyte
+                 * UTF-8 sequence nor an escaped GSM-7 character gets split */
+                if (chunk_septets + septets > 153) {
+                    g_ptr_array_add (chunks, g_strndup (chunk_start, p - chunk_start));
+                    chunk_start = p;
+                    chunk_septets = 0;
+                }
+                chunk_septets += septets;
+            }
+            g_ptr_array_add (chunks, g_strndup (chunk_start, p - chunk_start));
+            g_ptr_array_add (chunks, NULL);
+            out = (gchar **) g_ptr_array_free (chunks, FALSE);
+        }
+    }
+
+    return out;
+}
diff --git a/src/mm-sms-part.h b/src/mm-sms-part.h
index fe0efb49..27b73914 100644
--- a/src/mm-sms-part.h
+++ b/src/mm-sms-part.h
@@ -115,4 +115,7 @@ guint mm_sms_part_encode_address (const gchar *address,
                                   gsize buflen,
                                   gboolean is_smsc);
 
+gchar **mm_sms_part_util_split_text (const gchar *text,
+                                     MMSmsEncoding *encoding);
+
 #endif /* MM_SMS_PART_H */
diff --git a/src/tests/test-sms-part.c b/src/tests/test-sms-part.c
index e1f43f7d..9ff96fb2 100644
--- a/src/tests/test-sms-part.c
+++ b/src/tests/test-sms-part.c
@@ -669,6 +669,136 @@ test_create_pdu_gsm_no_validity (void)
                             1); /* expected_msgstart */
 }
 
+/********************* TEXT SPLIT TESTS *********************/
+
+/* Splits @text and checks both the chunks and the encoding that was chosen */
+static void
+common_test_text_split (const gchar *text,
+                        const gchar **expected,
+                        MMSmsEncoding expected_encoding)
+{
+    gchar **out;
+    MMSmsEncoding out_encoding = MM_SMS_ENCODING_UNKNOWN;
+    guint i;
+
+    out = mm_sms_part_util_split_text (text, &out_encoding);
+    g_assert (out != NULL);
+    g_assert_cmpuint (out_encoding, ==, expected_encoding);
+    g_assert_cmpuint (g_strv_length (out), ==, g_strv_length ((gchar **)expected));
+
+    for (i = 0; out[i]; i++)
+        g_assert_cmpstr (out[i], ==, expected[i]);
+
+    g_strfreev (out);
+}
+
+static void
+test_text_split_short (void)
+{
+    const gchar *chunks [] = { /* a single GSM7 part is expected */
+        "Hello",
+        NULL
+    };
+    const gchar *input = "Hello";
+
+    common_test_text_split (input, chunks, MM_SMS_ENCODING_GSM7);
+}
+
+static void
+test_text_split_short_ucs2 (void)
+{
+    const gchar *chunks [] = { /* a single UCS2 part is expected */
+        "你好",
+        NULL
+    };
+    const gchar *input = "你好";
+
+    common_test_text_split (input, chunks, MM_SMS_ENCODING_UCS2);
+}
+
+static void
+test_text_split_max_single_pdu (void)
+{
+    const gchar *chunks [] = { /* 160 characters still fit in one GSM7 part */
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789",
+        NULL
+    };
+    const gchar *input =
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789";
+
+    common_test_text_split (input, chunks, MM_SMS_ENCODING_GSM7);
+}
+
+static void
+test_text_split_max_single_pdu_ucs2 (void)
+{
+    /* 70 Chinese characters: 210 bytes as UTF-8, but exactly the 140 bytes
+     * that one part can carry once encoded in UCS-2, 2 bytes per character.
+     */
+    const gchar *chunks [] = {
+        "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
+        "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
+        "你好你好你好",
+        NULL
+    };
+    const gchar *input =
+        "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
+        "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
+        "你好你好你好";
+
+    common_test_text_split (input, chunks, MM_SMS_ENCODING_UCS2);
+}
+
+static void
+test_text_split_two_pdu (void)
+{
+    const gchar *chunks [] = {
+        /* Part 1 of 2: the first 153 characters */
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789"
+        "012345678901234567890123456789012",
+        /* Part 2 of 2: the remaining 8 characters */
+        "34567890",
+        NULL
+    };
+    const gchar *input =
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789"
+        "0123456789012345678901234567890123456789"
+        "01234567890123456789012345678901234567890";
+
+    common_test_text_split (input, chunks, MM_SMS_ENCODING_GSM7);
+}
+
+static void
+test_text_split_two_pdu_ucs2 (void)
+{
+    const gchar *chunks [] = {
+        /* Part 1 of 2: 67 characters, 134 bytes in UCS-2 */
+        "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
+        "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
+        "你好你",
+        /* Part 2 of 2: the characters left over */
+        "好你好好",
+        NULL
+    };
+    const gchar *input =
+        "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
+        "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
+        "你好你好你好好";
+
+    common_test_text_split (input, chunks, MM_SMS_ENCODING_UCS2);
+}
+
+/************************************************************/
+
 void
 _mm_log (const char *loc,
          const char *func,
@@ -709,5 +839,12 @@ int main (int argc, char **argv)
     g_test_add_func ("/MM/SMS/PDU-Creator/GSM-3", test_create_pdu_gsm_3);
     g_test_add_func ("/MM/SMS/PDU-Creator/GSM-no-validity", test_create_pdu_gsm_no_validity);
 
+    g_test_add_func ("/MM/SMS/Text-Split/short", test_text_split_short);
+    g_test_add_func ("/MM/SMS/Text-Split/short-UCS2", test_text_split_short_ucs2);
+    g_test_add_func ("/MM/SMS/Text-Split/max-single-pdu", test_text_split_max_single_pdu);
+    g_test_add_func ("/MM/SMS/Text-Split/max-single-pdu-UCS2", test_text_split_max_single_pdu_ucs2);
+    g_test_add_func ("/MM/SMS/Text-Split/two-pdu", test_text_split_two_pdu);
+    g_test_add_func ("/MM/SMS/Text-Split/two-pdu-UCS2", test_text_split_two_pdu_ucs2);
+
     return g_test_run ();
 }
author	Aleksander Morgado <aleksander@lanedo.com>	2012-09-07 17:00:03 +0200
committer	Aleksander Morgado <aleksander@lanedo.com>	2012-09-14 07:05:22 +0200
commit	e7b094ea3c67209f305b3f8b9cb9eb8ed89d52b5 (patch)
tree	8527127cb82f0212f8ed44fb1d0f3bf5c7dcfe7c /src
parent	6b575cece08f026710d4421c11dfba8051036dc4 (diff)