sms: fix splitting messages into chunks in gsm7 encoding

1) Not every allowed GSM7 character takes one byte in UTF-8 encoding. Some (for example, 'à') take several bytes in the input string, but a single byte in GSM7. 2) Extended characters in GSM7 encoding take two bytes. Otherwise, for example, sending the following SMS fails: ``` mmcli -m a --messaging-create-sms="text='[wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww',number='+XXXXXXXXXXX'" Successfully created new SMS: /org/freedesktop/ModemManager1/SMS/99 mmcli --send -s 99 error: couldn't send the SMS: 'GDBus.Error:org.freedesktop.libqmi.Error.Protocol.WmsEncoding: Couldn't write SMS part: QMI protocol error (58): 'WmsEncoding'' ``` ``` mmcli -m a --messaging-create-sms="text='|àààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààà',number='+XXXXXXXXXXX'" Successfully created new SMS: /org/freedesktop/ModemManager1/SMS/72 mmcli --send -s 72 error: couldn't send the SMS: 'GDBus.Error:org.freedesktop.ModemManager1.Error.Core.InvalidArgs: Couldn't convert UTF-8 to GSM: input UTF-8 validation failed' ```
author: Andrey Skvortsov <andrej.skvortzov@gmail.com> 2022-08-30 01:38:03 +0300
committer: Aleksander Morgado <aleksander@aleksander.es> 2022-09-13 20:49:01 +0000
commit: 4ebf8ca4e0577e47095969b71b10834ecb37802d (patch)
tree: e70f2508c70529d64920cf5d0f6c3ac8f5e9b85b /src/mm-charsets.c
parent: 2ece78c80f09c919cbbbc825fab2fc63f864147d (diff)
1 files changed, 61 insertions, 15 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index 1bc6e87e..0cbad337 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -281,6 +281,18 @@ utf8_to_gsm_ext_char (const gchar *utf8,
     return FALSE;
 }
 
+static guint8
+utf8_to_gsm_char (const gchar *utf8,
+                  guint32      len,
+                  guint8      *out_gsm)
+{
+    if (utf8_to_gsm_def_char (utf8, len, out_gsm))
+        return 1;
+    if (utf8_to_gsm_ext_char (utf8, len, out_gsm))
+        return 2;
+    return 0;
+}
+
 static guint8 *
 charset_gsm_unpacked_to_utf8 (const guint8  *gsm,
                               guint32        len,
@@ -980,29 +992,63 @@ util_split_text_gsm7 (const gchar *text,
                       gsize        text_len,
                       gpointer     log_object)
 {
-    gchar **out;
-    guint   n_chunks;
-    guint   i;
-    guint   j;
+    g_autoptr(GPtrArray)  chunks = NULL;
+    const gchar          *walker;
+    const char           *end;
+    const gchar          *chunk_start;
+    glong                 encoded_chunk_length;
+    glong                 total_encoded_chunk_length;
+
+    chunks = g_ptr_array_new_with_free_func ((GDestroyNotify)g_free);
+
+    walker = text;
+    chunk_start = text;
+    encoded_chunk_length = 0;
+    total_encoded_chunk_length = 0;
+    while (walker && *walker) {
+        guint8 symbol[2] = {0, 0};
+        glong  written_bytes = 0;
+
+        end = g_utf8_find_next_char (walker, NULL);
+        if (end == NULL) {
+            /* Find the string terminating NULL */
+            end = walker;
+            while (*++end);
+        }
+
+        written_bytes = utf8_to_gsm_char (walker, (end - walker), symbol);
+
+        /* If more than one chunk is needed, these have to be of 140 - 6 = 134
+         * bytes each, as additional space is needed for the UDH header.
+         * That means up to 153 input characters can be packed:
+         * 134 * 8 = 1072; 1072/7=153.14
+         */
+        if ((encoded_chunk_length + written_bytes) > 153) {
+            g_ptr_array_add (chunks, g_strndup (chunk_start, walker - chunk_start));
+            chunk_start = walker;
+            encoded_chunk_length = written_bytes;
+        } else
+            encoded_chunk_length += written_bytes;
+
+        total_encoded_chunk_length += written_bytes;
+        walker = g_utf8_next_char (walker);
+    }
 
     /* No splitting needed? */
-    if (text_len <= 160) {
+    if (total_encoded_chunk_length <= 160) {
+        gchar **out;
+
         out = g_new0 (gchar *, 2);
         out[0] = g_strdup (text);
         return out;
     }
 
-    /* Compute number of chunks needed */
-    n_chunks = text_len / 153;
-    if (text_len % 153 != 0)
-        n_chunks++;
-
-    /* Fill in all chunks */
-    out = g_new0 (gchar *, n_chunks + 1);
-    for (i = 0, j = 0; i < n_chunks; i++, j += 153)
-        out[i] = g_strndup (&text[j], 153);
+    /* Otherwise, we do need the splitted chunks. Add the last one
+     * with contents plus the last trailing NULL */
+    g_ptr_array_add (chunks, g_strndup (chunk_start, walker - chunk_start));
+    g_ptr_array_add (chunks, NULL);
 
-    return out;
+    return (gchar **) g_ptr_array_free (g_steal_pointer (&chunks), FALSE);
 }
 
 static gchar **
author	Andrey Skvortsov <andrej.skvortzov@gmail.com>	2022-08-30 01:38:03 +0300
committer	Aleksander Morgado <aleksander@aleksander.es>	2022-09-13 20:49:01 +0000
commit	4ebf8ca4e0577e47095969b71b10834ecb37802d (patch)
tree	e70f2508c70529d64920cf5d0f6c3ac8f5e9b85b /src/mm-charsets.c
parent	2ece78c80f09c919cbbbc825fab2fc63f864147d (diff)