From 16df1e17e6e96f8dd0ba67003c6abb7083b9d7ec Mon Sep 17 00:00:00 2001
From: Aleksander Morgado <aleksander@aleksander.es>
Date: Sat, 13 Feb 2021 16:04:56 +0100
Subject: helpers: rework normalize_operator() to use str_to_utf8()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of blindly assuming that we can take whatever string given as
valid UTF-8, we'll always attempt to convert from the current modem
charset into UTF-8. Before we were doing this for hex-encoded UCS2,
but not for example for GSM-7.

And due to the now applied GSM-7 conversion, the mf627a/mf627b +COPS
parsing unit tests are updated accordingly, because when converting
from an input string that contains byte 0x40 ('@' in UTF-8) as if it
were GSM-7, the 0x40 is taken as character '¡', encoded as 0xc2,0xa1
in UTF-8).
---
 src/mm-modem-helpers.c | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

(limited to 'src/mm-modem-helpers.c')

diff --git a/src/mm-modem-helpers.c b/src/mm-modem-helpers.c
index b4b5515a..26b55f71 100644
--- a/src/mm-modem-helpers.c
+++ b/src/mm-modem-helpers.c
@@ -1289,9 +1289,9 @@ mm_3gpp_parse_cops_test_response (const gchar     *reply,
         info->operator_code = mm_get_string_unquoted_from_match_info (match_info, 4);
 
         /* The returned strings may be given in e.g. UCS2 */
-        mm_3gpp_normalize_operator (&info->operator_long,  cur_charset);
-        mm_3gpp_normalize_operator (&info->operator_short, cur_charset);
-        mm_3gpp_normalize_operator (&info->operator_code,  cur_charset);
+        mm_3gpp_normalize_operator (&info->operator_long,  cur_charset, log_object);
+        mm_3gpp_normalize_operator (&info->operator_short, cur_charset, log_object);
+        mm_3gpp_normalize_operator (&info->operator_code,  cur_charset, log_object);
 
         /* Only try for access technology with UMTS-format matches.
          * If none give, assume GSM */
@@ -4018,8 +4018,11 @@ mm_string_to_access_tech (const gchar *string)
 
 void
 mm_3gpp_normalize_operator (gchar          **operator,
-                            MMModemCharset   cur_charset)
+                            MMModemCharset   cur_charset,
+                            gpointer         log_object)
 {
+    g_autofree gchar *normalized = NULL;
+
     g_assert (operator);
 
     if (*operator == NULL)
@@ -4027,31 +4030,38 @@ mm_3gpp_normalize_operator (gchar          **operator,
 
     /* Despite +CSCS? may claim supporting UCS2, Some modems (e.g. Huawei)
      * always report the operator name in ASCII in a +COPS response. */
-    if (cur_charset == MM_MODEM_CHARSET_UCS2) {
-        gchar *tmp;
+    if (cur_charset != MM_MODEM_CHARSET_UNKNOWN) {
+        g_autoptr(GError) error = NULL;
 
-        tmp = g_strdup (*operator);
-        /* In this case we're already checking UTF-8 validity */
-        tmp = mm_charset_take_and_convert_to_utf8 (tmp, cur_charset);
-        if (tmp) {
-            g_clear_pointer (operator, g_free);
-            *operator = tmp;
+        normalized = mm_modem_charset_str_to_utf8 (*operator, -1, cur_charset, TRUE, &error);
+        if (normalized)
             goto out;
-        }
+
+        mm_obj_dbg (log_object, "couldn't convert operator string '%s' from charset '%s': %s",
+                    *operator,
+                    mm_modem_charset_to_string (cur_charset),
+                    error->message);
     }
 
     /* Charset is unknown or there was an error in conversion; try to see
      * if the contents we got are valid UTF-8 already. */
-    if (!g_utf8_validate (*operator, -1, NULL))
-        g_clear_pointer (operator, g_free);
+    if (g_utf8_validate (*operator, -1, NULL))
+        normalized = g_strdup (*operator);
 
 out:
 
     /* Some modems (Novatel LTE) return the operator name as "Unknown" when
      * it fails to obtain the operator name. Return NULL in such case.
      */
-    if (*operator && g_ascii_strcasecmp (*operator, "unknown") == 0)
+    if (!normalized || g_ascii_strcasecmp (normalized, "unknown") == 0) {
+        /* If normalization failed, just cleanup the string */
         g_clear_pointer (operator, g_free);
+        return;
+    }
+
+    mm_obj_dbg (log_object, "operator normalized '%s'->'%s'", *operator, normalized);
+    g_clear_pointer (operator, g_free);
+    *operator = g_steal_pointer (&normalized);
 }
 
 /*************************************************************************/
-- 
cgit v1.2.3-70-g09d2