From 16df1e17e6e96f8dd0ba67003c6abb7083b9d7ec Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Sat, 13 Feb 2021 16:04:56 +0100 Subject: helpers: rework normalize_operator() to use str_to_utf8() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of blindly assuming that we can take whatever string given as valid UTF-8, we'll always attempt to convert from the current modem charset into UTF-8. Before we were doing this for hex-encoded UCS2, but not for example for GSM-7. And due to the now applied GSM-7 conversion, the mf627a/mf627b +COPS parsing unit tests are updated accordingly, because when converting from an input string that contains byte 0x40 ('@' in UTF-8) as if it were GSM-7, the 0x40 is taken as character 'ยก', encoded as 0xc2,0xa1 in UTF-8). --- src/mm-modem-helpers.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'src/mm-modem-helpers.c') diff --git a/src/mm-modem-helpers.c b/src/mm-modem-helpers.c index b4b5515a..26b55f71 100644 --- a/src/mm-modem-helpers.c +++ b/src/mm-modem-helpers.c @@ -1289,9 +1289,9 @@ mm_3gpp_parse_cops_test_response (const gchar *reply, info->operator_code = mm_get_string_unquoted_from_match_info (match_info, 4); /* The returned strings may be given in e.g. UCS2 */ - mm_3gpp_normalize_operator (&info->operator_long, cur_charset); - mm_3gpp_normalize_operator (&info->operator_short, cur_charset); - mm_3gpp_normalize_operator (&info->operator_code, cur_charset); + mm_3gpp_normalize_operator (&info->operator_long, cur_charset, log_object); + mm_3gpp_normalize_operator (&info->operator_short, cur_charset, log_object); + mm_3gpp_normalize_operator (&info->operator_code, cur_charset, log_object); /* Only try for access technology with UMTS-format matches. * If none give, assume GSM */ @@ -4018,8 +4018,11 @@ mm_string_to_access_tech (const gchar *string) void mm_3gpp_normalize_operator (gchar **operator, - MMModemCharset cur_charset) + MMModemCharset cur_charset, + gpointer log_object) { + g_autofree gchar *normalized = NULL; + g_assert (operator); if (*operator == NULL) @@ -4027,31 +4030,38 @@ mm_3gpp_normalize_operator (gchar **operator, /* Despite +CSCS? may claim supporting UCS2, Some modems (e.g. Huawei) * always report the operator name in ASCII in a +COPS response. */ - if (cur_charset == MM_MODEM_CHARSET_UCS2) { - gchar *tmp; + if (cur_charset != MM_MODEM_CHARSET_UNKNOWN) { + g_autoptr(GError) error = NULL; - tmp = g_strdup (*operator); - /* In this case we're already checking UTF-8 validity */ - tmp = mm_charset_take_and_convert_to_utf8 (tmp, cur_charset); - if (tmp) { - g_clear_pointer (operator, g_free); - *operator = tmp; + normalized = mm_modem_charset_str_to_utf8 (*operator, -1, cur_charset, TRUE, &error); + if (normalized) goto out; - } + + mm_obj_dbg (log_object, "couldn't convert operator string '%s' from charset '%s': %s", + *operator, + mm_modem_charset_to_string (cur_charset), + error->message); } /* Charset is unknown or there was an error in conversion; try to see * if the contents we got are valid UTF-8 already. */ - if (!g_utf8_validate (*operator, -1, NULL)) - g_clear_pointer (operator, g_free); + if (g_utf8_validate (*operator, -1, NULL)) + normalized = g_strdup (*operator); out: /* Some modems (Novatel LTE) return the operator name as "Unknown" when * it fails to obtain the operator name. Return NULL in such case. */ - if (*operator && g_ascii_strcasecmp (*operator, "unknown") == 0) + if (!normalized || g_ascii_strcasecmp (normalized, "unknown") == 0) { + /* If normalization failed, just cleanup the string */ g_clear_pointer (operator, g_free); + return; + } + + mm_obj_dbg (log_object, "operator normalized '%s'->'%s'", *operator, normalized); + g_clear_pointer (operator, g_free); + *operator = g_steal_pointer (&normalized); } /*************************************************************************/ -- cgit v1.2.3-70-g09d2