diff options
author | Dan Williams <dcbw@redhat.com> | 2012-09-11 16:36:16 -0500 |
---|---|---|
committer | Dan Williams <dcbw@redhat.com> | 2012-09-12 23:03:58 -0500 |
commit | c524734d9fd897add850391e7db0a1060e2f6c37 (patch) | |
tree | 26bb44bdfbabe22abefc098376220a1870358d5c /src/mm-charsets.c | |
parent | 73ced242da75abf63a1b5be47ad95123a9e53a3f (diff) |
core: better handling of non-UCS2 conversions that should be UCS2 (bgo #683817)
Some modems return the +COPS operator name in hex-encoded current
character set (as set with +CSCS). Others return the operator name
in ASCII when set to UCS2, while yet others return the ASCII name
with trash at the end (*cough* Huawei *cough*). Handle that better
by not crashing.
Diffstat (limited to 'src/mm-charsets.c')
-rw-r--r-- | src/mm-charsets.c | 42 |
1 file changed, 30 insertions, 12 deletions
```diff
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index f88c0c7a..5f41a7c0 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -711,8 +711,7 @@ gsm_pack (const guint8 *src,
  * the hex representation of the charset-encoded string, so we need to cope with
  * that case. */
 gchar *
-mm_charset_take_and_convert_to_utf8 (gchar *str,
-                                     MMModemCharset charset)
+mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset)
 {
     gchar *utf8 = NULL;
 
@@ -753,6 +752,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
     case MM_MODEM_CHARSET_UCS2: {
         gsize len;
         gboolean possibly_hex = TRUE;
+        gsize bread = 0, bwritten = 0;
 
         /* If the string comes in hex-UCS-2, len needs to be a multiple of 4 */
         len = strlen (str);
@@ -766,19 +766,37 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
                 possibly_hex = isxdigit (*p++);
         }
 
-        /* If we get UCS-2, we expect the HEX representation of the string */
+        /* If hex, then we expect hex-encoded UCS-2 */
         if (possibly_hex) {
             utf8 = mm_modem_charset_hex_to_utf8 (str, charset);
-            if (!utf8) {
-                /* If we couldn't convert the string as HEX-UCS-2, try to see if
-                 * the string is valid UTF-8 itself. */
-                utf8 = str;
-            } else
+            if (utf8) {
                 g_free (str);
-        } else
-            /* If we already know it's not hex, try to use the string as it is */
-            utf8 = str;
+                break;
+            }
+        }
+
+        /* If not hex, then it might be raw UCS-2 (very unlikely) or ASCII/UTF-8
+         * (much more likely). Try to convert to UTF-8 and if that fails, use
+         * the partial conversion length to re-convert the part of the string
+         * that is UTF-8, if any.
+         */
+        utf8 = g_convert (str, strlen (str),
+                          "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+                          &bread, &bwritten, NULL);
+
+        /* Valid conversion, or we didn't get enough valid UTF-8 */
+        if (utf8 || (bwritten <= 2)) {
+            g_free (str);
+            break;
+        }
 
+        /* Last try; chop off the original string at the conversion failure
+         * location and get what we can.
+         */
+        str[bread] = '\0';
+        utf8 = g_convert (str, strlen (str),
+                          "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+                          NULL, NULL, NULL);
         break;
     }
 
@@ -792,7 +810,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
 
     /* Validate UTF-8 always before returning. This result will be exposed in DBus
      * very likely... */
-    if (!g_utf8_validate (utf8, -1, NULL)) {
+    if (utf8 && !g_utf8_validate (utf8, -1, NULL)) {
         /* Better return NULL than an invalid UTF-8 string */
         g_free (utf8);
         utf8 = NULL;
```