diff options
author | Aleksander Morgado <aleksander@aleksander.es> | 2021-02-14 09:09:45 +0100 |
---|---|---|
committer | Aleksander Morgado <aleksander@aleksander.es> | 2021-02-23 11:35:11 +0000 |
commit | 0ff3eb7ee0106423519152a68de1621cedf567c8 (patch) | |
tree | e09889ace53c7d3a4e011fe9ff463745e272a33c /src | |
parent | ab4c31ec0b2c79285e24b9a117ff5e5c21f8fd71 (diff) |
charsets: remove take_and_convert methods
These methods worked in a very strict way for some encodings, and in a
very loose way for others. E.g. when converting from hex-encoded
UCS-2, we would attempt to convert as much text as we could even if
the input string was not even close to valid UCS-2. This kind of "do
our best" approach could make sense when processing e.g. the operator
name reported by the modem, as that is just a string to show to the user
and there may be no strict requirement for it to be perfectly correct.
But the kind of loose conversion done for UCS-2 doesn't make sense e.g.
when converting USSD responses or SMS messages.
Diffstat (limited to 'src')
-rw-r--r-- | src/mm-charsets.c | 208 | ||||
-rw-r--r-- | src/mm-charsets.h | 5 |
2 files changed, 0 insertions, 213 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c index 5c0eb6ad..c131ad41 100644 --- a/src/mm-charsets.c +++ b/src/mm-charsets.c @@ -701,214 +701,6 @@ mm_charset_gsm_pack (const guint8 *src, return packed; } -/* We do all our best to get the given string, which is possibly given in the - * specified charset, to UTF8. It may happen that the given string is really - * the hex representation of the charset-encoded string, so we need to cope with - * that case. */ -gchar * -mm_charset_take_and_convert_to_utf8 (gchar *str, - MMModemCharset charset) -{ - gchar *utf8 = NULL; - - if (!str) - return NULL; - - switch (charset) { - case MM_MODEM_CHARSET_UNKNOWN: - g_warn_if_reached (); - utf8 = str; - break; - - case MM_MODEM_CHARSET_GSM: - /* This is WRONG! GSM may have embedded NULs (character @)! */ - utf8 = (gchar *) charset_gsm_unpacked_to_utf8 ((const guint8 *) str, strlen (str), FALSE, NULL); - g_free (str); - break; - - case MM_MODEM_CHARSET_8859_1: - case MM_MODEM_CHARSET_PCCP437: - case MM_MODEM_CHARSET_PCDN: { - const gchar *iconv_from; - GError *error = NULL; - - iconv_from = charset_iconv_from (charset); - utf8 = g_convert (str, strlen (str), - "UTF-8", iconv_from, - NULL, NULL, &error); - if (!utf8 || error) { - g_clear_error (&error); - utf8 = NULL; - } - - g_free (str); - break; - } - - case MM_MODEM_CHARSET_UCS2: - case MM_MODEM_CHARSET_UTF16: { - gsize len; - gboolean possibly_hex = TRUE; - gsize bread = 0, bwritten = 0; - - /* If the string comes in hex-UCS-2, len needs to be a multiple of 4 */ - len = strlen (str); - if ((len < 4) || ((len % 4) != 0)) - possibly_hex = FALSE; - else { - const gchar *p = str; - - /* All chars in the string must be hex */ - while (*p && possibly_hex) - possibly_hex = isxdigit (*p++); - } - - /* If hex, then we expect hex-encoded UCS-2 */ - if (possibly_hex) { - utf8 = mm_modem_charset_hex_to_utf8 (str, charset); - if (utf8) { - g_free (str); - break; - } - } - - /* If not hex, then it might be raw UCS-2 (very unlikely) or 
ASCII/UTF-8 - * (much more likely). Try to convert to UTF-8 and if that fails, use - * the partial conversion length to re-convert the part of the string - * that is UTF-8, if any. - */ - utf8 = g_convert (str, strlen (str), - "UTF-8", "UTF-8", - &bread, &bwritten, NULL); - - /* Valid conversion, or we didn't get enough valid UTF-8 */ - if (utf8 || (bwritten <= 2)) { - g_free (str); - break; - } - - /* Last try; chop off the original string at the conversion failure - * location and get what we can. - */ - str[bread] = '\0'; - utf8 = g_convert (str, strlen (str), - "UTF-8", "UTF-8", - NULL, NULL, NULL); - g_free (str); - break; - } - - /* If the given charset is ASCII or UTF8, we really expect the final string - * already here */ - case MM_MODEM_CHARSET_IRA: - case MM_MODEM_CHARSET_UTF8: - utf8 = str; - break; - - default: - g_assert_not_reached (); - } - - /* Validate UTF-8 always before returning. This result will be exposed in DBus - * very likely... */ - if (utf8 && !g_utf8_validate (utf8, -1, NULL)) { - /* Better return NULL than an invalid UTF-8 string */ - g_free (utf8); - utf8 = NULL; - } - - return utf8; -} - -/* We do all our best to convert the given string, which comes in UTF-8, to the - * specified charset. It may be that the output string needs to be the hex - * representation of the charset-encoded string, so we need to cope with that - * case. */ -gchar * -mm_utf8_take_and_convert_to_charset (gchar *str, - MMModemCharset charset) -{ - gchar *encoded = NULL; - - if (!str) - return NULL; - - /* Validate UTF-8 always before converting */ - if (!g_utf8_validate (str, -1, NULL)) { - /* Better return NULL than an invalid encoded string */ - g_free (str); - return NULL; - } - - switch (charset) { - case MM_MODEM_CHARSET_UNKNOWN: - g_warn_if_reached (); - encoded = str; - break; - - case MM_MODEM_CHARSET_GSM: - /* This is WRONG! GSM may have embedded NULs (character @)! 
*/ - encoded = mm_modem_charset_str_from_utf8 (str, MM_MODEM_CHARSET_GSM, FALSE, NULL); - g_free (str); - break; - - case MM_MODEM_CHARSET_8859_1: - case MM_MODEM_CHARSET_PCCP437: - case MM_MODEM_CHARSET_PCDN: { - const gchar *iconv_to; - GError *error = NULL; - - iconv_to = charset_iconv_from (charset); - encoded = g_convert (str, strlen (str), - iconv_to, "UTF-8", - NULL, NULL, &error); - if (!encoded || error) { - g_clear_error (&error); - encoded = NULL; - } - - g_free (str); - break; - } - - case MM_MODEM_CHARSET_UCS2: - case MM_MODEM_CHARSET_UTF16: { - const gchar *iconv_to; - gsize encoded_len = 0; - GError *error = NULL; - gchar *hex; - - iconv_to = charset_iconv_from (charset); - encoded = g_convert (str, strlen (str), - iconv_to, "UTF-8", - NULL, &encoded_len, &error); - if (!encoded || error) { - g_clear_error (&error); - encoded = NULL; - } - - /* Get hex representation of the string */ - hex = mm_utils_bin2hexstr ((guint8 *)encoded, encoded_len); - g_free (encoded); - encoded = hex; - g_free (str); - break; - } - - /* If the given charset is ASCII or UTF8, we really expect the final string - * already here. */ - case MM_MODEM_CHARSET_IRA: - case MM_MODEM_CHARSET_UTF8: - encoded = str; - break; - - default: - g_assert_not_reached (); - } - - return encoded; -} - /*****************************************************************************/ /* Main conversion functions */ diff --git a/src/mm-charsets.h b/src/mm-charsets.h index 37b39d7e..9cae5ddf 100644 --- a/src/mm-charsets.h +++ b/src/mm-charsets.h @@ -57,11 +57,6 @@ guint8 *mm_charset_gsm_pack (const guint8 *src, guint8 start_offset, /* in bits */ guint32 *out_packed_len); -gchar *mm_charset_take_and_convert_to_utf8 (gchar *str, - MMModemCharset charset); -gchar *mm_utf8_take_and_convert_to_charset (gchar *str, - MMModemCharset charset); - /*****************************************************************************************/ /* |