aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAleksander Morgado <aleksander@aleksander.es>2021-02-14 09:09:45 +0100
committerAleksander Morgado <aleksander@aleksander.es>2021-02-23 11:35:11 +0000
commit0ff3eb7ee0106423519152a68de1621cedf567c8 (patch)
treee09889ace53c7d3a4e011fe9ff463745e272a33c /src
parentab4c31ec0b2c79285e24b9a117ff5e5c21f8fd71 (diff)
charsets: remove take_and_convert methods
These methods worked in a very strict way for some encodings, and in a very loose way for others. E.g. when converting from hex-encoded UCS-2, we would attempt to convert as much text as we could even if the input string was not even close to valid UCS-2. This kind of "do our best" approach could make sense when processing e.g. the operator name reported by the modem, as that is just a string to show to the user and there may be no strict requirement for it to be perfectly accurate. But the kind of loose conversion done for UCS-2 doesn't make sense e.g. when converting USSD responses or SMS messages.
Diffstat (limited to 'src')
-rw-r--r--src/mm-charsets.c208
-rw-r--r--src/mm-charsets.h5
2 files changed, 0 insertions, 213 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index 5c0eb6ad..c131ad41 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -701,214 +701,6 @@ mm_charset_gsm_pack (const guint8 *src,
return packed;
}
-/* We do all our best to get the given string, which is possibly given in the
- * specified charset, to UTF8. It may happen that the given string is really
- * the hex representation of the charset-encoded string, so we need to cope with
- * that case. */
-gchar *
-mm_charset_take_and_convert_to_utf8 (gchar *str,
- MMModemCharset charset)
-{
- gchar *utf8 = NULL;
-
- if (!str)
- return NULL;
-
- switch (charset) {
- case MM_MODEM_CHARSET_UNKNOWN:
- g_warn_if_reached ();
- utf8 = str;
- break;
-
- case MM_MODEM_CHARSET_GSM:
- /* This is WRONG! GSM may have embedded NULs (character @)! */
- utf8 = (gchar *) charset_gsm_unpacked_to_utf8 ((const guint8 *) str, strlen (str), FALSE, NULL);
- g_free (str);
- break;
-
- case MM_MODEM_CHARSET_8859_1:
- case MM_MODEM_CHARSET_PCCP437:
- case MM_MODEM_CHARSET_PCDN: {
- const gchar *iconv_from;
- GError *error = NULL;
-
- iconv_from = charset_iconv_from (charset);
- utf8 = g_convert (str, strlen (str),
- "UTF-8", iconv_from,
- NULL, NULL, &error);
- if (!utf8 || error) {
- g_clear_error (&error);
- utf8 = NULL;
- }
-
- g_free (str);
- break;
- }
-
- case MM_MODEM_CHARSET_UCS2:
- case MM_MODEM_CHARSET_UTF16: {
- gsize len;
- gboolean possibly_hex = TRUE;
- gsize bread = 0, bwritten = 0;
-
- /* If the string comes in hex-UCS-2, len needs to be a multiple of 4 */
- len = strlen (str);
- if ((len < 4) || ((len % 4) != 0))
- possibly_hex = FALSE;
- else {
- const gchar *p = str;
-
- /* All chars in the string must be hex */
- while (*p && possibly_hex)
- possibly_hex = isxdigit (*p++);
- }
-
- /* If hex, then we expect hex-encoded UCS-2 */
- if (possibly_hex) {
- utf8 = mm_modem_charset_hex_to_utf8 (str, charset);
- if (utf8) {
- g_free (str);
- break;
- }
- }
-
- /* If not hex, then it might be raw UCS-2 (very unlikely) or ASCII/UTF-8
- * (much more likely). Try to convert to UTF-8 and if that fails, use
- * the partial conversion length to re-convert the part of the string
- * that is UTF-8, if any.
- */
- utf8 = g_convert (str, strlen (str),
- "UTF-8", "UTF-8",
- &bread, &bwritten, NULL);
-
- /* Valid conversion, or we didn't get enough valid UTF-8 */
- if (utf8 || (bwritten <= 2)) {
- g_free (str);
- break;
- }
-
- /* Last try; chop off the original string at the conversion failure
- * location and get what we can.
- */
- str[bread] = '\0';
- utf8 = g_convert (str, strlen (str),
- "UTF-8", "UTF-8",
- NULL, NULL, NULL);
- g_free (str);
- break;
- }
-
- /* If the given charset is ASCII or UTF8, we really expect the final string
- * already here */
- case MM_MODEM_CHARSET_IRA:
- case MM_MODEM_CHARSET_UTF8:
- utf8 = str;
- break;
-
- default:
- g_assert_not_reached ();
- }
-
- /* Validate UTF-8 always before returning. This result will be exposed in DBus
- * very likely... */
- if (utf8 && !g_utf8_validate (utf8, -1, NULL)) {
- /* Better return NULL than an invalid UTF-8 string */
- g_free (utf8);
- utf8 = NULL;
- }
-
- return utf8;
-}
-
-/* We do all our best to convert the given string, which comes in UTF-8, to the
- * specified charset. It may be that the output string needs to be the hex
- * representation of the charset-encoded string, so we need to cope with that
- * case. */
-gchar *
-mm_utf8_take_and_convert_to_charset (gchar *str,
- MMModemCharset charset)
-{
- gchar *encoded = NULL;
-
- if (!str)
- return NULL;
-
- /* Validate UTF-8 always before converting */
- if (!g_utf8_validate (str, -1, NULL)) {
- /* Better return NULL than an invalid encoded string */
- g_free (str);
- return NULL;
- }
-
- switch (charset) {
- case MM_MODEM_CHARSET_UNKNOWN:
- g_warn_if_reached ();
- encoded = str;
- break;
-
- case MM_MODEM_CHARSET_GSM:
- /* This is WRONG! GSM may have embedded NULs (character @)! */
- encoded = mm_modem_charset_str_from_utf8 (str, MM_MODEM_CHARSET_GSM, FALSE, NULL);
- g_free (str);
- break;
-
- case MM_MODEM_CHARSET_8859_1:
- case MM_MODEM_CHARSET_PCCP437:
- case MM_MODEM_CHARSET_PCDN: {
- const gchar *iconv_to;
- GError *error = NULL;
-
- iconv_to = charset_iconv_from (charset);
- encoded = g_convert (str, strlen (str),
- iconv_to, "UTF-8",
- NULL, NULL, &error);
- if (!encoded || error) {
- g_clear_error (&error);
- encoded = NULL;
- }
-
- g_free (str);
- break;
- }
-
- case MM_MODEM_CHARSET_UCS2:
- case MM_MODEM_CHARSET_UTF16: {
- const gchar *iconv_to;
- gsize encoded_len = 0;
- GError *error = NULL;
- gchar *hex;
-
- iconv_to = charset_iconv_from (charset);
- encoded = g_convert (str, strlen (str),
- iconv_to, "UTF-8",
- NULL, &encoded_len, &error);
- if (!encoded || error) {
- g_clear_error (&error);
- encoded = NULL;
- }
-
- /* Get hex representation of the string */
- hex = mm_utils_bin2hexstr ((guint8 *)encoded, encoded_len);
- g_free (encoded);
- encoded = hex;
- g_free (str);
- break;
- }
-
- /* If the given charset is ASCII or UTF8, we really expect the final string
- * already here. */
- case MM_MODEM_CHARSET_IRA:
- case MM_MODEM_CHARSET_UTF8:
- encoded = str;
- break;
-
- default:
- g_assert_not_reached ();
- }
-
- return encoded;
-}
-
/*****************************************************************************/
/* Main conversion functions */
diff --git a/src/mm-charsets.h b/src/mm-charsets.h
index 37b39d7e..9cae5ddf 100644
--- a/src/mm-charsets.h
+++ b/src/mm-charsets.h
@@ -57,11 +57,6 @@ guint8 *mm_charset_gsm_pack (const guint8 *src,
guint8 start_offset, /* in bits */
guint32 *out_packed_len);
-gchar *mm_charset_take_and_convert_to_utf8 (gchar *str,
- MMModemCharset charset);
-gchar *mm_utf8_take_and_convert_to_charset (gchar *str,
- MMModemCharset charset);
-
/*****************************************************************************************/
/*