charsets: avoid //TRANSLIT when converting to/from charsets

The //TRANSLIT extension is not supported by every iconv() implementation that we may find out there, so let's avoid using it completely. For some of the charsets it didn't make much sense anyway: e.g. converting to UTF-16 or UTF-8 is always possible without requiring //TRANSLIT to take effect. The //TRANSLIT extension was also sometimes being used in the source charset identification, which was not fully correct either, as we would only expect it in the target charset identification.
author: Aleksander Morgado <aleksander@aleksander.es> 2021-02-14 13:47:21 +0100
committer: Aleksander Morgado <aleksander@aleksander.es> 2021-02-23 11:35:11 +0000
commit: 6f32c8d38f2c7ad269c4ccf84190ad6e917293a9 (patch)
tree: c118e0a585221c658c724c8681b8e6eee8062df5 /src/mm-charsets.c
parent: bc449cbe87ccebccbe35f926e88a2dd110832ddf (diff)
1 files changed, 40 insertions, 41 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index 4b571c49..1b7d3d7c 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -26,23 +26,25 @@
 #include "mm-charsets.h"
 #include "mm-log.h"
 
+/******************************************************************************/
+/* Expected charset settings */
+
 typedef struct {
+    MMModemCharset  charset;
     const gchar    *gsm_name;
     const gchar    *other_name;
-    const gchar    *iconv_from_name;
-    const gchar    *iconv_to_name;
-    MMModemCharset  charset;
-} CharsetEntry;
-
-static const CharsetEntry charset_map[] = {
-    { "UTF-8",   "UTF8",   "UTF-8",     "UTF-8//TRANSLIT",     MM_MODEM_CHARSET_UTF8    },
-    { "UCS2",    NULL,     "UCS-2BE",   "UCS-2BE//TRANSLIT",   MM_MODEM_CHARSET_UCS2    },
-    { "IRA",     "ASCII",  "ASCII",     "ASCII//TRANSLIT",     MM_MODEM_CHARSET_IRA     },
-    { "GSM",     NULL,     NULL,        NULL,                  MM_MODEM_CHARSET_GSM     },
-    { "8859-1",  NULL,     "ISO8859-1", "ISO8859-1//TRANSLIT", MM_MODEM_CHARSET_8859_1  },
-    { "PCCP437", "CP437",  "CP437",     "CP437//TRANSLIT",     MM_MODEM_CHARSET_PCCP437 },
-    { "PCDN",    "CP850",  "CP850",     "CP850//TRANSLIT",     MM_MODEM_CHARSET_PCDN    },
-    { "UTF-16",  "UTF16",  "UTF-16BE",  "UTF-16BE//TRANSLIT",  MM_MODEM_CHARSET_UTF16   },
+    const gchar    *iconv_name;
+} CharsetSettings;
+
+static const CharsetSettings charset_settings[] = {
+    { MM_MODEM_CHARSET_UTF8,    "UTF-8",   "UTF8",   "UTF-8"     },
+    { MM_MODEM_CHARSET_UCS2,    "UCS2",    NULL,     "UCS-2BE"   },
+    { MM_MODEM_CHARSET_IRA,     "IRA",     "ASCII",  "ASCII"     },
+    { MM_MODEM_CHARSET_GSM,     "GSM",     NULL,     NULL        },
+    { MM_MODEM_CHARSET_8859_1,  "8859-1",  NULL,     "ISO8859-1" },
+    { MM_MODEM_CHARSET_PCCP437, "PCCP437", "CP437",  "CP437"     },
+    { MM_MODEM_CHARSET_PCDN,    "PCDN",    "CP850",  "CP850"     },
+    { MM_MODEM_CHARSET_UTF16,   "UTF-16",  "UTF16",  "UTF-16BE"  },
 };
 
 MMModemCharset
@@ -52,24 +54,24 @@ mm_modem_charset_from_string (const gchar *string)
 
     g_return_val_if_fail (string != NULL, MM_MODEM_CHARSET_UNKNOWN);
 
-    for (i = 0; i < G_N_ELEMENTS (charset_map); i++) {
-        if (strcasestr (string, charset_map[i].gsm_name))
-            return charset_map[i].charset;
-        if (charset_map[i].other_name && strcasestr (string, charset_map[i].other_name))
-            return charset_map[i].charset;
+    for (i = 0; i < G_N_ELEMENTS (charset_settings); i++) {
+        if (strcasestr (string, charset_settings[i].gsm_name))
+            return charset_settings[i].charset;
+        if (charset_settings[i].other_name && strcasestr (string, charset_settings[i].other_name))
+            return charset_settings[i].charset;
     }
     return MM_MODEM_CHARSET_UNKNOWN;
 }
 
-static const CharsetEntry *
-lookup_charset_by_id (MMModemCharset charset)
+static const CharsetSettings *
+lookup_charset_settings (MMModemCharset charset)
 {
     guint i;
 
     g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL);
-    for (i = 0; i < G_N_ELEMENTS (charset_map); i++) {
-        if (charset_map[i].charset == charset)
-            return &charset_map[i];
+    for (i = 0; i < G_N_ELEMENTS (charset_settings); i++) {
+        if (charset_settings[i].charset == charset)
+            return &charset_settings[i];
     }
     g_warn_if_reached ();
     return NULL;
@@ -78,28 +80,25 @@ lookup_charset_by_id (MMModemCharset charset)
 const gchar *
 mm_modem_charset_to_string (MMModemCharset charset)
 {
-    const CharsetEntry *entry;
+    const CharsetSettings *settings;
 
-    entry = lookup_charset_by_id (charset);
-    return entry ? entry->gsm_name : NULL;
+    settings = lookup_charset_settings (charset);
+    return settings ? settings->gsm_name : NULL;
 }
 
 static const gchar *
-charset_iconv_to (MMModemCharset charset)
+charset_iconv_from (MMModemCharset charset)
 {
-    const CharsetEntry *entry;
+    const CharsetSettings *settings;
 
-    entry = lookup_charset_by_id (charset);
-    return entry ? entry->iconv_to_name : NULL;
+    settings = lookup_charset_settings (charset);
+    return settings ? settings->iconv_name : NULL;
 }
 
 static const gchar *
-charset_iconv_from (MMModemCharset charset)
+charset_iconv_to (MMModemCharset charset)
 {
-    const CharsetEntry *entry;
-
-    entry = lookup_charset_by_id (charset);
-    return entry ? entry->iconv_from_name : NULL;
+    return charset_iconv_from (charset);
 }
 
 gboolean
@@ -145,7 +144,7 @@ mm_modem_charset_byte_array_to_utf8 (GByteArray     *array,
     g_return_val_if_fail (iconv_from != NULL, FALSE);
 
     converted = g_convert ((const gchar *)array->data, array->len,
-                           "UTF-8//TRANSLIT", iconv_from,
+                           "UTF-8", iconv_from,
                            NULL, NULL, &error);
     if (!converted || error)
         return NULL;
@@ -177,7 +176,7 @@ mm_modem_charset_hex_to_utf8 (const gchar    *src,
         return g_steal_pointer (&unconverted);
 
     converted = g_convert ((const gchar *)unconverted, unconverted_len,
-                           "UTF-8//TRANSLIT", iconv_from,
+                           "UTF-8", iconv_from,
                            NULL, NULL, &error);
     if (!converted || error)
         return NULL;
@@ -772,7 +771,7 @@ mm_charset_take_and_convert_to_utf8 (gchar          *str,
 
         iconv_from = charset_iconv_from (charset);
         utf8 = g_convert (str, strlen (str),
-                          "UTF-8//TRANSLIT", iconv_from,
+                          "UTF-8", iconv_from,
                           NULL, NULL, &error);
         if (!utf8 || error) {
             g_clear_error (&error);
@@ -816,7 +815,7 @@ mm_charset_take_and_convert_to_utf8 (gchar          *str,
          * that is UTF-8, if any.
          */
         utf8 = g_convert (str, strlen (str),
-                          "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+                          "UTF-8", "UTF-8",
                           &bread, &bwritten, NULL);
 
         /* Valid conversion, or we didn't get enough valid UTF-8 */
@@ -830,7 +829,7 @@ mm_charset_take_and_convert_to_utf8 (gchar          *str,
          */
         str[bread] = '\0';
         utf8 = g_convert (str, strlen (str),
-                          "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+                          "UTF-8", "UTF-8",
                           NULL, NULL, NULL);
         g_free (str);
         break;
author	Aleksander Morgado <aleksander@aleksander.es>	2021-02-14 13:47:21 +0100
committer	Aleksander Morgado <aleksander@aleksander.es>	2021-02-23 11:35:11 +0000
commit	6f32c8d38f2c7ad269c4ccf84190ad6e917293a9 (patch)
tree	c118e0a585221c658c724c8681b8e6eee8062df5 /src/mm-charsets.c
parent	bc449cbe87ccebccbe35f926e88a2dd110832ddf (diff)