charsets: new UTF-8 to given charset converter

UCS-2 strings are always hex-converted.
author: Aleksander Morgado <aleksander@lanedo.com> 2012-02-18 10:48:18 +0100
committer: Aleksander Morgado <aleksander@lanedo.com> 2012-03-16 14:28:13 +0100
commit: 3028126bd244506e709093636245aab2b835dce5 (patch)
tree: 7dd859d70bb8c267465421efc700fbab5facd9c3
parent: 6f1a4514da5041ee57437e651e357bc3a3b86418 (diff)
2 files changed, 89 insertions, 0 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index 68bf2d1d..8e9358eb 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -799,3 +799,89 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
 
     return utf8;
 }
+
+/* Convert the UTF-8 string @str to @charset, taking ownership of @str: for
+ * every known charset it is either freed here or handed back as the result.
+ *
+ * We do all our best to convert the given string, which comes in UTF-8, to the
+ * specified charset. It may be that the output string needs to be the hex
+ * representation of the charset-encoded string (UCS-2 is always returned that
+ * way), so we need to cope with that case.
+ *
+ * Returns the converted string, or NULL if @str is NULL, is not valid UTF-8 or
+ * cannot be converted to @charset. */
+gchar *
+mm_utf8_take_and_convert_to_charset (gchar *str,
+                                     MMModemCharset charset)
+{
+    /* Start from NULL so that a failed conversion, or a charset value not
+     * handled below, never returns an uninitialized pointer */
+    gchar *encoded = NULL;
+
+    if (!str)
+        return NULL;
+
+    /* Validate UTF-8 always before converting */
+    if (!g_utf8_validate (str, -1, NULL)) {
+        /* Better return NULL than an invalid encoded string */
+        g_free (str);
+        return NULL;
+    }
+
+    switch (charset) {
+    case MM_MODEM_CHARSET_UNKNOWN:
+        g_warn_if_reached ();
+        encoded = str;
+        break;
+
+    case MM_MODEM_CHARSET_HEX:
+        /* FIXME: What encoding is this? */
+        g_warn_if_reached ();
+        encoded = str;
+        break;
+
+    case MM_MODEM_CHARSET_GSM:
+    case MM_MODEM_CHARSET_8859_1:
+    case MM_MODEM_CHARSET_PCCP437:
+    case MM_MODEM_CHARSET_PCDN: {
+        const gchar *iconv_to;
+
+        /* g_convert() returns NULL on failure, which is what we report too;
+         * the error details are not needed */
+        iconv_to = charset_iconv_from (charset);
+        encoded = g_convert (str, strlen (str),
+                             iconv_to, "UTF-8",
+                             NULL, NULL, NULL);
+        g_free (str);
+        break;
+    }
+
+    case MM_MODEM_CHARSET_UCS2: {
+        const gchar *iconv_to;
+        gsize raw_len = 0;
+        gchar *raw;
+
+        iconv_to = charset_iconv_from (charset);
+        raw = g_convert (str, strlen (str),
+                         iconv_to, "UTF-8",
+                         NULL, &raw_len, NULL);
+        /* Get hex representation of the string, but only if the conversion
+         * succeeded: a failed conversion leaves no buffer to hex-encode, so
+         * report it as NULL instead of passing NULL to utils_bin2hexstr() */
+        if (raw)
+            encoded = utils_bin2hexstr ((guint8 *)raw, raw_len);
+        g_free (raw);
+        g_free (str);
+        break;
+    }
+
+    /* If the given charset is ASCII or UTF8, we really expect the final string
+     * already here. */
+    case MM_MODEM_CHARSET_IRA:
+    case MM_MODEM_CHARSET_UTF8:
+        encoded = str;
+        break;
+    }
+
+    return encoded;
+}
diff --git a/src/mm-charsets.h b/src/mm-charsets.h
index 855387a0..0238582a 100644
--- a/src/mm-charsets.h
+++ b/src/mm-charsets.h
@@ -75,4 +75,7 @@ guint8 *gsm_pack (const guint8 *src,
 gchar *mm_charset_take_and_convert_to_utf8 (gchar *str,
                                             MMModemCharset charset);
 
+gchar *mm_utf8_take_and_convert_to_charset (gchar *str,
+                                            MMModemCharset charset);
+
 #endif /* MM_CHARSETS_H */
author	Aleksander Morgado <aleksander@lanedo.com>	2012-02-18 10:48:18 +0100
committer	Aleksander Morgado <aleksander@lanedo.com>	2012-03-16 14:28:13 +0100
commit	3028126bd244506e709093636245aab2b835dce5 (patch)
tree	7dd859d70bb8c267465421efc700fbab5facd9c3
parent	6f1a4514da5041ee57437e651e357bc3a3b86418 (diff)