aboutsummaryrefslogtreecommitdiff
path: root/src/mm-charsets.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mm-charsets.c')
-rw-r--r-- src/mm-charsets.c 42
1 files changed, 30 insertions, 12 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index f88c0c7a..5f41a7c0 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -711,8 +711,7 @@ gsm_pack (const guint8 *src,
* the hex representation of the charset-encoded string, so we need to cope with
* that case. */
gchar *
-mm_charset_take_and_convert_to_utf8 (gchar *str,
- MMModemCharset charset)
+mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset)
{
gchar *utf8 = NULL;
@@ -753,6 +752,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
case MM_MODEM_CHARSET_UCS2: {
gsize len;
gboolean possibly_hex = TRUE;
+ gsize bread = 0, bwritten = 0;
/* If the string comes in hex-UCS-2, len needs to be a multiple of 4 */
len = strlen (str);
@@ -766,19 +766,37 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
possibly_hex = isxdigit (*p++);
}
- /* If we get UCS-2, we expect the HEX representation of the string */
+ /* If hex, then we expect hex-encoded UCS-2 */
if (possibly_hex) {
utf8 = mm_modem_charset_hex_to_utf8 (str, charset);
- if (!utf8) {
- /* If we couldn't convert the string as HEX-UCS-2, try to see if
- * the string is valid UTF-8 itself. */
- utf8 = str;
- } else
+ if (utf8) {
g_free (str);
- } else
- /* If we already know it's not hex, try to use the string as it is */
- utf8 = str;
+ break;
+ }
+ }
+
+ /* If not hex, then it might be raw UCS-2 (very unlikely) or ASCII/UTF-8
+ * (much more likely). Try to convert to UTF-8 and if that fails, use
+ * the partial conversion length to re-convert the part of the string
+ * that is UTF-8, if any.
+ */
+ utf8 = g_convert (str, strlen (str),
+ "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+ &bread, &bwritten, NULL);
+
+ /* Valid conversion, or we didn't get enough valid UTF-8 */
+ if (utf8 || (bwritten <= 2)) {
+ g_free (str);
+ break;
+ }
+ /* Last try; chop off the original string at the conversion failure
+ * location and get what we can.
+ */
+ str[bread] = '\0';
+ utf8 = g_convert (str, strlen (str),
+ "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+ NULL, NULL, NULL);
break;
}
@@ -792,7 +810,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
/* Validate UTF-8 always before returning. This result will be exposed in DBus
* very likely... */
- if (!g_utf8_validate (utf8, -1, NULL)) {
+ if (utf8 && !g_utf8_validate (utf8, -1, NULL)) {
/* Better return NULL than an invalid UTF-8 string */
g_free (utf8);
utf8 = NULL;