aboutsummaryrefslogtreecommitdiff
path: root/src/mm-charsets.c
diff options
context:
space:
mode:
author Dan Williams <dcbw@redhat.com> 2012-09-11 16:36:16 -0500
committer Dan Williams <dcbw@redhat.com> 2012-09-12 23:03:58 -0500
commit c524734d9fd897add850391e7db0a1060e2f6c37 (patch)
tree 26bb44bdfbabe22abefc098376220a1870358d5c /src/mm-charsets.c
parent 73ced242da75abf63a1b5be47ad95123a9e53a3f (diff)
core: better handling of non-UCS2 conversions that should be UCS2 (bgo #683817)
Some modems return the +COPS operator name hex-encoded in the current character set (as set with +CSCS). Others return the operator name in ASCII even when the character set is UCS2, while yet others return the ASCII name with trash at the end (*cough* Huawei *cough*). Handle all of these cases gracefully instead of crashing.
Diffstat (limited to 'src/mm-charsets.c')
-rw-r--r-- src/mm-charsets.c 42
1 files changed, 30 insertions, 12 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index f88c0c7a..5f41a7c0 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -711,8 +711,7 @@ gsm_pack (const guint8 *src,
* the hex representation of the charset-encoded string, so we need to cope with
* that case. */
gchar *
-mm_charset_take_and_convert_to_utf8 (gchar *str,
- MMModemCharset charset)
+mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset)
{
gchar *utf8 = NULL;
@@ -753,6 +752,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
case MM_MODEM_CHARSET_UCS2: {
gsize len;
gboolean possibly_hex = TRUE;
+ gsize bread = 0, bwritten = 0;
/* If the string comes in hex-UCS-2, len needs to be a multiple of 4 */
len = strlen (str);
@@ -766,19 +766,37 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
possibly_hex = isxdigit (*p++);
}
- /* If we get UCS-2, we expect the HEX representation of the string */
+ /* If hex, then we expect hex-encoded UCS-2 */
if (possibly_hex) {
utf8 = mm_modem_charset_hex_to_utf8 (str, charset);
- if (!utf8) {
- /* If we couldn't convert the string as HEX-UCS-2, try to see if
- * the string is valid UTF-8 itself. */
- utf8 = str;
- } else
+ if (utf8) {
g_free (str);
- } else
- /* If we already know it's not hex, try to use the string as it is */
- utf8 = str;
+ break;
+ }
+ }
+
+ /* If not hex, then it might be raw UCS-2 (very unlikely) or ASCII/UTF-8
+ * (much more likely). Try to convert to UTF-8 and if that fails, use
+ * the partial conversion length to re-convert the part of the string
+ * that is UTF-8, if any.
+ */
+ utf8 = g_convert (str, strlen (str),
+ "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+ &bread, &bwritten, NULL);
+
+ /* Valid conversion, or we didn't get enough valid UTF-8 */
+ if (utf8 || (bwritten <= 2)) {
+ g_free (str);
+ break;
+ }
+ /* Last try; chop off the original string at the conversion failure
+ * location and get what we can.
+ */
+ str[bread] = '\0';
+ utf8 = g_convert (str, strlen (str),
+ "UTF-8//TRANSLIT", "UTF-8//TRANSLIT",
+ NULL, NULL, NULL);
break;
}
@@ -792,7 +810,7 @@ mm_charset_take_and_convert_to_utf8 (gchar *str,
/* Validate UTF-8 always before returning. This result will be exposed in DBus
* very likely... */
- if (!g_utf8_validate (utf8, -1, NULL)) {
+ if (utf8 && !g_utf8_validate (utf8, -1, NULL)) {
/* Better return NULL than an invalid UTF-8 string */
g_free (utf8);
utf8 = NULL;