about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/mm-charsets.c 16
-rw-r--r-- src/mm-charsets.h 3
-rw-r--r-- src/tests/test-charsets.c 18
3 files changed, 26 insertions, 11 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index e48cec3e..46b3c68c 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -43,6 +43,7 @@ static CharsetEntry charset_map[] = {
{ "PCCP437", "CP437", "CP437", "CP437//TRANSLIT", MM_MODEM_CHARSET_PCCP437 },
{ "PCDN", "CP850", "CP850", "CP850//TRANSLIT", MM_MODEM_CHARSET_PCDN },
{ "HEX", NULL, NULL, NULL, MM_MODEM_CHARSET_HEX },
+ { "UTF-16", "UTF16", "UTF-16BE", "UTF-16BE//TRANSLIT", MM_MODEM_CHARSET_UTF16 },
{ NULL, NULL, NULL, NULL, MM_MODEM_CHARSET_UNKNOWN }
};
@@ -536,6 +537,14 @@ ucs2_is_subset (gunichar c, const char *utf8, gsize ulen)
}
static gboolean
+utf16_is_subset (gunichar c,
+ const gchar *utf8,
+ gsize ulen)
+{
+ return TRUE;
+}
+
+static gboolean
iso88591_is_subset (gunichar c, const char *utf8, gsize ulen)
{
return (c <= 0xFF);
@@ -613,6 +622,7 @@ SubsetEntry subset_table[] = {
{ MM_MODEM_CHARSET_GSM, gsm_is_subset },
{ MM_MODEM_CHARSET_IRA, ira_is_subset },
{ MM_MODEM_CHARSET_UCS2, ucs2_is_subset },
+ { MM_MODEM_CHARSET_UTF16, utf16_is_subset },
{ MM_MODEM_CHARSET_8859_1, iso88591_is_subset },
{ MM_MODEM_CHARSET_PCCP437, pccp437_is_subset },
{ MM_MODEM_CHARSET_PCDN, pcdn_is_subset },
@@ -786,7 +796,8 @@ mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset)
break;
}
- case MM_MODEM_CHARSET_UCS2: {
+ case MM_MODEM_CHARSET_UCS2:
+ case MM_MODEM_CHARSET_UTF16: {
gsize len;
gboolean possibly_hex = TRUE;
gsize bread = 0, bwritten = 0;
@@ -914,7 +925,8 @@ mm_utf8_take_and_convert_to_charset (gchar *str,
break;
}
- case MM_MODEM_CHARSET_UCS2: {
+ case MM_MODEM_CHARSET_UCS2:
+ case MM_MODEM_CHARSET_UTF16: {
const gchar *iconv_to;
gsize encoded_len = 0;
GError *error = NULL;
diff --git a/src/mm-charsets.h b/src/mm-charsets.h
index 9e9215d5..e81674c4 100644
--- a/src/mm-charsets.h
+++ b/src/mm-charsets.h
@@ -27,7 +27,8 @@ typedef enum {
MM_MODEM_CHARSET_UCS2 = 0x00000010,
MM_MODEM_CHARSET_PCCP437 = 0x00000020,
MM_MODEM_CHARSET_PCDN = 0x00000040,
- MM_MODEM_CHARSET_HEX = 0x00000080
+ MM_MODEM_CHARSET_HEX = 0x00000080,
+ MM_MODEM_CHARSET_UTF16 = 0x00000100,
} MMModemCharset;
const char *mm_modem_charset_to_string (MMModemCharset charset);
diff --git a/src/tests/test-charsets.c b/src/tests/test-charsets.c
index 0931d7e8..a15e0332 100644
--- a/src/tests/test-charsets.c
+++ b/src/tests/test-charsets.c
@@ -369,6 +369,7 @@ struct charset_can_convert_to_test_s {
gboolean to_ira;
gboolean to_8859_1;
gboolean to_ucs2;
+ gboolean to_utf16;
gboolean to_pccp437;
gboolean to_pcdn;
};
@@ -379,35 +380,35 @@ test_charset_can_covert_to (void)
static const struct charset_can_convert_to_test_s charset_can_convert_to_test[] = {
{
.utf8 = "",
- .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+ .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
},
{
.utf8 = " ",
- .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+ .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
},
{
.utf8 = "some basic ascii",
- .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+ .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
},
{
.utf8 = "ホモ・サピエンス 喂人类 katakana, chinese, english: UCS2 takes it all",
- .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE,
+ .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE,
},
{
.utf8 = "Some from the GSM7 basic set: a % Ψ Ω ñ ö è æ",
- .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE,
+ .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE,
},
{
.utf8 = "More from the GSM7 extended set: {} [] ~ € |",
- .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE,
+ .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE,
},
{
.utf8 = "patín cannot be encoded in GSM7 or IRA, but is valid UCS2, ISO-8859-1, CP437 and CP850",
- .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+ .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
},
{
.utf8 = "ècole can be encoded in multiple ways, but not in IRA",
- .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+ .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
},
};
guint i;
@@ -418,6 +419,7 @@ test_charset_can_covert_to (void)
g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_IRA) == charset_can_convert_to_test[i].to_ira);
g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_8859_1) == charset_can_convert_to_test[i].to_8859_1);
g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_UCS2) == charset_can_convert_to_test[i].to_ucs2);
+ g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_UTF16) == charset_can_convert_to_test[i].to_utf16);
g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_PCCP437) == charset_can_convert_to_test[i].to_pccp437);
g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_PCDN) == charset_can_convert_to_test[i].to_pcdn);
}