diff options
author | Aleksander Morgado <aleksander@aleksander.es> | 2020-08-20 10:58:24 +0200 |
---|---|---|
committer | Aleksander Morgado <aleksander@aleksander.es> | 2020-08-20 18:13:18 +0200 |
commit | eb5443b197464e55c85d7a8af67a28f2088506a3 (patch) | |
tree | 938203db0b78602ff9a07fb7e97c8e4d3a871cee | |
parent | 93686510d737bc373100beaeeb3edb7ca091a3f0 (diff) | |
charsets: add UTF-16BE as a possible modem charset
This is just an implementation detail, to be treated as an extension of
UCS2BE; it is never really meant to be used as a real modem charset.
-rw-r--r-- | src/mm-charsets.c | 16 | ||||
-rw-r--r-- | src/mm-charsets.h | 3 | ||||
-rw-r--r-- | src/tests/test-charsets.c | 18 |
3 files changed, 26 insertions, 11 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c index e48cec3e..46b3c68c 100644 --- a/src/mm-charsets.c +++ b/src/mm-charsets.c @@ -43,6 +43,7 @@ static CharsetEntry charset_map[] = { { "PCCP437", "CP437", "CP437", "CP437//TRANSLIT", MM_MODEM_CHARSET_PCCP437 }, { "PCDN", "CP850", "CP850", "CP850//TRANSLIT", MM_MODEM_CHARSET_PCDN }, { "HEX", NULL, NULL, NULL, MM_MODEM_CHARSET_HEX }, + { "UTF-16", "UTF16", "UTF-16BE", "UTF-16BE//TRANSLIT", MM_MODEM_CHARSET_UTF16 }, { NULL, NULL, NULL, NULL, MM_MODEM_CHARSET_UNKNOWN } }; @@ -536,6 +537,14 @@ ucs2_is_subset (gunichar c, const char *utf8, gsize ulen) } static gboolean +utf16_is_subset (gunichar c, + const gchar *utf8, + gsize ulen) +{ + return TRUE; +} + +static gboolean iso88591_is_subset (gunichar c, const char *utf8, gsize ulen) { return (c <= 0xFF); @@ -613,6 +622,7 @@ SubsetEntry subset_table[] = { { MM_MODEM_CHARSET_GSM, gsm_is_subset }, { MM_MODEM_CHARSET_IRA, ira_is_subset }, { MM_MODEM_CHARSET_UCS2, ucs2_is_subset }, + { MM_MODEM_CHARSET_UTF16, utf16_is_subset }, { MM_MODEM_CHARSET_8859_1, iso88591_is_subset }, { MM_MODEM_CHARSET_PCCP437, pccp437_is_subset }, { MM_MODEM_CHARSET_PCDN, pcdn_is_subset }, @@ -786,7 +796,8 @@ mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset) break; } - case MM_MODEM_CHARSET_UCS2: { + case MM_MODEM_CHARSET_UCS2: + case MM_MODEM_CHARSET_UTF16: { gsize len; gboolean possibly_hex = TRUE; gsize bread = 0, bwritten = 0; @@ -914,7 +925,8 @@ mm_utf8_take_and_convert_to_charset (gchar *str, break; } - case MM_MODEM_CHARSET_UCS2: { + case MM_MODEM_CHARSET_UCS2: + case MM_MODEM_CHARSET_UTF16: { const gchar *iconv_to; gsize encoded_len = 0; GError *error = NULL; diff --git a/src/mm-charsets.h b/src/mm-charsets.h index 9e9215d5..e81674c4 100644 --- a/src/mm-charsets.h +++ b/src/mm-charsets.h @@ -27,7 +27,8 @@ typedef enum { MM_MODEM_CHARSET_UCS2 = 0x00000010, MM_MODEM_CHARSET_PCCP437 = 0x00000020, MM_MODEM_CHARSET_PCDN = 0x00000040, - MM_MODEM_CHARSET_HEX = 0x00000080 + 
MM_MODEM_CHARSET_HEX = 0x00000080, + MM_MODEM_CHARSET_UTF16 = 0x00000100, } MMModemCharset; const char *mm_modem_charset_to_string (MMModemCharset charset); diff --git a/src/tests/test-charsets.c b/src/tests/test-charsets.c index 0931d7e8..a15e0332 100644 --- a/src/tests/test-charsets.c +++ b/src/tests/test-charsets.c @@ -369,6 +369,7 @@ struct charset_can_convert_to_test_s { gboolean to_ira; gboolean to_8859_1; gboolean to_ucs2; + gboolean to_utf16; gboolean to_pccp437; gboolean to_pcdn; }; @@ -379,35 +380,35 @@ test_charset_can_covert_to (void) static const struct charset_can_convert_to_test_s charset_can_convert_to_test[] = { { .utf8 = "", - .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, + .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, }, { .utf8 = " ", - .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, + .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, }, { .utf8 = "some basic ascii", - .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, + .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, }, { .utf8 = "ホモ・サピエンス 喂人类 katakana, chinese, english: UCS2 takes it all", - .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE, + .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE, }, { .utf8 = "Some from the GSM7 basic set: a % Ψ Ω ñ ö è æ", - .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE, + .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = FALSE, 
.to_pcdn = FALSE, }, { .utf8 = "More from the GSM7 extended set: {} [] ~ € |", - .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE, + .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE, }, { .utf8 = "patín cannot be encoded in GSM7 or IRA, but is valid UCS2, ISO-8859-1, CP437 and CP850", - .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, + .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, }, { .utf8 = "ècole can be encoded in multiple ways, but not in IRA", - .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, + .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_utf16 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE, }, }; guint i; @@ -418,6 +419,7 @@ test_charset_can_covert_to (void) g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_IRA) == charset_can_convert_to_test[i].to_ira); g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_8859_1) == charset_can_convert_to_test[i].to_8859_1); g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_UCS2) == charset_can_convert_to_test[i].to_ucs2); + g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_UTF16) == charset_can_convert_to_test[i].to_utf16); g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_PCCP437) == charset_can_convert_to_test[i].to_pccp437); g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_PCDN) == charset_can_convert_to_test[i].to_pcdn); } |