aboutsummaryrefslogtreecommitdiff
path: root/src/mm-charsets.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mm-charsets.c')
-rw-r--r--src/mm-charsets.c178
1 files changed, 176 insertions, 2 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index 708dd3e1..0956e4e1 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -36,8 +36,8 @@ static CharsetEntry charset_map[] = {
{ "IRA", "ASCII", "ASCII", "ASCII//TRANSLIT", MM_MODEM_CHARSET_IRA },
{ "GSM", NULL, NULL, NULL, MM_MODEM_CHARSET_GSM },
{ "8859-1", NULL, "ISO8859-1", "ISO8859-1//TRANSLIT", MM_MODEM_CHARSET_8859_1 },
- { "PCCP437", NULL, NULL, NULL, MM_MODEM_CHARSET_PCCP437 },
- { "PCDN", NULL, NULL, NULL, MM_MODEM_CHARSET_PCDN },
+ { "PCCP437", "CP437", "CP437", "CP437//TRANSLIT", MM_MODEM_CHARSET_PCCP437 },
+ { "PCDN", "CP850", "CP850", "CP850//TRANSLIT", MM_MODEM_CHARSET_PCDN },
{ "HEX", NULL, NULL, NULL, MM_MODEM_CHARSET_HEX },
{ NULL, NULL, NULL, NULL, MM_MODEM_CHARSET_UNKNOWN }
};
@@ -456,6 +456,180 @@ mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len)
return g_byte_array_free (gsm, FALSE);
}
+/* Subset check for MM_MODEM_CHARSET_GSM.
+ *
+ * Looks the UTF-8 sequence (@utf8, @ulen bytes) up with
+ * utf8_to_gsm_def_char() and, only if that fails, with
+ * utf8_to_gsm_ext_char(). @out_clen is set to 2 when the second lookup
+ * matched and to 1 otherwise (including when neither lookup matched).
+ * @c is not used by this check.
+ *
+ * Returns: TRUE if either lookup succeeded, FALSE otherwise.
+ */
+static gboolean
+gsm_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+{
+    guint8 septet;
+    gboolean in_default;
+    gboolean in_extension = FALSE;
+
+    in_default = utf8_to_gsm_def_char (utf8, ulen, &septet);
+    if (!in_default)
+        in_extension = utf8_to_gsm_ext_char (utf8, ulen, &septet);
+
+    *out_clen = in_extension ? 2 : 1;
+    return in_default || in_extension;
+}
+
+/* Subset check for MM_MODEM_CHARSET_IRA: a character is accepted only when
+ * its UTF-8 encoding is exactly one byte long. @out_clen is always set to 1;
+ * @c and @utf8 are not inspected.
+ */
+static gboolean
+ira_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+{
+    const gboolean single_byte = (ulen == 1);
+
+    *out_clen = 1;
+    return single_byte;
+}
+
+/* Subset check for MM_MODEM_CHARSET_UCS2: accepts any code point that fits
+ * in 16 bits. @out_clen is always set to 2, even for rejected characters;
+ * @utf8 and @ulen are not inspected.
+ */
+static gboolean
+ucs2_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+{
+    *out_clen = 2;
+
+    if (c > 0xFFFF)
+        return FALSE;
+    return TRUE;
+}
+
+/* Subset check for MM_MODEM_CHARSET_8859_1: accepts code points 0x00-0xFF.
+ * @out_clen is always set to 1; @utf8 and @ulen are not inspected.
+ */
+static gboolean
+iso88591_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+{
+    *out_clen = 1;
+
+    return (c < 0x100) ? TRUE : FALSE;
+}
+
+/* Subset check for MM_MODEM_CHARSET_PCCP437.
+ *
+ * Code points up to 0x7F are always accepted. Any other code point is
+ * accepted only if it appears in the lookup table below, which lists the
+ * Unicode code points of the upper half (0x80-0xFF) of code page 437.
+ * @out_clen is always set to 1; @utf8 and @ulen are not inspected.
+ *
+ * Returns: TRUE if @c is accepted, FALSE otherwise.
+ */
+static gboolean
+pccp437_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+{
+    static const gunichar t[] = {
+        0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea,
+        0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
+        0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 0x00ff, 0x00d6, 0x00dc,
+        0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
+        0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc,
+        0x00a1, 0x00ab, 0x00bb, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
+        0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b,
+        0x2510, 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
+        0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, 0x2568,
+        0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
+        0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 0x03b1, 0x00df, 0x0393,
+        0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
+        0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320,
+        0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
+        0x25a0, 0x00a0
+    };
+    guint i;
+
+    *out_clen = 1;
+
+    /* The lower half is plain ASCII */
+    if (c <= 0x7F)
+        return TRUE;
+
+    /* Unsigned index: G_N_ELEMENTS() yields an unsigned count */
+    for (i = 0; i < G_N_ELEMENTS (t); i++) {
+        if (c == t[i])
+            return TRUE;
+    }
+    return FALSE;
+}
+
+/* Subset check for MM_MODEM_CHARSET_PCDN.
+ *
+ * Code points up to 0x7F are always accepted. Any other code point is
+ * accepted only if it appears in the lookup table below, which lists the
+ * Unicode code points of the upper half (0x80-0xFF) of code page 850,
+ * matching the "CP850" iconv name used for "PCDN" in charset_map.
+ * @out_clen is always set to 1; @utf8 and @ulen are not inspected.
+ *
+ * NOTE(review): PCDN is presumably the "PC Danish/Norwegian" set, which may
+ * not be CP850 -- confirm against the modem specification before relying on
+ * this table.
+ *
+ * Returns: TRUE if @c is accepted, FALSE otherwise.
+ */
+static gboolean
+pcdn_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+{
+    static const gunichar t[] = {
+        0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea,
+        0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
+        0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 0x00ff, 0x00d6, 0x00dc,
+        0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
+        0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc,
+        0x00a1, 0x00ab, 0x00bb, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
+        0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5,
+        0x2510, 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
+        0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4, 0x00f0,
+        0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
+        0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580, 0x00d3, 0x00df, 0x00d4,
+        0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
+        0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6,
+        0x00a7, 0x00f7, 0x00b8, 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
+        0x25a0, 0x00a0
+    };
+    guint i;
+
+    *out_clen = 1;
+
+    /* The lower half is plain ASCII */
+    if (c <= 0x7F)
+        return TRUE;
+
+    /* Unsigned index: G_N_ELEMENTS() yields an unsigned count */
+    for (i = 0; i < G_N_ELEMENTS (t); i++) {
+        if (c == t[i])
+            return TRUE;
+    }
+    return FALSE;
+}
+
+/* One row of the per-charset subset table.
+ *
+ * @cs:       charset the row applies to; MM_MODEM_CHARSET_UNKNOWN marks the
+ *            end of the table.
+ * @func:     per-character check. Receives the decoded code point, a pointer
+ *            to its UTF-8 bytes and their length; returns TRUE when the
+ *            character is representable in @cs and stores the number of
+ *            bytes it contributes to the encoded length in *out_clen.
+ * @charsize: left zero by every initializer below and not read by the
+ *            lookup code in this file section.
+ */
+typedef struct {
+    MMModemCharset cs;
+    gboolean (*func) (gunichar c, const char *utf8, gsize ulen, guint *out_clen);
+    guint charsize;
+} SubsetEntry;
+
+/* Lookup table scanned linearly by mm_charset_get_encoded_len(). Private to
+ * this file, hence the internal linkage. The MM_MODEM_CHARSET_UNKNOWN row
+ * must stay last: it terminates the scan.
+ */
+static SubsetEntry subset_table[] = {
+    { MM_MODEM_CHARSET_GSM, gsm_is_subset },
+    { MM_MODEM_CHARSET_IRA, ira_is_subset },
+    { MM_MODEM_CHARSET_UCS2, ucs2_is_subset },
+    { MM_MODEM_CHARSET_8859_1, iso88591_is_subset },
+    { MM_MODEM_CHARSET_PCCP437, pccp437_is_subset },
+    { MM_MODEM_CHARSET_PCDN, pcdn_is_subset },
+    { MM_MODEM_CHARSET_UNKNOWN, NULL },
+};
+
+/**
+ * mm_charset_get_encoded_len:
+ *
+ * @utf8: UTF-8 valid string
+ * @charset: the #MMModemCharset to check the length of @utf8 in
+ * @out_unsupported: (may be NULL) on successful return, number of characters
+ * of @utf8 that are not fully representable in @charset
+ *
+ * Walks @utf8 one character at a time and asks the subset check registered
+ * for @charset in subset_table how many bytes each character contributes.
+ * For #MM_MODEM_CHARSET_UTF8 the byte length of @utf8 is returned directly
+ * and @out_unsupported is set to 0.
+ *
+ * Returns: the size in bytes of the string if converted from UTF-8 into
+ * @charset. 0 is returned (and @out_unsupported is left untouched) when
+ * @utf8 is NULL, when @charset is #MM_MODEM_CHARSET_UNKNOWN or has no entry
+ * in subset_table, or when an invalid or truncated UTF-8 sequence is found.
+ **/
+guint
+mm_charset_get_encoded_len (const char *utf8,
+                            MMModemCharset charset,
+                            guint *out_unsupported)
+{
+    const char *p;
+    guint len = 0, unsupported = 0;
+    const SubsetEntry *e;
+
+    g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, 0);
+    g_return_val_if_fail (utf8 != NULL, 0);
+
+    if (charset == MM_MODEM_CHARSET_UTF8) {
+        /* Nothing to convert, so nothing can be unsupported; make sure the
+         * caller never reads an unset counter.
+         */
+        if (out_unsupported)
+            *out_unsupported = 0;
+        return strlen (utf8);
+    }
+
+    /* Find the charset in our subset table */
+    for (e = &subset_table[0];
+         e->cs != charset && e->cs != MM_MODEM_CHARSET_UNKNOWN;
+         e++)
+        ;
+    g_return_val_if_fail (e->cs != MM_MODEM_CHARSET_UNKNOWN, 0);
+
+    p = utf8;
+    while (*p) {
+        gunichar c;
+        const char *next;
+        guint clen = 0;
+
+        /* (gunichar)-1 flags an invalid sequence, (gunichar)-2 a truncated
+         * one; neither may reach the subset checks as a code point.
+         */
+        c = g_utf8_get_char_validated (p, -1);
+        g_return_val_if_fail (c != (gunichar) -1 && c != (gunichar) -2, 0);
+
+        next = g_utf8_find_next_char (p, NULL);
+        if (next == NULL) {
+            /* No further character: stop exactly on the terminating NUL so
+             * the byte count excludes it and the loop ends cleanly.
+             */
+            next = p + strlen (p);
+        }
+
+        if (!e->func (c, p, (gsize) (next - p), &clen))
+            unsupported++;
+        len += clen;
+        p = next;
+    }
+
+    if (out_unsupported)
+        *out_unsupported = unsupported;
+    return len;
+}
+
guint8 *
gsm_unpack (const guint8 *gsm,
guint32 num_septets,