diff options
author | Dan Williams <dcbw@redhat.com> | 2010-09-02 19:29:05 -0500 |
---|---|---|
committer | Dan Williams <dcbw@redhat.com> | 2010-09-02 19:29:05 -0500 |
commit | 9e94dd5b6124d00cf10d6296c7c9aa80f8f68d80 (patch) | |
tree | 78be39d22a363099512a36ebf8b71756bd559162 | |
parent | 85ce5446759092968c6540b9d842c5bc777abb74 (diff) |
gsm: add GSM 03.38 encoding/decoding functions and testcases
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | src/mm-charsets.c | 262 | ||||
-rw-r--r-- | src/mm-charsets.h | 6 | ||||
-rw-r--r-- | src/tests/Makefile.am | 13 | ||||
-rw-r--r-- | src/tests/test-charsets.c | 119 |
5 files changed, 389 insertions, 12 deletions
@@ -31,6 +31,7 @@ docs/spec.html callouts/mm-modem-probe test/lsudev src/tests/test-modem-helpers +src/tests/test-charsets policy/org.freedesktop.modem-manager.policy libqcdm/tests/test-qcdm diff --git a/src/mm-charsets.c b/src/mm-charsets.c index e61e56ea..8fea400e 100644 --- a/src/mm-charsets.c +++ b/src/mm-charsets.c @@ -109,7 +109,7 @@ charset_iconv_from (MMModemCharset charset) gboolean mm_modem_charset_byte_array_append (GByteArray *array, - const char *string, + const char *utf8, gboolean quoted, MMModemCharset charset) { @@ -119,22 +119,16 @@ mm_modem_charset_byte_array_append (GByteArray *array, gsize written = 0; g_return_val_if_fail (array != NULL, FALSE); - g_return_val_if_fail (string != NULL, FALSE); + g_return_val_if_fail (utf8 != NULL, FALSE); iconv_to = charset_iconv_to (charset); g_return_val_if_fail (iconv_to != NULL, FALSE); - converted = g_convert (string, - g_utf8_strlen (string, -1), - iconv_to, - "UTF-8", - NULL, - &written, - &error); + converted = g_convert (utf8, -1, iconv_to, "UTF-8", NULL, &written, &error); if (!converted) { if (error) { g_warning ("%s: failed to convert '%s' to %s character set: (%d) %s", - __func__, string, iconv_to, + __func__, utf8, iconv_to, error->code, error->message); g_error_free (error); } @@ -183,3 +177,251 @@ mm_modem_charset_hex_to_utf8 (const char *src, MMModemCharset charset) return converted; } + +/* GSM 03.38 encoding conversion stuff */ + +#define GSM_DEF_ALPHABET_SIZE 128 +#define GSM_EXT_ALPHABET_SIZE 10 + +typedef struct GsmUtf8Mapping { + gchar chars[3]; + guint8 len; + guint8 gsm; /* only used for extended GSM charset */ +} GsmUtf8Mapping; + +#define ONE(a) { {a, 0x00, 0x00}, 1, 0 } +#define TWO(a, b) { {a, b, 0x00}, 2, 0 } + +/** + * gsm_def_utf8_alphabet: + * + * Mapping from GSM default alphabet to UTF-8. + * + * ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet. Mapping to UCS-2. 
+ * Mapping according to http://unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT + */ +static const GsmUtf8Mapping gsm_def_utf8_alphabet[GSM_DEF_ALPHABET_SIZE] = { + /* @ £ $ ¥ */ + ONE(0x40), TWO(0xc2, 0xa3), ONE(0x24), TWO(0xc2, 0xa5), + /* è é ù ì */ + TWO(0xc3, 0xa8), TWO(0xc3, 0xa9), TWO(0xc3, 0xb9), TWO(0xc3, 0xac), + /* ò Ç \n Ø */ + TWO(0xc3, 0xb2), TWO(0xc3, 0x87), ONE(0x0a), TWO(0xc3, 0x98), + /* ø \r Å å */ + TWO(0xc3, 0xb8), ONE(0x0d), TWO(0xc3, 0x85), TWO(0xc3, 0xa5), + /* Δ _ Φ Γ */ + TWO(0xce, 0x94), ONE(0x5f), TWO(0xce, 0xa6), TWO(0xce, 0x93), + /* Λ Ω Π Ψ */ + TWO(0xce, 0x9b), TWO(0xce, 0xa9), TWO(0xce, 0xa0), TWO(0xce, 0xa8), + /* Σ Θ Ξ Escape Code */ + TWO(0xce, 0xa3), TWO(0xce, 0x98), TWO(0xce, 0x9e), ONE(0xa0), + /* Æ æ ß É */ + TWO(0xc3, 0x86), TWO(0xc3, 0xa6), TWO(0xc3, 0x9f), TWO(0xc3, 0x89), + /* ' ' ! " # */ + ONE(0x20), ONE(0x21), ONE(0x22), ONE(0x23), + /* ¤ % & ' */ + TWO(0xc2, 0xa4), ONE(0x25), ONE(0x26), ONE(0x27), + /* ( ) * + */ + ONE(0x28), ONE(0x29), ONE(0x2a), ONE(0x2b), + /* , - . / */ + ONE(0x2c), ONE(0x2d), ONE(0x2e), ONE(0x2f), + /* 0 1 2 3 */ + ONE(0x30), ONE(0x31), ONE(0x32), ONE(0x33), + /* 4 5 6 7 */ + ONE(0x34), ONE(0x35), ONE(0x36), ONE(0x37), + /* 8 9 : ; */ + ONE(0x38), ONE(0x39), ONE(0x3a), ONE(0x3b), + /* < = > ? 
*/ + ONE(0x3c), ONE(0x3d), ONE(0x3e), ONE(0x3f), + /* ¡ A B C */ + TWO(0xc2, 0xa1), ONE(0x41), ONE(0x42), ONE(0x43), + /* D E F G */ + ONE(0x44), ONE(0x45), ONE(0x46), ONE(0x47), + /* H I J K */ + ONE(0x48), ONE(0x49), ONE(0x4a), ONE(0x4b), + /* L M N O */ + ONE(0x4c), ONE(0x4d), ONE(0x4e), ONE(0x4f), + /* P Q R S */ + ONE(0x50), ONE(0x51), ONE(0x52), ONE(0x53), + /* T U V W */ + ONE(0x54), ONE(0x55), ONE(0x56), ONE(0x57), + /* X Y Z Ä */ + ONE(0x58), ONE(0x59), ONE(0x5a), TWO(0xc3, 0x84), + /* Ö Ñ Ü § */ + TWO(0xc3, 0x96), TWO(0xc3, 0x91), TWO(0xc3, 0x9c), TWO(0xc2, 0xa7), + /* ¿ a b c */ + TWO(0xc2, 0xbf), ONE(0x61), ONE(0x62), ONE(0x63), + /* d e f g */ + ONE(0x64), ONE(0x65), ONE(0x66), ONE(0x67), + /* h i j k */ + ONE(0x68), ONE(0x69), ONE(0x6a), ONE(0x6b), + /* l m n o */ + ONE(0x6c), ONE(0x6d), ONE(0x6e), ONE(0x6f), + /* p q r s */ + ONE(0x70), ONE(0x71), ONE(0x72), ONE(0x73), + /* t u v w */ + ONE(0x74), ONE(0x75), ONE(0x76), ONE(0x77), + /* x y z ä */ + ONE(0x78), ONE(0x79), ONE(0x7a), TWO(0xc3, 0xa4), + /* ö ñ ü à */ + TWO(0xc3, 0xb6), TWO(0xc3, 0xb1), TWO(0xc3, 0xbc), TWO(0xc3, 0xa0) +}; + +static guint8 +gsm_def_char_to_utf8 (const guint8 gsm, guint8 out_utf8[2]) +{ + g_return_val_if_fail (gsm < GSM_DEF_ALPHABET_SIZE, 0); + memcpy (&out_utf8[0], &gsm_def_utf8_alphabet[gsm].chars[0], gsm_def_utf8_alphabet[gsm].len); + return gsm_def_utf8_alphabet[gsm].len; +} + +static gboolean +utf8_to_gsm_def_char (const char *utf8, guint32 len, guint8 *out_gsm) +{ + int i; + + if (len > 0 && len < 4) { + for (i = 0; i < GSM_DEF_ALPHABET_SIZE; i++) { + if (gsm_def_utf8_alphabet[i].len == len) { + if (memcmp (&gsm_def_utf8_alphabet[i].chars[0], utf8, len) == 0) { + *out_gsm = i; + return TRUE; + } + } + } + } + return FALSE; +} + + +#define EONE(a, g) { {a, 0x00, 0x00}, 1, g } +#define ETHR(a, b, c, g) { {a, b, c}, 3, g } + +/** + * gsm_ext_utf8_alphabet: + * + * Mapping from GSM extended alphabet to UTF-8. 
+ *
+ */
+static const GsmUtf8Mapping gsm_ext_utf8_alphabet[GSM_EXT_ALPHABET_SIZE] = {
+    /* form feed ^ { } */
+    EONE(0x0c, 0x0a), EONE(0x5e, 0x14), EONE(0x7b, 0x28), EONE(0x7d, 0x29),
+    /* \ [ ~ ] */
+    EONE(0x5c, 0x2f), EONE(0x5b, 0x3c), EONE(0x7e, 0x3d), EONE(0x5d, 0x3e),
+    /* | € */
+    EONE(0x7c, 0x40), ETHR(0xe2, 0x82, 0xac, 0x65)
+};
+
+#define GSM_ESCAPE_CHAR 0x1b
+
+/* Look up the extended-alphabet code @gsm (the byte that follows
+ * GSM_ESCAPE_CHAR) and copy its UTF-8 bytes into @out_utf8.
+ * Returns the number of bytes copied (1 or 3 with the current table), or 0
+ * when @gsm is not an extended-alphabet code.
+ */
+static guint8
+gsm_ext_char_to_utf8 (const guint8 gsm, guint8 out_utf8[3])
+{
+    int i;
+
+    for (i = 0; i < GSM_EXT_ALPHABET_SIZE; i++) {
+        if (gsm == gsm_ext_utf8_alphabet[i].gsm) {
+            memcpy (&out_utf8[0], &gsm_ext_utf8_alphabet[i].chars[0], gsm_ext_utf8_alphabet[i].len);
+            return gsm_ext_utf8_alphabet[i].len;
+        }
+    }
+    return 0;
+}
+
+/* Reverse lookup: find the extended-alphabet entry whose UTF-8 encoding is
+ * exactly the @len bytes at @utf8.  On a match stores the GSM code (without
+ * the escape prefix) in @out_gsm and returns TRUE; otherwise leaves @out_gsm
+ * untouched and returns FALSE.
+ */
+static gboolean
+utf8_to_gsm_ext_char (const char *utf8, guint32 len, guint8 *out_gsm)
+{
+    int i;
+
+    if (len > 0 && len < 4) {
+        for (i = 0; i < GSM_EXT_ALPHABET_SIZE; i++) {
+            if (gsm_ext_utf8_alphabet[i].len == len) {
+                if (memcmp (&gsm_ext_utf8_alphabet[i].chars[0], utf8, len) == 0) {
+                    *out_gsm = gsm_ext_utf8_alphabet[i].gsm;
+                    return TRUE;
+                }
+            }
+        }
+    }
+    return FALSE;
+}
+
+/**
+ * mm_charset_gsm_unpacked_to_utf8:
+ * @gsm: buffer of GSM 03.38 character codes, one code per byte
+ * @len: number of bytes in @gsm; must be less than 4096
+ *
+ * Converts @gsm to UTF-8.  A byte equal to GSM_ESCAPE_CHAR selects the
+ * extended alphabet for the byte that follows it; every other byte is looked
+ * up in the default alphabet.  Anything that cannot be decoded (an escape
+ * followed by an unknown extension code, an escape that is the last byte of
+ * the buffer, or a byte outside the 128-entry default alphabet) is replaced
+ * by '?'.
+ *
+ * Returns: a newly allocated, NUL-terminated UTF-8 buffer, or NULL if the
+ * argument checks fail.  The caller releases it with g_free().
+ */
+guint8 *
+mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len)
+{
+    guint32 i;
+    GByteArray *utf8;
+
+    g_return_val_if_fail (gsm != NULL, NULL);
+    g_return_val_if_fail (len < 4096, NULL);
+
+    /* worst case initial length */
+    utf8 = g_byte_array_sized_new (len * 2 + 1);
+
+    for (i = 0; i < len; i++) {
+        guint8 uchars[4];
+        guint8 ulen = 0;
+
+        if (gsm[i] == GSM_ESCAPE_CHAR) {
+            /* Extended alphabet, decode next char.  A trailing escape has
+             * no next char: do not read past the end of the buffer.
+             */
+            if (i + 1 < len)
+                ulen = gsm_ext_char_to_utf8 (gsm[i + 1], uchars);
+            if (ulen)
+                i += 1;
+        } else if (gsm[i] < GSM_DEF_ALPHABET_SIZE) {
+            /* Default alphabet */
+            ulen = gsm_def_char_to_utf8 (gsm[i], uchars);
+        }
+        /* Otherwise the byte is outside the default alphabet; ulen stays 0
+         * and the byte is reported as '?' below.
+         */
+
+        if (ulen)
+            g_byte_array_append (utf8, &uchars[0], ulen);
+        else
+            g_byte_array_append (utf8, (guint8 *) "?", 1);
+    }
+
+    g_byte_array_append (utf8, (guint8 *) "\0", 1); /* NULL terminator */
+    return g_byte_array_free (utf8, FALSE);
+}
+
+guint8 *
+mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32
*out_len)
+{
+    /* Converts the NUL-terminated, valid UTF-8 string @utf8 to GSM 03.38
+     * character codes, one code per byte.  Characters of the extended
+     * alphabet are emitted as GSM_ESCAPE_CHAR followed by their code;
+     * characters with no GSM equivalent are emitted as '?' (0x3f).
+     * The number of bytes produced is stored in @out_len.  Returns a newly
+     * allocated buffer that the caller releases with g_free(), or NULL if
+     * the argument checks fail.  Only the empty-string result carries a
+     * trailing NUL byte.
+     */
+    GByteArray *gsm;
+    const char *c = utf8, *next = c;
+    static const guint8 gesc = GSM_ESCAPE_CHAR;
+
+    g_return_val_if_fail (utf8 != NULL, NULL);
+    g_return_val_if_fail (out_len != NULL, NULL);
+    g_return_val_if_fail (g_utf8_validate (utf8, -1, NULL), NULL);
+
+    /* worst case initial length */
+    gsm = g_byte_array_sized_new (g_utf8_strlen (utf8, -1) * 2 + 1);
+
+    if (*utf8 == 0x00) {
+        /* Zero-length string */
+        g_byte_array_append (gsm, (guint8 *) "\0", 1);
+        *out_len = 0;
+        return g_byte_array_free (gsm, FALSE);
+    }
+
+    while (next && *next) {
+        guint8 gch = 0x3f; /* 0x3f == '?' */
+
+        next = g_utf8_next_char (c);
+
+        /* Try escaped chars first, then default alphabet */
+        if (utf8_to_gsm_ext_char (c, next - c, &gch)) {
+            /* Add the escape char */
+            g_byte_array_append (gsm, &gesc, 1);
+            g_byte_array_append (gsm, &gch, 1);
+        } else if (utf8_to_gsm_def_char (c, next - c, &gch)) {
+            g_byte_array_append (gsm, &gch, 1);
+        } else {
+            /* No GSM equivalent: both lookups left gch at '?', so emit
+             * that instead of silently dropping the character.
+             */
+            g_byte_array_append (gsm, &gch, 1);
+        }
+
+        c = next;
+    }
+
+    *out_len = gsm->len;
+    return g_byte_array_free (gsm, FALSE);
+}
+
diff --git a/src/mm-charsets.h b/src/mm-charsets.h
index 5fa34065..ff39400e 100644
--- a/src/mm-charsets.h
+++ b/src/mm-charsets.h
@@ -39,7 +39,7 @@ MMModemCharset mm_modem_charset_from_string (const char *string);
  * UTF-8 encoded.
*/ gboolean mm_modem_charset_byte_array_append (GByteArray *array, - const char *string, + const char *utf8, gboolean quoted, MMModemCharset charset); @@ -48,5 +48,9 @@ gboolean mm_modem_charset_byte_array_append (GByteArray *array, */ char *mm_modem_charset_hex_to_utf8 (const char *src, MMModemCharset charset); +guint8 *mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len); + +guint8 *mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len); + #endif /* MM_CHARSETS_H */ diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am index 74255dbc..77bd4775 100644 --- a/src/tests/Makefile.am +++ b/src/tests/Makefile.am @@ -1,7 +1,7 @@ INCLUDES = \ -I$(top_srcdir)/src -noinst_PROGRAMS = test-modem-helpers +noinst_PROGRAMS = test-modem-helpers test-charsets test_modem_helpers_SOURCES = \ test-modem-helpers.c @@ -13,10 +13,21 @@ test_modem_helpers_LDADD = \ $(top_builddir)/src/libmodem-helpers.la \ $(MM_LIBS) +test_charsets_SOURCES = \ + test-charsets.c + +test_charsets_CPPFLAGS = \ + $(MM_CFLAGS) + +test_charsets_LDADD = \ + $(top_builddir)/src/libmodem-helpers.la \ + $(MM_LIBS) + if WITH_TESTS check-local: test-modem-helpers $(abs_builddir)/test-modem-helpers + $(abs_builddir)/test-charsets endif diff --git a/src/tests/test-charsets.c b/src/tests/test-charsets.c new file mode 100644 index 00000000..80518dc5 --- /dev/null +++ b/src/tests/test-charsets.c @@ -0,0 +1,119 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details: + * + * Copyright (C) 2010 Red Hat, Inc. + */ + +#include <glib.h> +#include <string.h> + +#include "mm-modem-helpers.h" + +static void +test_def_chars (void *f, gpointer d) +{ + /* Test that a string with all the characters in the GSM 03.38 charset + * are converted from UTF-8 to GSM and back to UTF-8 successfully. + */ + static const char *s = "@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà"; + guint8 *gsm, *utf8; + guint32 len = 0; + + /* Convert to GSM */ + gsm = mm_charset_utf8_to_unpacked_gsm (s, &len); + g_assert (gsm); + g_assert_cmpint (len, ==, 127); + + /* And back to UTF-8 */ + utf8 = mm_charset_gsm_unpacked_to_utf8 (gsm, len); + g_assert (utf8); + g_assert_cmpstr (s, ==, (const char *) utf8); + + g_free (gsm); + g_free (utf8); +} + +static void +test_esc_chars (void *f, gpointer d) +{ + /* Test that a string with all the characters in the extended GSM 03.38 + * charset are converted from UTF-8 to GSM and back to UTF-8 successfully. + */ + static const char *s = "\f^{}\\[~]|€"; + guint8 *gsm, *utf8; + guint32 len = 0; + + /* Convert to GSM */ + gsm = mm_charset_utf8_to_unpacked_gsm (s, &len); + g_assert (gsm); + g_assert_cmpint (len, ==, 20); + + /* And back to UTF-8 */ + utf8 = mm_charset_gsm_unpacked_to_utf8 (gsm, len); + g_assert (utf8); + g_assert_cmpstr (s, ==, (const char *) utf8); + + g_free (gsm); + g_free (utf8); +} + +static void +test_mixed_chars (void *f, gpointer d) +{ + /* Test that a string with a mix of GSM 03.38 default and extended characters + * is converted from UTF-8 to GSM and back to UTF-8 successfully. 
+ */ + static const char *s = "@£$¥èéùìø\fΩΠΨΣΘ{ΞÆæß(})789\\:;<=>[?¡QRS]TUÖ|Ñܧ¿abpqrstuvöñüà€"; + guint8 *gsm, *utf8; + guint32 len = 0; + + /* Convert to GSM */ + gsm = mm_charset_utf8_to_unpacked_gsm (s, &len); + g_assert (gsm); + g_assert_cmpint (len, ==, 69); + + /* And back to UTF-8 */ + utf8 = mm_charset_gsm_unpacked_to_utf8 (gsm, len); + g_assert (utf8); + g_assert_cmpstr (s, ==, (const char *) utf8); + + g_free (gsm); + g_free (utf8); +} + + +#if GLIB_CHECK_VERSION(2,25,12) +typedef GTestFixtureFunc TCFunc; +#else +typedef void (*TCFunc)(void); +#endif + +#define TESTCASE(t, d) g_test_create_case (#t, 0, d, NULL, (TCFunc) t, NULL) + +int main (int argc, char **argv) +{ + GTestSuite *suite; + gint result; + + g_test_init (&argc, &argv, NULL); + + suite = g_test_get_root (); + + g_test_suite_add (suite, TESTCASE (test_def_chars, NULL)); + g_test_suite_add (suite, TESTCASE (test_esc_chars, NULL)); + g_test_suite_add (suite, TESTCASE (test_mixed_chars, NULL)); + + result = g_test_run (); + + return result; +} + |