From bc449cbe87ccebccbe35f926e88a2dd110832ddf Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Thu, 26 Nov 2020 23:22:57 +0100 Subject: charsets: make translit optional in utf8_to_unpacked_gsm() If the conversion is not fully compatible, the user of the method needs to request transliteration enabled explicitly in order to avoid returning errors in this method. --- src/mm-broadband-modem-mbim.c | 5 ++--- src/mm-charsets.c | 39 ++++++++++++++++++++++++++------------- src/mm-charsets.h | 4 +++- src/mm-sms-part-3gpp.c | 7 +++++-- src/tests/test-charsets.c | 3 ++- 5 files changed, 38 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/mm-broadband-modem-mbim.c b/src/mm-broadband-modem-mbim.c index c073bb29..ba37cdb7 100644 --- a/src/mm-broadband-modem-mbim.c +++ b/src/mm-broadband-modem-mbim.c @@ -4777,10 +4777,9 @@ ussd_encode (const gchar *command, guint32 packed_len = 0; *scheme = MM_MODEM_GSM_USSD_SCHEME_7BIT; - gsm = mm_charset_utf8_to_unpacked_gsm (command, &len); + gsm = mm_charset_utf8_to_unpacked_gsm (command, FALSE, &len, error); if (!gsm) { - g_set_error (error, MM_CORE_ERROR, MM_CORE_ERROR_UNSUPPORTED, - "Failed to encode USSD command in GSM7 charset"); + g_prefix_error (error, "Failed to encode USSD command in GSM7 charset: "); return NULL; } packed = mm_charset_gsm_pack (gsm, len, 0, &packed_len); diff --git a/src/mm-charsets.c b/src/mm-charsets.c index 3a8ea719..4b571c49 100644 --- a/src/mm-charsets.c +++ b/src/mm-charsets.c @@ -427,16 +427,21 @@ mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, } guint8 * -mm_charset_utf8_to_unpacked_gsm (const gchar *utf8, - guint32 *out_len) +mm_charset_utf8_to_unpacked_gsm (const gchar *utf8, + gboolean translit, + guint32 *out_len, + GError **error) { - GByteArray *gsm; - const gchar *c; - const gchar *next; - static const guint8 gesc = GSM_ESCAPE_CHAR; - - g_return_val_if_fail (utf8 != NULL, NULL); - g_return_val_if_fail (g_utf8_validate (utf8, -1, NULL), NULL); + g_autoptr(GByteArray) gsm = NULL; + const gchar *c; + const gchar *next; + static const guint8 gesc = GSM_ESCAPE_CHAR; + + if (!utf8 || !g_utf8_validate (utf8, -1, NULL)) { + g_set_error (error, MM_CORE_ERROR, MM_CORE_ERROR_INVALID_ARGS, + "Couldn't convert UTF-8 to GSM: input UTF-8 validation failed"); + return NULL; + } /* worst case initial length */ gsm = g_byte_array_sized_new (g_utf8_strlen (utf8, -1) * 2 + 1); @@ -446,7 +451,7 @@ mm_charset_utf8_to_unpacked_gsm (const gchar *utf8, g_byte_array_append (gsm, (guint8 *) "\0", 1); if (out_len) *out_len = 0; - return g_byte_array_free (gsm, FALSE); + return g_byte_array_free (g_steal_pointer (&gsm), FALSE); } next = utf8; @@ -461,8 +466,16 @@ mm_charset_utf8_to_unpacked_gsm (const gchar *utf8, /* Add the escape char */ g_byte_array_append (gsm, &gesc, 1); g_byte_array_append (gsm, &gch, 1); - } else if (utf8_to_gsm_def_char (c, next - c, &gch)) + } else if (utf8_to_gsm_def_char (c, next - c, &gch)) { + g_byte_array_append (gsm, &gch, 1); + } else if (translit) { + /* add ? */ g_byte_array_append (gsm, &gch, 1); + } else { + g_set_error (error, MM_CORE_ERROR, MM_CORE_ERROR_INVALID_ARGS, + "Couldn't convert UTF-8 char to GSM"); + return NULL; + } c = next; } @@ -473,7 +486,7 @@ mm_charset_utf8_to_unpacked_gsm (const gchar *utf8, /* Always make sure returned string is NUL terminated */ g_byte_array_append (gsm, (guint8 *) "\0", 1); - return g_byte_array_free (gsm, FALSE); + return g_byte_array_free (g_steal_pointer (&gsm), FALSE); } static gboolean @@ -872,7 +885,7 @@ mm_utf8_take_and_convert_to_charset (gchar *str, break; case MM_MODEM_CHARSET_GSM: - encoded = (gchar *) mm_charset_utf8_to_unpacked_gsm (str, NULL); + encoded = (gchar *) mm_charset_utf8_to_unpacked_gsm (str, FALSE, NULL, NULL); g_free (str); break; diff --git a/src/mm-charsets.h b/src/mm-charsets.h index dc8613a5..b59eeeaa 100644 --- a/src/mm-charsets.h +++ b/src/mm-charsets.h @@ -54,7 +54,9 @@ gchar *mm_modem_charset_hex_to_utf8 (const gchar *src, MMModemCharset charset); guint8 *mm_charset_utf8_to_unpacked_gsm (const gchar *utf8, - guint32 *out_len); + gboolean translit, + guint32 *out_len, + GError **error); guint8 *mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len, gboolean translit, diff --git a/src/mm-sms-part-3gpp.c b/src/mm-sms-part-3gpp.c index e7735e1d..7547d029 100644 --- a/src/mm-sms-part-3gpp.c +++ b/src/mm-sms-part-3gpp.c @@ -987,8 +987,11 @@ mm_sms_part_3gpp_get_submit_pdu (MMSmsPart *part, guint8 *unpacked, *packed; guint32 unlen = 0, packlen = 0; - unpacked = mm_charset_utf8_to_unpacked_gsm (mm_sms_part_get_text (part), &unlen); - if (!unpacked || unlen == 0) { + unpacked = mm_charset_utf8_to_unpacked_gsm (mm_sms_part_get_text (part), FALSE, &unlen, error); + if (!unpacked) + goto error; + + if (unlen == 0) { g_free (unpacked); g_set_error_literal (error, MM_MESSAGE_ERROR, diff --git a/src/tests/test-charsets.c b/src/tests/test-charsets.c index b18c11b1..c2e08591 100644 --- a/src/tests/test-charsets.c +++ b/src/tests/test-charsets.c @@ -33,8 +33,9 @@ common_test_gsm7 (const gchar *in_utf8) g_autoptr(GError) error = NULL; /* Convert to GSM */ - unpacked_gsm = mm_charset_utf8_to_unpacked_gsm (in_utf8, &unpacked_gsm_len); + unpacked_gsm = mm_charset_utf8_to_unpacked_gsm (in_utf8, FALSE, &unpacked_gsm_len, &error); g_assert_nonnull (unpacked_gsm); + g_assert_no_error (error); g_assert_cmpuint (unpacked_gsm_len, >, 0); /* Pack */ -- cgit v1.2.3-70-g09d2