5 files changed, 97 insertions, 56 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index 56a745dd..55604182 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -463,43 +463,37 @@ mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len)
 }
 
 static gboolean
-gsm_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+gsm_is_subset (gunichar c, const char *utf8, gsize ulen)
 {
     guint8 gsm;
 
-    *out_clen = 1;
     if (utf8_to_gsm_def_char (utf8, ulen, &gsm))
         return TRUE;
-    if (utf8_to_gsm_ext_char (utf8, ulen, &gsm)) {
-        *out_clen = 2;
+    if (utf8_to_gsm_ext_char (utf8, ulen, &gsm))
         return TRUE;
-    }
     return FALSE;
 }
 
 static gboolean
-ira_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+ira_is_subset (gunichar c, const char *utf8, gsize ulen)
 {
-    *out_clen = 1;
     return (ulen == 1);
 }
 
 static gboolean
-ucs2_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+ucs2_is_subset (gunichar c, const char *utf8, gsize ulen)
 {
-    *out_clen = 2;
     return (c <= 0xFFFF);
 }
 
 static gboolean
-iso88591_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+iso88591_is_subset (gunichar c, const char *utf8, gsize ulen)
 {
-    *out_clen = 1;
     return (c <= 0xFF);
 }
 
 static gboolean
-pccp437_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+pccp437_is_subset (gunichar c, const char *utf8, gsize ulen)
 {
     static const gunichar t[] = {
         0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea,
@@ -520,8 +514,6 @@ pccp437_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
     };
     int i;
 
-    *out_clen = 1;
-
     if (c <= 0x7F)
         return TRUE;
     for (i = 0; i < sizeof (t) / sizeof (t[0]); i++) {
@@ -532,7 +524,7 @@ pccp437_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
 }
 
 static gboolean
-pcdn_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
+pcdn_is_subset (gunichar c, const char *utf8, gsize ulen)
 {
     static const gunichar t[] = {
         0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea,
@@ -553,8 +545,6 @@ pcdn_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
     };
     int i;
 
-    *out_clen = 1;
-
     if (c <= 0x7F)
         return TRUE;
     for (i = 0; i < sizeof (t) / sizeof (t[0]); i++) {
@@ -566,7 +556,7 @@ pcdn_is_subset (gunichar c, const char *utf8, gsize ulen, guint *out_clen)
 
 typedef struct {
     MMModemCharset cs;
-    gboolean (*func) (gunichar c, const char *utf8, gsize ulen, guint *out_clen);
+    gboolean (*func) (gunichar c, const char *utf8, gsize ulen);
     guint charsize;
 } SubsetEntry;
 
@@ -581,40 +571,34 @@ SubsetEntry subset_table[] = {
 };
 
 /**
- * mm_charset_get_encoded_len:
+ * mm_charset_can_convert_to:
+ * @utf8: UTF-8 valid string.
+ * @charset: the #MMModemCharset that @utf8 would be converted to.
  *
- * @utf8: UTF-8 valid string
- * @charset: the #MMModemCharset to check the length of @utf8 in
- * @out_unsupported: on return, number of characters of @utf8 that are not fully
- * representable in @charset
- *
- * Returns: the size in bytes of the string if converted from UTF-8 into @charset.
- **/
-guint
-mm_charset_get_encoded_len (const char *utf8,
-                            MMModemCharset charset,
-                            guint *out_unsupported)
+ * Returns: %TRUE if the conversion is possible without errors, %FALSE otherwise.
+ */
+gboolean
+mm_charset_can_convert_to (const char *utf8,
+                           MMModemCharset charset)
 {
     const char *p = utf8;
-    guint len = 0, unsupported = 0;
     SubsetEntry *e;
 
-    g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, 0);
-    g_return_val_if_fail (utf8 != NULL, 0);
+    g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, FALSE);
+    g_return_val_if_fail (utf8 != NULL, FALSE);
 
     if (charset == MM_MODEM_CHARSET_UTF8)
-        return strlen (utf8);
+        return TRUE;
 
     /* Find the charset in our subset table */
     for (e = &subset_table[0];
          e->cs != charset && e->cs != MM_MODEM_CHARSET_UNKNOWN;
          e++);
-    g_return_val_if_fail (e->cs != MM_MODEM_CHARSET_UNKNOWN, 0);
+    g_return_val_if_fail (e->cs != MM_MODEM_CHARSET_UNKNOWN, FALSE);
 
     while (*p) {
         gunichar c;
         const char *end;
-        guint clen = 0;
 
         c = g_utf8_get_char_validated (p, -1);
         g_return_val_if_fail (c != (gunichar) -1, 0);
@@ -625,15 +609,13 @@ mm_charset_get_encoded_len (const char *utf8,
             while (*++end);
         }
 
-        if (!e->func (c, p, (end - p), &clen))
-            unsupported++;
-        len += clen;
+        if (!e->func (c, p, (end - p)))
+            return FALSE;
+
         p = end;
     }
 
-    if (out_unsupported)
-        *out_unsupported = unsupported;
-    return len;
+    return TRUE;
 }
 
 guint8 *
diff --git a/src/mm-charsets.h b/src/mm-charsets.h
index c0b309e3..340ae95b 100644
--- a/src/mm-charsets.h
+++ b/src/mm-charsets.h
@@ -57,10 +57,9 @@ guint8 *mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len);
 
 guint8 *mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len);
 
-/* Returns the size in bytes required to hold the UTF-8 string in the given charset */
-guint mm_charset_get_encoded_len (const char *utf8,
-                                  MMModemCharset charset,
-                                  guint *out_unsupported);
+/* Checks whether conversion to the given charset may be done without errors */
+gboolean mm_charset_can_convert_to (const char *utf8,
+                                    MMModemCharset charset);
 
 guint8 *mm_charset_gsm_unpack (const guint8 *gsm,
                                guint32 num_septets,
diff --git a/src/mm-sms-part-3gpp.c b/src/mm-sms-part-3gpp.c
index 8fd255ea..0b59b247 100644
--- a/src/mm-sms-part-3gpp.c
+++ b/src/mm-sms-part-3gpp.c
@@ -1026,7 +1026,6 @@ gchar **
 mm_sms_part_3gpp_util_split_text (const gchar *text,
                                   MMSmsEncoding *encoding)
 {
-    guint gsm_unsupported = 0;
     gchar **out;
     guint n_chunks;
     guint i;
@@ -1058,10 +1057,7 @@ mm_sms_part_3gpp_util_split_text (const gchar *text,
      */
 
     /* Check if we can do GSM encoding */
-    mm_charset_get_encoded_len (text,
-                                MM_MODEM_CHARSET_GSM,
-                                &gsm_unsupported);
-    if (gsm_unsupported > 0) {
+    if (!mm_charset_can_convert_to (text, MM_MODEM_CHARSET_GSM)) {
         /* If cannot do it in GSM encoding, do it in UCS-2 */
         GByteArray *array;
 
diff --git a/src/mm-sms-part-cdma.c b/src/mm-sms-part-cdma.c
index 8d76bcec..167eda83 100644
--- a/src/mm-sms-part-cdma.c
+++ b/src/mm-sms-part-cdma.c
@@ -1365,7 +1365,6 @@ decide_best_encoding (const gchar *text,
                       guint *num_bits_per_field,
                       Encoding *encoding)
 {
-    guint latin_unsupported = 0;
     guint ascii_unsupported = 0;
     guint i;
     guint len;
@@ -1391,10 +1390,7 @@ decide_best_encoding (const gchar *text,
     }
 
     /* Check if we can do Latin encoding */
-    mm_charset_get_encoded_len (text,
-                                MM_MODEM_CHARSET_8859_1,
-                                &latin_unsupported);
-    if (!latin_unsupported) {
+    if (mm_charset_can_convert_to (text, MM_MODEM_CHARSET_8859_1)) {
         *out = g_byte_array_sized_new (len);
         mm_modem_charset_byte_array_append (*out,
                                             text,
diff --git a/src/tests/test-charsets.c b/src/tests/test-charsets.c
index 9ae23faf..e7b47da0 100644
--- a/src/tests/test-charsets.c
+++ b/src/tests/test-charsets.c
@@ -20,6 +20,12 @@
 #include "mm-modem-helpers.h"
 #include "mm-log.h"
 
+#if defined ENABLE_TEST_MESSAGE_TRACES
+#define trace(message, ...) g_print (message, ##__VA_ARGS__)
+#else
+#define trace(...)
+#endif
+
 static void
 test_gsm7_default_chars (void)
 {
@@ -344,6 +350,66 @@ test_take_convert_ucs2_bad_ascii2 (void)
     g_assert (converted == NULL);
 }
 
+struct charset_can_convert_to_test_s {
+    const char *utf8;
+    gboolean    to_gsm;
+    gboolean    to_ira;
+    gboolean    to_8859_1;
+    gboolean    to_ucs2;
+    gboolean    to_pccp437;
+    gboolean    to_pcdn;
+};
+
+static void
+test_charset_can_covert_to (void)
+{
+    static const struct charset_can_convert_to_test_s charset_can_convert_to_test[] = {
+        {
+            .utf8 = "",
+            .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+        },
+        {
+            .utf8 = " ",
+            .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+        },
+        {
+            .utf8 = "some basic ascii",
+            .to_gsm = TRUE, .to_ira = TRUE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+        },
+        {
+            .utf8 = "ホモ・サピエンス 喂人类 katakana, chinese, english: UCS2 takes it all",
+            .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE,
+        },
+        {
+            .utf8 = "Some from the GSM7 basic set: a % Ψ Ω ñ ö è æ",
+            .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE,
+        },
+        {
+            .utf8 = "More from the GSM7 extended set: {} [] ~ € |",
+            .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = FALSE, .to_ucs2 = TRUE, .to_pccp437 = FALSE, .to_pcdn = FALSE,
+        },
+        {
+            .utf8 = "patín cannot be encoded in GSM7 or IRA, but is valid UCS2, ISO-8859-1, CP437 and CP850",
+            .to_gsm = FALSE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+        },
+        {
+            .utf8 = "ècole can be encoded in multiple ways, but not in IRA",
+            .to_gsm = TRUE, .to_ira = FALSE, .to_8859_1 = TRUE, .to_ucs2 = TRUE, .to_pccp437 = TRUE, .to_pcdn = TRUE,
+        },
+    };
+    guint i;
+
+    for (i = 0; i < G_N_ELEMENTS (charset_can_convert_to_test); i++) {
+        trace ("testing charset conversion: '%s'\n", charset_can_convert_to_test[i].utf8);
+        g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_GSM)     == charset_can_convert_to_test[i].to_gsm);
+        g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_IRA)     == charset_can_convert_to_test[i].to_ira);
+        g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_8859_1)  == charset_can_convert_to_test[i].to_8859_1);
+        g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_UCS2)    == charset_can_convert_to_test[i].to_ucs2);
+        g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_PCCP437) == charset_can_convert_to_test[i].to_pccp437);
+        g_assert (mm_charset_can_convert_to (charset_can_convert_to_test[i].utf8, MM_MODEM_CHARSET_PCDN)    == charset_can_convert_to_test[i].to_pcdn);
+    }
+}
+
 void
 _mm_log (const char *loc,
          const char *func,
@@ -387,5 +453,7 @@ int main (int argc, char **argv)
     g_test_add_func ("/MM/charsets/take-convert/ucs2/bad-ascii",   test_take_convert_ucs2_bad_ascii);
     g_test_add_func ("/MM/charsets/take-convert/ucs2/bad-ascii-2", test_take_convert_ucs2_bad_ascii2);
 
+    g_test_add_func ("/MM/charsets/can-convert-to", test_charset_can_covert_to);
+
     return g_test_run ();
 }