diff options
author | Aleksander Morgado <aleksander@lanedo.com> | 2012-09-07 17:00:03 +0200 |
---|---|---|
committer | Aleksander Morgado <aleksander@lanedo.com> | 2012-09-14 07:05:22 +0200 |
commit | e7b094ea3c67209f305b3f8b9cb9eb8ed89d52b5 (patch) | |
tree | 8527127cb82f0212f8ed44fb1d0f3bf5c7dcfe7c /src/mm-sms-part.c | |
parent | 6b575cece08f026710d4421c11dfba8051036dc4 (diff) |
sms-part: new util to split input text string into chunks to fit in PDUs
Diffstat (limited to 'src/mm-sms-part.c')
-rw-r--r-- | src/mm-sms-part.c | 107 |
1 files changed, 107 insertions, 0 deletions
diff --git a/src/mm-sms-part.c b/src/mm-sms-part.c index f37434d7..f8493bb5 100644 --- a/src/mm-sms-part.c +++ b/src/mm-sms-part.c @@ -855,3 +855,110 @@ error: g_free (pdu); return NULL; } + +gchar ** +mm_sms_part_util_split_text (const gchar *text, + MMSmsEncoding *encoding) +{ + guint gsm_unsupported = 0; + gchar **out; + guint n_chunks; + guint i; + guint j; + gsize in_len; + + if (!text) + return NULL; + + in_len = strlen (text); + + /* Some info about the rules for splitting. + * + * The User Data can be up to 140 bytes in the SMS part: + * 0) If we only need one chunk, it can be of up to 140 bytes. + * If we need more than one chunk, these have to be of 140 - 6 = 134 + * bytes each, as we need place for the UDH header. + * 1) If we're using GSM7 encoding, this gives us up to 160 characters, + * as we can pack 160 characters of 7bits each into 140 bytes. + * 160 * 7 = 140 * 8 = 1120. + * If we only have 134 bytes allowed, that would mean that we can pack + * up to 153 input characters: + * 134 * 8 = 1072; 1072/7=153.14 + * 2) If we're using UCS2 encoding, we can pack up to 70 characters in + * 140 bytes (each with 2 bytes), or up to 67 characters in 134 bytes. + * + * This method does the split of the input string into N strings, so that + * each of the strings can be placed in a SMS part. + */ + + /* Check if we can do GSM encoding */ + mm_charset_get_encoded_len (text, + MM_MODEM_CHARSET_GSM, + &gsm_unsupported); + if (gsm_unsupported > 0) { + /* If cannot do it in GSM encoding, do it in UCS-2 */ + GByteArray *array; + + *encoding = MM_SMS_ENCODING_UCS2; + + /* Guess more or less the size of the output array to avoid multiple + * allocations */ + array = g_byte_array_sized_new (in_len * 2); + if (!mm_modem_charset_byte_array_append (array, + text, + FALSE, + MM_MODEM_CHARSET_UCS2)) { + g_byte_array_unref (array); + return NULL; + } + + /* Our bytearray has it in UCS-2 now. + * UCS-2 is a fixed-size encoding, which means that the text has exactly + * 2 bytes for each unicode point. We can now split this array into + * chunks of 67 UCS-2 characters (134 bytes). + * + * Note that UCS-2 covers unicode points between U+0000 and U+FFFF, which + * means that there is no direct relationship between the size of the + * input text in UTF-8 and the size of the text in UCS-2. A 3-byte UTF-8 + * encoded character will still be represented with 2 bytes in UCS-2. + */ + if (array->len <= 140) { + out = g_new (gchar *, 2); + out[0] = g_strdup (text); + out[1] = NULL; + } else { + n_chunks = array->len / 134; + if (array->len % 134 != 0) + n_chunks++; + + out = g_new0 (gchar *, n_chunks + 1); + for (i = 0, j = 0; i < n_chunks; i++, j += 134) { + out[i] = sms_decode_text (&array->data[j], + MIN (array->len - j, 134), + MM_SMS_ENCODING_UCS2, + 0); + } + } + g_byte_array_unref (array); + } else { + /* Do it with GSM encoding */ + *encoding = MM_SMS_ENCODING_GSM7; + + if (in_len <= 160) { + out = g_new (gchar *, 2); + out[0] = g_strdup (text); + out[1] = NULL; + } else { + n_chunks = in_len / 153; + if (in_len % 153 != 0) + n_chunks++; + + out = g_new0 (gchar *, n_chunks + 1); + for (i = 0, j = 0; i < n_chunks; i++, j += 153) { + out[i] = g_strndup (&text[j], 153); + } + } + } + + return out; +} |