aboutsummaryrefslogtreecommitdiff
path: root/src/mm-charsets.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mm-charsets.c')
-rw-r--r--src/mm-charsets.c22
1 files changed, 22 insertions, 0 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c
index 023dcf82..31c4a85e 100644
--- a/src/mm-charsets.c
+++ b/src/mm-charsets.c
@@ -424,6 +424,28 @@ mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len)
guint8 uchars[4];
guint8 ulen;
+ /*
+ * 0x00 is NULL (when followed only by 0x00 up to the
+ * end of the (fixed byte length) message, possibly also up to
+ * FORM FEED). But 0x00 is also the code for COMMERCIAL AT
+ * when some other character (CARRIAGE RETURN if nothing else)
+ * comes after the 0x00.
+ * http://unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
+ *
+ * So, if we find a '@' (0x00) and all the next chars after that
+ * are also 0x00, we can consider the string finished already.
+ */
+ if (gsm[i] == 0x00) {
+ gsize j;
+
+ for (j = i + 1; j < len; j++) {
+ if (gsm[j] != 0x00)
+ break;
+ }
+ if (j == len)
+ break;
+ }
+
if (gsm[i] == GSM_ESCAPE_CHAR) {
/* Extended alphabet, decode next char */
ulen = gsm_ext_char_to_utf8 (gsm[i+1], uchars);