diff options
Diffstat (limited to 'src/mm-charsets.c')
-rw-r--r-- | src/mm-charsets.c | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/src/mm-charsets.c b/src/mm-charsets.c index 023dcf82..31c4a85e 100644 --- a/src/mm-charsets.c +++ b/src/mm-charsets.c @@ -424,6 +424,28 @@ mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len) guint8 uchars[4]; guint8 ulen; + /* + * 0x00 is NULL (when followed only by 0x00 up to the + * end of (fixed byte length) message, possibly also up to + * FORM FEED). But 0x00 is also the code for COMMERCIAL AT + * when some other character (CARRIAGE RETURN if nothing else) + * comes after the 0x00. + * http://unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT + * + * So, if we find a '@' (0x00) and all the next chars after that + * are also 0x00, we can consider the string finished already. + */ + if (gsm[i] == 0x00) { + gsize j; + + for (j = i + 1; j < len; j++) { + if (gsm[j] != 0x00) + break; + } + if (j == len) + break; + } + if (gsm[i] == GSM_ESCAPE_CHAR) { /* Extended alphabet, decode next char */ ulen = gsm_ext_char_to_utf8 (gsm[i+1], uchars); |