diff options
author | Aleksander Morgado <aleksander@aleksander.es> | 2020-11-27 00:41:40 +0100 |
---|---|---|
committer | Aleksander Morgado <aleksander@aleksander.es> | 2021-02-23 11:35:11 +0000 |
commit | 9c613d33e1f60501cc8406f6429097d8bda87c59 (patch) | |
tree | e3ff469d0fac1d703dfbe9191d000993b2799140 /src/mm-charsets.h | |
parent | 6f32c8d38f2c7ad269c4ccf84190ad6e917293a9 (diff) |
charsets: new common APIs to convert from/to charsets and UTF-8
Diffstat (limited to 'src/mm-charsets.h')
-rw-r--r-- | src/mm-charsets.h | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/src/mm-charsets.h b/src/mm-charsets.h index b59eeeaa..4d032f38 100644 --- a/src/mm-charsets.h +++ b/src/mm-charsets.h @@ -18,6 +18,8 @@ #include <glib.h> +/*****************************************************************************************/ + typedef enum { MM_MODEM_CHARSET_UNKNOWN = 0, MM_MODEM_CHARSET_GSM = 1 << 0, @@ -33,6 +35,8 @@ typedef enum { const gchar *mm_modem_charset_to_string (MMModemCharset charset); MMModemCharset mm_modem_charset_from_string (const gchar *string); +/*****************************************************************************************/ + /* Append the given string to the given byte array but re-encode it * into the given charset first. The original string is assumed to be * UTF-8 encoded. @@ -81,4 +85,61 @@ gchar *mm_charset_take_and_convert_to_utf8 (gchar *str, gchar *mm_utf8_take_and_convert_to_charset (gchar *str, MMModemCharset charset); +/*****************************************************************************************/ + +/* + * Convert the given UTF-8 encoded string into the given charset. + * + * The output is given as a bytearray, because the target charset may allow + * embedded NUL bytes (e.g. UTF-16). + * + * The output encoded string is not guaranteed to be NUL-terminated, instead + * the bytearray length itself gives the correct string length. + */ +GByteArray *mm_modem_charset_bytearray_from_utf8 (const gchar *utf8, + MMModemCharset charset, + gboolean translit, + GError **error); + +/* + * Convert the given UTF-8 encoded string into the given charset. + * + * The output is given as a C string, and those charsets that allow + * embedded NUL bytes (e.g. UTF-16) will be hex-encoded. + * + * The output encoded string is guaranteed to be NUL-terminated, and so no + * explicit output length is returned. 
+ */ +gchar *mm_modem_charset_str_from_utf8 (const gchar *utf8, + MMModemCharset charset, + gboolean translit, + GError **error); + +/* + * Convert the input byte array, which is encoded in the given charset, + * into a UTF-8 encoded string. + * + * The output string is guaranteed to be valid UTF-8 and NUL-terminated. + */ +gchar *mm_modem_charset_bytearray_to_utf8 (GByteArray *bytearray, + MMModemCharset charset, + gboolean translit, + GError **error); + +/* + * Convert the input string, which is encoded in the given charset, into + * a UTF-8 encoded string. Those charsets that allow embedded NUL + * bytes (e.g. UTF-16) need to be hex-encoded. + * + * If the input string is NUL-terminated, len may be given as -1; otherwise + * len needs to specify the number of valid bytes in the input string. + * + * The output string is guaranteed to be valid UTF-8 and NUL-terminated. + */ +gchar *mm_modem_charset_str_to_utf8 (const gchar *str, + gssize len, + MMModemCharset charset, + gboolean translit, + GError **error); + #endif /* MM_CHARSETS_H */ |