/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details:
*
* Copyright (C) 2010 Red Hat, Inc.
*/
#ifndef MM_CHARSETS_H
#define MM_CHARSETS_H
#include <glib.h>
/*****************************************************************************************/
/*
 * Character sets understood by the charset helpers declared in this header.
 *
 * MM_MODEM_CHARSET_UNKNOWN is zero; every other member occupies exactly one
 * bit, so the values are distinct and several of them can be OR-ed together
 * into a single mask.
 */
typedef enum {
    MM_MODEM_CHARSET_UNKNOWN  = 0x00, /* no bit set */
    MM_MODEM_CHARSET_GSM      = 0x01, /* bit 0 */
    MM_MODEM_CHARSET_IRA      = 0x02, /* bit 1 */
    MM_MODEM_CHARSET_8859_1   = 0x04, /* bit 2 */
    MM_MODEM_CHARSET_UTF8     = 0x08, /* bit 3 */
    MM_MODEM_CHARSET_UCS2     = 0x10, /* bit 4 */
    MM_MODEM_CHARSET_PCCP437  = 0x20, /* bit 5 */
    MM_MODEM_CHARSET_PCDN     = 0x40, /* bit 6 */
    MM_MODEM_CHARSET_UTF16    = 0x80, /* bit 7 */
} MMModemCharset;
/*
 * Map a MMModemCharset value to a string.
 *
 * NOTE(review): the exact strings, and the result for UNKNOWN or for a
 * combination of several charset bits, are not visible from this header;
 * confirm against the implementation. The const return type suggests the
 * caller must not free the result -- TODO confirm.
 */
const gchar *mm_modem_charset_to_string (MMModemCharset charset);
/*
 * Map a string to a MMModemCharset value.
 *
 * NOTE(review): the matching rules (case sensitivity, accepted aliases) and
 * the value returned for unrecognised or NULL input are not visible from
 * this header; presumably MM_MODEM_CHARSET_UNKNOWN -- confirm against the
 * implementation.
 */
MMModemCharset mm_modem_charset_from_string (const gchar *string);
/*****************************************************************************************/
/*
 * Checks whether conversion to the given charset may be done without errors.
 *
 * utf8:    the text to check; the parameter name indicates UTF-8 input.
 * charset: the target charset of the prospective conversion.
 *
 * NOTE(review): presumably returns TRUE when the conversion would succeed;
 * handling of NULL or empty input is not visible here -- confirm.
 */
gboolean mm_charset_can_convert_to (const gchar *utf8,
MMModemCharset charset);
/*
 * Unpack GSM septets from the input buffer.
 *
 * gsm:              input bytes (read-only).
 * num_septets:      number of septets to process.
 * start_offset:     offset into the input, expressed in bits.
 * out_unpacked_len: output location for the length of the returned buffer.
 *
 * NOTE(review): presumably the result holds one septet per byte and is
 * newly allocated, to be freed by the caller -- confirm ownership and the
 * failure return value against the implementation.
 */
guint8 *mm_charset_gsm_unpack (const guint8 *gsm,
guint32 num_septets,
guint8 start_offset, /* in bits */
guint32 *out_unpacked_len);
/*
 * Pack the input buffer into GSM packed form.
 *
 * src:            input bytes (read-only).
 * src_len:        length of the input.
 * start_offset:   offset at which packing starts, expressed in bits.
 * out_packed_len: output location for the length of the returned buffer.
 *
 * NOTE(review): presumably src holds one septet per byte and the result is
 * newly allocated, to be freed by the caller -- confirm ownership and the
 * failure return value against the implementation.
 */
guint8 *mm_charset_gsm_pack (const guint8 *src,
guint32 src_len,
guint8 start_offset, /* in bits */
guint32 *out_packed_len);
/*****************************************************************************************/
/*
 * Convert the given UTF-8 encoded string into the given charset.
 *
 * The output is given as a bytearray, because the target charset may allow
 * embedded NUL bytes (e.g. UTF-16).
 *
 * The output encoded string is not guaranteed to be NUL-terminated, instead
 * the bytearray length itself gives the correct string length.
 *
 * utf8:     the UTF-8 encoded input string.
 * charset:  the target charset.
 * translit: NOTE(review): presumably selects a lossy conversion that
 *           substitutes unrepresentable characters instead of failing --
 *           confirm against the implementation.
 * error:    location for a GError describing a failure.
 *
 * NOTE(review): presumably returns NULL with *error set on failure, and the
 * caller owns the returned bytearray -- confirm.
 */
GByteArray *mm_modem_charset_bytearray_from_utf8 (const gchar *utf8,
MMModemCharset charset,
gboolean translit,
GError **error);
/*
 * Convert the given UTF-8 encoded string into the given charset.
 *
 * The output is given as a C string, and those charsets that allow
 * embedded NUL bytes (e.g. UTF-16) will be hex-encoded.
 *
 * The output encoded string is guaranteed to be NUL-terminated, and so no
 * explicit output length is returned.
 *
 * utf8:     the UTF-8 encoded input string.
 * charset:  the target charset.
 * translit: NOTE(review): presumably selects a lossy conversion that
 *           substitutes unrepresentable characters instead of failing --
 *           confirm against the implementation.
 * error:    location for a GError describing a failure.
 *
 * NOTE(review): presumably returns NULL with *error set on failure, and the
 * caller frees the returned string -- confirm.
 */
gchar *mm_modem_charset_str_from_utf8 (const gchar *utf8,
MMModemCharset charset,
gboolean translit,
GError **error);
/*
 * Convert the input byte array, which is encoded in the given charset,
 * into a UTF-8 encoded string.
 *
 * The output string is guaranteed to be valid UTF-8 and NUL-terminated.
 *
 * bytearray: the input bytes, encoded in 'charset'.
 * charset:   the charset of the input.
 * translit:  NOTE(review): presumably selects a lossy conversion that
 *            substitutes unconvertible input instead of failing -- confirm
 *            against the implementation.
 * error:     location for a GError describing a failure.
 *
 * NOTE(review): presumably returns NULL with *error set on failure, and the
 * caller frees the returned string -- confirm.
 */
gchar *mm_modem_charset_bytearray_to_utf8 (GByteArray *bytearray,
MMModemCharset charset,
gboolean translit,
GError **error);
/*
 * Convert the input string, which is encoded in the given charset, into a
 * UTF-8 encoded string. Those charsets that allow embedded NUL bytes
 * (e.g. UTF-16) need to be hex-encoded.
 *
 * If the input string is NUL-terminated, len may be given as -1; otherwise
 * len needs to specify the number of valid bytes in the input string.
 *
 * The output string is guaranteed to be valid UTF-8 and NUL-terminated.
 *
 * str:      the input string, encoded in 'charset'.
 * len:      number of valid bytes in str, or -1 if str is NUL-terminated.
 * charset:  the charset of the input.
 * translit: NOTE(review): presumably selects a lossy conversion that
 *           substitutes unconvertible input instead of failing -- confirm
 *           against the implementation.
 * error:    location for a GError describing a failure.
 *
 * NOTE(review): presumably returns NULL with *error set on failure, and the
 * caller frees the returned string -- confirm.
 */
gchar *mm_modem_charset_str_to_utf8 (const gchar *str,
gssize len,
MMModemCharset charset,
gboolean translit,
GError **error);
/*****************************************************************************************/
/*
 * Initialization entry point for the charset helpers; takes no arguments
 * and returns nothing.
 *
 * NOTE(review): what state is set up, and whether this must be called
 * before the other functions in this header, is not visible here -- confirm
 * against the implementation.
 */
void mm_modem_charsets_init (void);
/*
 * Select an appropriate encoding and split a UTF-8 encoded input string
 * into N UTF-8 strings, so that each of the strings can be encoded into
 * 'charset' and placed in an SMS part.
 *
 * text:       the UTF-8 encoded input string.
 * charset:    pointer to the charset; NOTE(review): presumably written with
 *             the encoding that was selected -- confirm whether it is also
 *             read as an input.
 * log_object: opaque pointer; NOTE(review): presumably the object used as
 *             logging context -- confirm.
 *
 * NOTE(review): the returned array is presumably NULL-terminated and owned
 * by the caller -- confirm against the implementation.
 */
gchar **mm_charset_util_split_text (const gchar *text,
MMModemCharset *charset,
gpointer log_object);
#endif /* MM_CHARSETS_H */