Subversion
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
svn_utf.h
Go to the documentation of this file.
1 /**
2  * @copyright
3  * ====================================================================
4  * Licensed to the Apache Software Foundation (ASF) under one
5  * or more contributor license agreements. See the NOTICE file
6  * distributed with this work for additional information
7  * regarding copyright ownership. The ASF licenses this file
8  * to you under the Apache License, Version 2.0 (the
9  * "License"); you may not use this file except in compliance
10  * with the License. You may obtain a copy of the License at
11  *
12  * http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing,
15  * software distributed under the License is distributed on an
16  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  * KIND, either express or implied. See the License for the
18  * specific language governing permissions and limitations
19  * under the License.
20  * ====================================================================
21  * @endcopyright
22  *
23  * @file svn_utf.h
24  * @brief UTF-8 conversion routines
25  *
26  * Whenever a conversion routine cannot convert to or from UTF-8, the
27  * error returned has code @c APR_EINVAL.
28  */
29 
30 
31 
32 #ifndef SVN_UTF_H
33 #define SVN_UTF_H
34 
35 #include <apr_pools.h>
36 #include <apr_xlate.h> /* for APR_*_CHARSET */
37 
38 #include "svn_types.h"
39 #include "svn_string.h"
40 
41 #ifdef __cplusplus
42 extern "C" {
43 #endif /* __cplusplus */
44 
45 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET
46 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET
47 
48 /**
49  * Initialize the UTF-8 encoding/decoding routines.
50  * Allocate cached translation handles in a subpool of @a pool.
51  *
52  * @note It is optional to call this function, but if it is used, no other
53  * svn function may be in use in other threads during the call of this
54  * function or when @a pool is cleared or destroyed.
55  * Initializing the UTF-8 routines will improve performance.
56  *
57  * @since New in 1.1.
58  */
59 void
60 svn_utf_initialize(apr_pool_t *pool);
61 
62 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src;
63  * allocate @a *dest in @a pool.
64  */
65 svn_error_t *
66 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
67  const svn_stringbuf_t *src,
68  apr_pool_t *pool);
69 
70 
71 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate
72  * @a *dest in @a pool.
73  */
74 svn_error_t *
75 svn_utf_string_to_utf8(const svn_string_t **dest,
76  const svn_string_t *src,
77  apr_pool_t *pool);
78 
79 
80 /** Set @a *dest to a utf8-encoded C string from native C string @a src;
81  * allocate @a *dest in @a pool.
82  */
83 svn_error_t *
84 svn_utf_cstring_to_utf8(const char **dest,
85  const char *src,
86  apr_pool_t *pool);
87 
88 
89 /** Set @a *dest to a utf8 encoded C string from @a frompage encoded C
90  * string @a src; allocate @a *dest in @a pool.
91  *
92  * @since New in 1.4.
93  */
94 svn_error_t *
95 svn_utf_cstring_to_utf8_ex2(const char **dest,
96  const char *src,
97  const char *frompage,
98  apr_pool_t *pool);
99 
100 
101 /** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is
102  * ignored.
103  *
104  * @deprecated Provided for backward compatibility with the 1.3 API.
105  */
106 SVN_DEPRECATED
107 svn_error_t *
108 svn_utf_cstring_to_utf8_ex(const char **dest,
109  const char *src,
110  const char *frompage,
111  const char *convset_key,
112  apr_pool_t *pool);
113 
114 
115 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src;
116  * allocate @a *dest in @a pool.
117  */
118 svn_error_t *
119 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
120  const svn_stringbuf_t *src,
121  apr_pool_t *pool);
122 
123 
124 /** Set @a *dest to a natively-encoded string from utf8 string @a src;
125  * allocate @a *dest in @a pool.
126  */
127 svn_error_t *
128 svn_utf_string_from_utf8(const svn_string_t **dest,
129  const svn_string_t *src,
130  apr_pool_t *pool);
131 
132 
133 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src;
134  * allocate @a *dest in @a pool.
135  */
136 svn_error_t *
137 svn_utf_cstring_from_utf8(const char **dest,
138  const char *src,
139  apr_pool_t *pool);
140 
141 
142 /** Set @a *dest to a @a topage encoded C string from utf8 encoded C string
143  * @a src; allocate @a *dest in @a pool.
144  *
145  * @since New in 1.4.
146  */
147 svn_error_t *
148 svn_utf_cstring_from_utf8_ex2(const char **dest,
149  const char *src,
150  const char *topage,
151  apr_pool_t *pool);
152 
153 
154 /** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is
155  * ignored.
156  *
157  * @deprecated Provided for backward compatibility with the 1.3 API.
158  */
159 SVN_DEPRECATED
160 svn_error_t *
161 svn_utf_cstring_from_utf8_ex(const char **dest,
162  const char *src,
163  const char *topage,
164  const char *convset_key,
165  apr_pool_t *pool);
166 
167 
168 /** Return a fuzzily native-encoded C string from utf8 C string @a src,
169  * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii
170  * characters the same, and substitutes "?\\XXX" for others, where XXX
171  * is the unsigned decimal code for that character.
172  *
173  * This function cannot error; it is guaranteed to return something.
174  * First it will recode as described above and then attempt to convert
175  * the (new) 7-bit UTF-8 string to native encoding. If that fails, it
176  * will return the raw fuzzily recoded string, which may or may not be
177  * meaningful in the client's locale, but is (presumably) better than
178  * nothing.
179  *
180  * ### Notes:
181  *
182  * Improvement is possible, even imminent. The original problem was
183  * that if you converted a UTF-8 string (say, a log message) into a
184  * locale that couldn't represent all the characters, you'd just get a
185  * static placeholder saying "[unconvertible log message]". Then
186  * Justin Erenkrantz pointed out how on platforms that didn't support
187  * conversion at all, "svn log" would still fail completely when it
188  * encountered unconvertible data.
189  *
190  * Now for both cases, the caller can at least fall back on this
191  * function, which converts the message as best it can, substituting
192  * "?\\XXX" escape codes for the non-ascii characters.
193  *
194  * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
195  * so when we can detect that at configure time, things will change.
196  * Also, this should (?) be moved to apr/apu eventually.
197  *
198  * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
199  * details.
200  */
201 const char *
202 svn_utf_cstring_from_utf8_fuzzy(const char *src,
203  apr_pool_t *pool);
204 
205 
206 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src;
207  * allocate @a *dest in @a pool.
208  */
209 svn_error_t *
210 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
211  const svn_stringbuf_t *src,
212  apr_pool_t *pool);
213 
214 
215 /** Set @a *dest to a natively-encoded C string from utf8 string @a src;
216  * allocate @a *dest in @a pool.
217  */
218 svn_error_t *
219 svn_utf_cstring_from_utf8_string(const char **dest,
220  const svn_string_t *src,
221  apr_pool_t *pool);
222 
223 #ifdef __cplusplus
224 }
225 #endif /* __cplusplus */
226 
227 #endif /* SVN_UTF_H */
Counted-length strings for Subversion, plus some C string goodies.
void svn_utf_initialize(apr_pool_t *pool)
Initialize the UTF-8 encoding/decoding routines.
svn_error_t * svn_utf_cstring_to_utf8_ex(const char **dest, const char *src, const char *frompage, const char *convset_key, apr_pool_t *pool)
Like svn_utf_cstring_to_utf8_ex2() but with convset_key which is ignored.
svn_error_t * svn_utf_cstring_from_utf8_stringbuf(const char **dest, const svn_stringbuf_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded C string from utf8 stringbuf src; allocate *dest in pool...
svn_error_t * svn_utf_cstring_to_utf8(const char **dest, const char *src, apr_pool_t *pool)
Set *dest to a utf8-encoded C string from native C string src; allocate *dest in pool.
A simple counted string.
Definition: svn_string.h:96
svn_error_t * svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, const svn_stringbuf_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded stringbuf from utf8 stringbuf src; allocate *dest in pool...
svn_error_t * svn_utf_cstring_from_utf8(const char **dest, const char *src, apr_pool_t *pool)
Set *dest to a natively-encoded C string from utf8 C string src; allocate *dest in pool...
Subversion error object.
Definition: svn_types.h:90
svn_error_t * svn_utf_cstring_from_utf8_string(const char **dest, const svn_string_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded C string from utf8 string src; allocate *dest in pool.
svn_error_t * svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, const svn_stringbuf_t *src, apr_pool_t *pool)
Set *dest to a utf8-encoded stringbuf from native stringbuf src; allocate *dest in pool...
svn_error_t * svn_utf_cstring_from_utf8_ex(const char **dest, const char *src, const char *topage, const char *convset_key, apr_pool_t *pool)
Like svn_utf_cstring_from_utf8_ex2() but with convset_key which is ignored.
svn_error_t * svn_utf_cstring_from_utf8_ex2(const char **dest, const char *src, const char *topage, apr_pool_t *pool)
Set *dest to a topage encoded C string from utf8 encoded C string src; allocate *dest in pool...
svn_error_t * svn_utf_string_to_utf8(const svn_string_t **dest, const svn_string_t *src, apr_pool_t *pool)
Set *dest to a utf8-encoded string from native string src; allocate *dest in pool.
Subversion's data types.
#define SVN_DEPRECATED
Macro used to mark deprecated functions.
Definition: svn_types.h:58
svn_error_t * svn_utf_string_from_utf8(const svn_string_t **dest, const svn_string_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded string from utf8 string src; allocate *dest in pool.
const char * svn_utf_cstring_from_utf8_fuzzy(const char *src, apr_pool_t *pool)
Return a fuzzily native-encoded C string from utf8 C string src, allocated in pool.
svn_error_t * svn_utf_cstring_to_utf8_ex2(const char **dest, const char *src, const char *frompage, apr_pool_t *pool)
Set *dest to a utf8 encoded C string from frompage encoded C string src; allocate *dest in pool...
A buffered string, capable of appending without an allocation and copy for each append.
Definition: svn_string.h:104