00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004, 2008 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_utf.h 00019 * @brief UTF-8 conversion routines 00020 */ 00021 00022 00023 00024 #ifndef SVN_UTF_H 00025 #define SVN_UTF_H 00026 00027 #include <apr_xlate.h> 00028 00029 #include "svn_error.h" 00030 #include "svn_string.h" 00031 #include "svn_types.h" 00032 00033 #ifdef __cplusplus 00034 extern "C" { 00035 #endif /* __cplusplus */ 00036 00037 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET 00038 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET 00039 00040 /** 00041 * Initialize the UTF-8 encoding/decoding routines. 00042 * Allocate cached translation handles in a subpool of @a pool. 00043 * 00044 * @note It is optional to call this function, but if it is used, no other 00045 * svn function may be in use in other threads during the call of this 00046 * function or when @a pool is cleared or destroyed. 00047 * Initializing the UTF-8 routines will improve performance. 00048 * 00049 * @since New in 1.1. 00050 */ 00051 void 00052 svn_utf_initialize(apr_pool_t *pool); 00053 00054 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src; 00055 * allocate @a *dest in @a pool. 00056 */ 00057 svn_error_t * 00058 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, 00059 const svn_stringbuf_t *src, 00060 apr_pool_t *pool); 00061 00062 00063 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate 00064 * @a *dest in @a pool. 00065 */ 00066 svn_error_t * 00067 svn_utf_string_to_utf8(const svn_string_t **dest, 00068 const svn_string_t *src, 00069 apr_pool_t *pool); 00070 00071 00072 /** Set @a *dest to a utf8-encoded C string from native C string @a src; 00073 * allocate @a *dest in @a pool. 00074 */ 00075 svn_error_t * 00076 svn_utf_cstring_to_utf8(const char **dest, 00077 const char *src, 00078 apr_pool_t *pool); 00079 00080 00081 /** Set @a *dest to a utf8 encoded C string from @a frompage encoded C 00082 * string @a src; allocate @a *dest in @a pool. 00083 * 00084 * @since New in 1.4. 00085 */ 00086 svn_error_t * 00087 svn_utf_cstring_to_utf8_ex2(const char **dest, 00088 const char *src, 00089 const char *frompage, 00090 apr_pool_t *pool); 00091 00092 00093 /** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is 00094 * ignored. 00095 * 00096 * @deprecated Provided for backward compatibility with the 1.3 API. 00097 */ 00098 SVN_DEPRECATED 00099 svn_error_t * 00100 svn_utf_cstring_to_utf8_ex(const char **dest, 00101 const char *src, 00102 const char *frompage, 00103 const char *convset_key, 00104 apr_pool_t *pool); 00105 00106 00107 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src; 00108 * allocate @a *dest in @a pool. 00109 */ 00110 svn_error_t * 00111 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, 00112 const svn_stringbuf_t *src, 00113 apr_pool_t *pool); 00114 00115 00116 /** Set @a *dest to a natively-encoded string from utf8 string @a src; 00117 * allocate @a *dest in @a pool. 00118 */ 00119 svn_error_t * 00120 svn_utf_string_from_utf8(const svn_string_t **dest, 00121 const svn_string_t *src, 00122 apr_pool_t *pool); 00123 00124 00125 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src; 00126 * allocate @a *dest in @a pool. 00127 */ 00128 svn_error_t * 00129 svn_utf_cstring_from_utf8(const char **dest, 00130 const char *src, 00131 apr_pool_t *pool); 00132 00133 00134 /** Set @a *dest to a @a topage encoded C string from utf8 encoded C string 00135 * @a src; allocate @a *dest in @a pool. 00136 * 00137 * @since New in 1.4. 00138 */ 00139 svn_error_t * 00140 svn_utf_cstring_from_utf8_ex2(const char **dest, 00141 const char *src, 00142 const char *topage, 00143 apr_pool_t *pool); 00144 00145 00146 /** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is 00147 * ignored. 00148 * 00149 * @deprecated Provided for backward compatibility with the 1.3 API. 00150 */ 00151 SVN_DEPRECATED 00152 svn_error_t * 00153 svn_utf_cstring_from_utf8_ex(const char **dest, 00154 const char *src, 00155 const char *topage, 00156 const char *convset_key, 00157 apr_pool_t *pool); 00158 00159 00160 /** Return a fuzzily native-encoded C string from utf8 C string @a src, 00161 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii 00162 * characters the same, and substitutes "?\\XXX" for others, where XXX 00163 * is the unsigned decimal code for that character. 00164 * 00165 * This function cannot error; it is guaranteed to return something. 00166 * First it will recode as described above and then attempt to convert 00167 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it 00168 * will return the raw fuzzily recoded string, which may or may not be 00169 * meaningful in the client's locale, but is (presumably) better than 00170 * nothing. 00171 * 00172 * ### Notes: 00173 * 00174 * Improvement is possible, even imminent. The original problem was 00175 * that if you converted a UTF-8 string (say, a log message) into a 00176 * locale that couldn't represent all the characters, you'd just get a 00177 * static placeholder saying "[unconvertible log message]". Then 00178 * Justin Erenkrantz pointed out how on platforms that didn't support 00179 * conversion at all, "svn log" would still fail completely when it 00180 * encountered unconvertible data. 00181 * 00182 * Now for both cases, the caller can at least fall back on this 00183 * function, which converts the message as best it can, substituting 00184 * "?\\XXX" escape codes for the non-ascii characters. 00185 * 00186 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option, 00187 * so when we can detect that at configure time, things will change. 00188 * Also, this should (?) be moved to apr/apu eventually. 00189 * 00190 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for 00191 * details. 00192 */ 00193 const char * 00194 svn_utf_cstring_from_utf8_fuzzy(const char *src, 00195 apr_pool_t *pool); 00196 00197 00198 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src; 00199 * allocate @a *dest in @a pool. 00200 */ 00201 svn_error_t * 00202 svn_utf_cstring_from_utf8_stringbuf(const char **dest, 00203 const svn_stringbuf_t *src, 00204 apr_pool_t *pool); 00205 00206 00207 /** Set @a *dest to a natively-encoded C string from utf8 string @a src; 00208 * allocate @a *dest in @a pool. 00209 */ 00210 svn_error_t * 00211 svn_utf_cstring_from_utf8_string(const char **dest, 00212 const svn_string_t *src, 00213 apr_pool_t *pool); 00214 00215 #ifdef __cplusplus 00216 } 00217 #endif /* __cplusplus */ 00218 00219 #endif /* SVN_UTF_H */
1.3.9.1