svn_utf.h

Go to the documentation of this file.
00001 /**
00002  * @copyright
00003  * ====================================================================
00004  * Copyright (c) 2000-2004, 2008 CollabNet.  All rights reserved.
00005  *
00006  * This software is licensed as described in the file COPYING, which
00007  * you should have received as part of this distribution.  The terms
00008  * are also available at http://subversion.tigris.org/license-1.html.
00009  * If newer versions of this license are posted there, you may use a
00010  * newer version instead, at your option.
00011  *
00012  * This software consists of voluntary contributions made by many
00013  * individuals.  For exact contribution history, see the revision
00014  * history and logs, available at http://subversion.tigris.org/.
00015  * ====================================================================
00016  * @endcopyright
00017  *
00018  * @file svn_utf.h
00019  * @brief UTF-8 conversion routines
00020  */
00021 
00022 
00023 
00024 #ifndef SVN_UTF_H
00025 #define SVN_UTF_H
00026 
00027 #include <apr_xlate.h>
00028 
00029 #include "svn_error.h"
00030 #include "svn_string.h"
00031 #include "svn_types.h"
00032 
00033 #ifdef __cplusplus
00034 extern "C" {
00035 #endif /* __cplusplus */
00036 
00037 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET /**< Subversion-prefixed alias for APR_LOCALE_CHARSET. */
00038 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET /**< Subversion-prefixed alias for APR_DEFAULT_CHARSET. */
00039 
00040 /**
00041  * Initialize the UTF-8 encoding/decoding routines, allocating the cached
00042  * translation handles they use in a subpool of @a pool.
00043  *
00044  * @note Calling this function is optional, but doing so improves
00045  * performance.  If it is called, no other svn function may be in use
00046  * in other threads while this function executes, nor while @a pool is
00047  * being cleared or destroyed.
00048  *
00049  * @since New in 1.1.
00050  */
00051 void
00052 svn_utf_initialize(apr_pool_t *pool);
00053 
00054 /** Convert the native-encoded stringbuf @a src to UTF-8: set @a *dest to
00055  * the resulting UTF-8 stringbuf, which is allocated in @a pool.
00056  */
00057 svn_error_t *
00058 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
00059                           const svn_stringbuf_t *src,
00060                           apr_pool_t *pool);
00061 
00062 
00063 /** Convert the native-encoded string @a src to UTF-8: set @a *dest to the
00064  * resulting UTF-8 string, which is allocated in @a pool.
00065  */
00066 svn_error_t *
00067 svn_utf_string_to_utf8(const svn_string_t **dest,
00068                        const svn_string_t *src,
00069                        apr_pool_t *pool);
00070 
00071 
00072 /** Convert the native-encoded C string @a src to UTF-8: set @a *dest to
00073  * the resulting UTF-8 C string, which is allocated in @a pool.
00074  */
00075 svn_error_t *
00076 svn_utf_cstring_to_utf8(const char **dest,
00077                         const char *src,
00078                         apr_pool_t *pool);
00079 
00080 
00081 /** Convert the C string @a src from the @a frompage encoding to UTF-8:
00082  * set @a *dest to the UTF-8 C string, which is allocated in @a pool.
00083  *
00084  * @since New in 1.4.
00085  */
00086 svn_error_t *
00087 svn_utf_cstring_to_utf8_ex2(const char **dest,
00088                             const char *src,
00089                             const char *frompage,
00090                             apr_pool_t *pool);
00091 
00092 
00093 /** Like svn_utf_cstring_to_utf8_ex2(), but takes an additional
00094  * @a convset_key argument, which is ignored.
00095  *
00096  * @deprecated Provided for backward compatibility with the 1.3 API.
00097  */
00098 SVN_DEPRECATED
00099 svn_error_t *
00100 svn_utf_cstring_to_utf8_ex(const char **dest,
00101                            const char *src,
00102                            const char *frompage,
00103                            const char *convset_key,
00104                            apr_pool_t *pool);
00105 
00106 
00107 /** Convert the UTF-8 stringbuf @a src to the native encoding: set
00108  * @a *dest to the natively-encoded stringbuf, allocated in @a pool.
00109  */
00110 svn_error_t *
00111 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
00112                             const svn_stringbuf_t *src,
00113                             apr_pool_t *pool);
00114 
00115 
00116 /** Convert the UTF-8 string @a src to the native encoding: set
00117  * @a *dest to the natively-encoded string, allocated in @a pool.
00118  */
00119 svn_error_t *
00120 svn_utf_string_from_utf8(const svn_string_t **dest,
00121                          const svn_string_t *src,
00122                          apr_pool_t *pool);
00123 
00124 
00125 /** Convert the UTF-8 C string @a src to the native encoding: set
00126  * @a *dest to the natively-encoded C string, allocated in @a pool.
00127  */
00128 svn_error_t *
00129 svn_utf_cstring_from_utf8(const char **dest,
00130                           const char *src,
00131                           apr_pool_t *pool);
00132 
00133 
00134 /** Convert the UTF-8 encoded C string @a src to the @a topage encoding:
00135  * set @a *dest to the converted C string, which is allocated in @a pool.
00136  *
00137  * @since New in 1.4.
00138  */
00139 svn_error_t *
00140 svn_utf_cstring_from_utf8_ex2(const char **dest,
00141                               const char *src,
00142                               const char *topage,
00143                               apr_pool_t *pool);
00144 
00145 
00146 /** Like svn_utf_cstring_from_utf8_ex2(), but takes an additional
00147  * @a convset_key argument, which is ignored.
00148  *
00149  * @deprecated Provided for backward compatibility with the 1.3 API.
00150  */
00151 SVN_DEPRECATED
00152 svn_error_t *
00153 svn_utf_cstring_from_utf8_ex(const char **dest,
00154                              const char *src,
00155                              const char *topage,
00156                              const char *convset_key,
00157                              apr_pool_t *pool);
00158 
00159 
00160 /** Return a fuzzily native-encoded C string made from UTF-8 C string
00161  * @a src, allocated in @a pool.  A fuzzy recoding leaves all 7-bit ASCII
00162  * characters the same, and substitutes "?\\XXX" for others, where XXX
00163  * is the unsigned decimal code for that character.
00164  *
00165  * This function cannot error; it is guaranteed to return something.
00166  * First it recodes @a src as described above, then it attempts to
00167  * convert the (new) 7-bit UTF-8 string to native encoding.  If that
00168  * conversion fails, it returns the raw fuzzily recoded string, which may
00169  * or may not be meaningful in the client's locale, but is (presumably)
00170  * better than nothing.
00171  *
00172  * ### Notes:
00173  *
00174  * Improvement is possible, even imminent.  The original problem was
00175  * that if you converted a UTF-8 string (say, a log message) into a
00176  * locale that couldn't represent all the characters, you'd just get a
00177  * static placeholder saying "[unconvertible log message]".  Then
00178  * Justin Erenkrantz pointed out that on platforms that didn't support
00179  * conversion at all, "svn log" would still fail completely when it
00180  * encountered unconvertible data.
00181  *
00182  * Now for both cases, the caller can at least fall back on this
00183  * function, which converts the message as best it can, substituting
00184  * "?\\XXX" escape codes for the non-ASCII characters.
00185  *
00186  * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
00187  * so when we can detect that at configure time, things will change.
00188  * Also, this should (?) be moved to apr/apu eventually.
00189  *
00190  * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
00191  * details.
00192  */
00193 const char *
00194 svn_utf_cstring_from_utf8_fuzzy(const char *src,
00195                                 apr_pool_t *pool);
00196 
00197 
00198 /** Convert the UTF-8 stringbuf @a src to the native encoding: set
00199  * @a *dest to the natively-encoded C string, allocated in @a pool.
00200  */
00201 svn_error_t *
00202 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
00203                                     const svn_stringbuf_t *src,
00204                                     apr_pool_t *pool);
00205 
00206 
00207 /** Convert the UTF-8 string @a src to the native encoding: set
00208  * @a *dest to the natively-encoded C string, allocated in @a pool.
00209  */
00210 svn_error_t *
00211 svn_utf_cstring_from_utf8_string(const char **dest,
00212                                  const svn_string_t *src,
00213                                  apr_pool_t *pool);
00214 
00215 #ifdef __cplusplus
00216 }
00217 #endif /* __cplusplus */
00218 
00219 #endif /* SVN_UTF_H */

Generated on Tue Oct 7 04:09:55 2008 for Subversion by  doxygen 1.3.9.1