00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_path.h 00019 * @brief A path manipulation library 00020 * 00021 * All incoming and outgoing paths are non-NULL and in UTF-8, unless 00022 * otherwise documented. 00023 * 00024 * No result path ever ends with a separator, no matter whether the 00025 * path is a file or directory, because we always canonicalize() it. 00026 * 00027 * Nearly all the @c svn_path_xxx functions expect paths passed into 00028 * them to be in canonical form as defined by the Subversion path 00029 * library itself. The only functions which do *not* have such 00030 * expectations are: 00031 * 00032 * - @c svn_path_canonicalize() 00033 * - @c svn_path_is_canonical() 00034 * - @c svn_path_internal_style() 00035 * - @c svn_path_uri_encode() 00036 * 00037 * For the most part, we mean what most anyone would mean when talking 00038 * about canonical paths, but to be on the safe side, you must run 00039 * your paths through @c svn_path_canonicalize() before passing them to 00040 * other functions in this API. 
00041 */ 00042 00043 #ifndef SVN_PATH_H 00044 #define SVN_PATH_H 00045 00046 00047 #include <apr_pools.h> 00048 #include <apr_tables.h> 00049 00050 #include "svn_string.h" 00051 #include "svn_error.h" 00052 00053 00054 #ifdef __cplusplus 00055 extern "C" { 00056 #endif /* __cplusplus */ 00057 00058 00059 00060 /** Convert @a path from the local style to the canonical internal style. */ 00061 const char * 00062 svn_path_internal_style(const char *path, apr_pool_t *pool); 00063 00064 /** Convert @a path from the canonical internal style to the local style. */ 00065 const char * 00066 svn_path_local_style(const char *path, apr_pool_t *pool); 00067 00068 00069 /** Join a base path (@a base) with a component (@a component), allocated in 00070 * @a pool. 00071 * 00072 * If either @a base or @a component is the empty path, then the other 00073 * argument will be copied and returned. If both are the empty path the 00074 * empty path is returned. 00075 * 00076 * If the @a component is an absolute path, then it is copied and returned. 00077 * Exactly one slash character ('/') is used to join the components, 00078 * accounting for any trailing slash in @a base. 00079 * 00080 * Note that the contents of @a base are not examined, so it is possible to 00081 * use this function for constructing URLs, or for relative URLs or 00082 * repository paths. 00083 * 00084 * This function is NOT appropriate for native (local) file 00085 * paths. Only for "internal" canonicalized paths, since it uses '/' 00086 * for the separator. Further, an absolute path (for @a component) is 00087 * based on a leading '/' character. Thus, an "absolute URI" for the 00088 * @a component won't be detected. An absolute URI can only be used 00089 * for the base. 00090 */ 00091 char * 00092 svn_path_join(const char *base, const char *component, apr_pool_t *pool); 00093 00094 /** Join multiple components onto a @a base path, allocated in @a pool. The 00095 * components are terminated by a @c NULL. 
00096 * 00097 * If any component is the empty string, it will be ignored. 00098 * 00099 * If any component is an absolute path, then it resets the base and 00100 * further components will be appended to it. 00101 * 00102 * This function does not support URLs. 00103 * 00104 * See svn_path_join() for further notes about joining paths. 00105 */ 00106 char * 00107 svn_path_join_many(apr_pool_t *pool, const char *base, ...); 00108 00109 00110 /** Get the basename of the specified canonicalized @a path. The 00111 * basename is defined as the last component of the path (ignoring any 00112 * trailing slashes). If the @a path is root ("/"), then that is 00113 * returned. Otherwise, the returned value will have no slashes in 00114 * it. 00115 * 00116 * Example: svn_path_basename("/foo/bar") -> "bar" 00117 * 00118 * The returned basename will be allocated in @a pool. 00119 * 00120 * @note If an empty string is passed, then an empty string will be returned. 00121 */ 00122 char * 00123 svn_path_basename(const char *path, apr_pool_t *pool); 00124 00125 /** Get the dirname of the specified canonicalized @a path, defined as 00126 * the path with its basename removed. 00127 * 00128 * If @a path is root ("/"), it is returned 00129 * unchanged. 00130 * 00131 * The returned dirname will be allocated in @a pool. 00132 */ 00133 char * 00134 svn_path_dirname(const char *path, apr_pool_t *pool); 00135 00136 /** Split @a path into a root portion and an extension such that 00137 * the root + the extension = the original path, and where the 00138 * extension contains no period (.) characters. If not @c NULL, set 00139 * @a *path_root to the root portion. If not @c NULL, set 00140 * @a *path_ext to the extension (or "" if there is no extension 00141 * found). Allocate both @a *path_root and @a *path_ext in @a pool. 00142 * 00143 * @since New in 1.5. 
00144 */ 00145 void 00146 svn_path_splitext(const char **path_root, const char **path_ext, 00147 const char *path, apr_pool_t *pool); 00148 00149 /** Return the number of components in the canonicalized @a path. 00150 * 00151 * @since New in 1.1. 00152 */ 00153 apr_size_t 00154 svn_path_component_count(const char *path); 00155 00156 /** Add a @a component (a NULL-terminated C-string) to the 00157 * canonicalized @a path. @a component is allowed to contain 00158 * directory separators. 00159 * 00160 * If @a path is non-empty, append the appropriate directory separator 00161 * character, and then @a component. If @a path is empty, simply set it to 00162 * @a component; don't add any separator character. 00163 * 00164 * If the result ends in a separator character, then remove the separator. 00165 */ 00166 void 00167 svn_path_add_component(svn_stringbuf_t *path, const char *component); 00168 00169 /** Remove one component off the end of the canonicalized @a path. */ 00170 void 00171 svn_path_remove_component(svn_stringbuf_t *path); 00172 00173 /** Remove @a n components off the end of the canonicalized @a path. 00174 * Equivalent to calling svn_path_remove_component() @a n times. 00175 * 00176 * @since New in 1.1. 00177 */ 00178 void 00179 svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n); 00180 00181 /** Divide the canonicalized @a path into @a *dirpath and @a 00182 * *base_name, allocated in @a pool. 00183 * 00184 * If @a dirpath or @a base_name is NULL, then don't set that one. 00185 * 00186 * Either @a dirpath or @a base_name may be @a path's own address, but they 00187 * may not both be the same address, or the results are undefined. 00188 * 00189 * If @a path has two or more components, the separator between @a dirpath 00190 * and @a base_name is not included in either of the new names. 
00191 * 00192 * examples: 00193 * - <pre>"/foo/bar/baz" ==> "/foo/bar" and "baz"</pre> 00194 * - <pre>"/bar" ==> "/" and "bar"</pre> 00195 * - <pre>"/" ==> "/" and "/"</pre> 00196 * - <pre>"X:/" ==> "X:/" and "X:/"</pre> 00197 * - <pre>"bar" ==> "" and "bar"</pre> 00198 * - <pre>"" ==> "" and ""</pre> 00199 */ 00200 void 00201 svn_path_split(const char *path, 00202 const char **dirpath, 00203 const char **base_name, 00204 apr_pool_t *pool); 00205 00206 00207 /** Return non-zero iff @a path is empty ("") or represents the current 00208 * directory -- that is, if prepending it as a component to an existing 00209 * path would result in no meaningful change. 00210 */ 00211 int 00212 svn_path_is_empty(const char *path); 00213 00214 #ifndef SVN_DIRENT_URI_H 00215 /* This declaration has been moved to svn_dirent_uri.h; it remains here only for 00216 compatibility reasons. */ 00217 svn_boolean_t 00218 svn_dirent_is_root(const char *dirent, apr_size_t len); 00219 #endif /* SVN_DIRENT_URI_H */ 00220 00221 /** Return a new path (or URL) like @a path, but transformed such that 00222 * some types of path specification redundancies are removed. 00223 * 00224 * This involves collapsing redundant "/./" elements, removing 00225 * multiple adjacent separator characters, removing trailing 00226 * separator characters, and possibly other semantically inoperative 00227 * transformations. 00228 * 00229 * Convert the scheme and hostname to lowercase (see issue #2475) 00230 * 00231 * The returned path may be statically allocated, equal to @a path, or 00232 * allocated from @a pool. 00233 */ 00234 const char * 00235 svn_path_canonicalize(const char *path, apr_pool_t *pool); 00236 00237 /** Return @c TRUE iff @a path is canonical. Use @a pool for temporary 00238 * allocations. 00239 * 00240 * @note The test for canonicalization is currently defined as 00241 * "looks exactly the same as @c svn_path_canonicalize() would make 00242 * it look". 00243 * 00244 * @since New in 1.5. 
00245 */ 00246 svn_boolean_t 00247 svn_path_is_canonical(const char *path, apr_pool_t *pool); 00248 00249 00250 /** Return an integer greater than, equal to, or less than 0, according 00251 * as @a path1 is greater than, equal to, or less than @a path2. 00252 */ 00253 int 00254 svn_path_compare_paths(const char *path1, const char *path2); 00255 00256 00257 /** Return the longest common path shared by two canonicalized paths, 00258 * @a path1 and @a path2. If there's no common ancestor, return the 00259 * empty path. 00260 * 00261 * @a path1 and @a path2 may be URLs. In order for two URLs to have 00262 * a common ancestor, they must (a) have the same protocol (since two URLs 00263 * with the same path but different protocols may point at completely 00264 * different resources), and (b) share a common ancestor in their path 00265 * component, i.e. 'protocol://' is not a sufficient ancestor. 00266 */ 00267 char * 00268 svn_path_get_longest_ancestor(const char *path1, 00269 const char *path2, 00270 apr_pool_t *pool); 00271 00272 /** Convert @a relative canonicalized path to an absolute path and 00273 * return the results in @a *pabsolute, allocated in @a pool. 00274 * 00275 * @a relative may be a URL, in which case no attempt is made to convert it, 00276 * and a copy of the URL is returned. 00277 */ 00278 svn_error_t * 00279 svn_path_get_absolute(const char **pabsolute, 00280 const char *relative, 00281 apr_pool_t *pool); 00282 00283 /** Return the path part of the canonicalized @a path in @a 00284 * *pdirectory, and the file part in @a *pfile. If @a path is a 00285 * directory, set @a *pdirectory to @a path, and @a *pfile to the 00286 * empty string. If @a path does not exist it is treated as if it is 00287 * a file, since directories do not normally vanish. 
00288 */ 00289 svn_error_t * 00290 svn_path_split_if_file(const char *path, 00291 const char **pdirectory, 00292 const char **pfile, 00293 apr_pool_t *pool); 00294 00295 /** Find the common prefix of the canonicalized paths in @a targets 00296 * (an array of <tt>const char *</tt>'s), and remove redundant paths if @a 00297 * remove_redundancies is TRUE. 00298 * 00299 * - Set @a *pcommon to the absolute path of the path or URL common to 00300 * all of the targets. If the targets have no common prefix, or 00301 * are a mix of URLs and local paths, set @a *pcommon to the 00302 * empty string. 00303 * 00304 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets 00305 * to an array of targets relative to @a *pcommon, and if 00306 * @a remove_redundancies is TRUE, omit any paths/URLs that are 00307 * descendants of another path/URL in @a targets. If *pcommon 00308 * is empty, @a *pcondensed_targets will contain full URLs and/or 00309 * absolute paths; redundancies can still be removed (from both URLs 00310 * and paths). If @a pcondensed_targets is NULL, leave it alone. 00311 * 00312 * Else if there is exactly one target, then 00313 * 00314 * - Set @a *pcommon to that target, and 00315 * 00316 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets 00317 * to an array containing zero elements. Else if 00318 * @a pcondensed_targets is NULL, leave it alone. 00319 * 00320 * If there are no items in @a targets, set @a *pcommon and (if 00321 * applicable) @a *pcondensed_targets to @c NULL. 00322 * 00323 * @note There is no guarantee that @a *pcommon is within a working 00324 * copy. 
*/ 00325 svn_error_t * 00326 svn_path_condense_targets(const char **pcommon, 00327 apr_array_header_t **pcondensed_targets, 00328 const apr_array_header_t *targets, 00329 svn_boolean_t remove_redundancies, 00330 apr_pool_t *pool); 00331 00332 00333 /** Copy a list of canonicalized @a targets, one at a time, into @a 00334 * pcondensed_targets, omitting any targets that are found earlier in 00335 * the list, or whose ancestor is found earlier in the list. Ordering 00336 * of targets in the original list is preserved in the condensed list 00337 * of targets. Use @a pool for any allocations. 00338 * 00339 * How does this differ in functionality from svn_path_condense_targets()? 00340 * 00341 * Here's the short version: 00342 * 00343 * 1. Disclaimer: if you wish to debate the following, talk to Karl. :-) 00344 * Order matters for updates because a multi-arg update is not 00345 * atomic, and CVS users are used to, when doing 'cvs up targetA 00346 * targetB' seeing targetA get updated, then targetB. I think the 00347 * idea is that if you're in a time-sensitive or flaky-network 00348 * situation, a user can say, "I really *need* to update 00349 * wc/A/D/G/tau, but I might as well update my whole working copy if 00350 * I can." So that user will do 'svn up wc/A/D/G/tau wc', and if 00351 * something dies in the middle of the 'wc' update, at least the 00352 * user has 'tau' up-to-date. 00353 * 00354 * 2. Also, we have this notion of an anchor and a target for updates 00355 * (the anchor is where the update editor is rooted, the target is 00356 * the actual thing we want to update). I needed a function that 00357 * would NOT screw with my input paths so that I could tell the 00358 * difference between someone being in A/D and saying 'svn up G' and 00359 * being in A/D/G and saying 'svn up .' -- believe it or not, these 00360 * two things don't mean the same thing. 
svn_path_condense_targets() 00361 * plays with absolute paths (which is fine, so does 00362 * svn_path_remove_redundancies()), but the difference is that it 00363 * actually tweaks those targets to be relative to the "grandfather 00364 * path" common to all the targets. Updates don't require a 00365 * "grandfather path" at all, and even if they did, the whole 00366 * conversion to an absolute path drops the crucial difference 00367 * between saying "i'm in foo, update bar" and "i'm in foo/bar, 00368 * update '.'" 00369 */ 00370 svn_error_t * 00371 svn_path_remove_redundancies(apr_array_header_t **pcondensed_targets, 00372 const apr_array_header_t *targets, 00373 apr_pool_t *pool); 00374 00375 00376 /** Decompose the canonicalized @a path into an array of <tt>const 00377 * char *</tt> components, allocated in @a pool. If @a path is 00378 * absolute, the first component will be a lone dir separator (the 00379 * root directory). 00380 */ 00381 apr_array_header_t * 00382 svn_path_decompose(const char *path, apr_pool_t *pool); 00383 00384 /** Join an array of <tt>const char *</tt> components into a '/' 00385 * separated path, allocated in @a pool. The joined path is absolute if 00386 * the first component is a lone dir separator. 00387 * 00388 * Calling svn_path_compose() on the output of svn_path_decompose() 00389 * will return the exact same path. 00390 * 00391 * @since New in 1.5. 00392 */ 00393 const char * 00394 svn_path_compose(const apr_array_header_t *components, apr_pool_t *pool); 00395 00396 /** Test that @a name is a single path component, that is: 00397 * - not @c NULL or empty. 00398 * - not a `/'-separated directory path 00399 * - not `..' 00400 */ 00401 svn_boolean_t 00402 svn_path_is_single_path_component(const char *name); 00403 00404 00405 /** 00406 * Test to see if a backpath, i.e. '..', is present in @a path. 00407 * If not, return @c FALSE. 00408 * If so, return @c TRUE. 00409 * 00410 * @since New in 1.1. 
00411 */ 00412 svn_boolean_t 00413 svn_path_is_backpath_present(const char *path); 00414 00415 00416 /** 00417 * Test to see if a dotpath, i.e. '.', is present in @a path. 00418 * If not, return @c FALSE. 00419 * If so, return @c TRUE. 00420 * 00421 * @since New in 1.6. 00422 */ 00423 svn_boolean_t 00424 svn_path_is_dotpath_present(const char *path); 00425 00426 00427 /** Test if @a path2 is a child of @a path1. 00428 * If not, return @c NULL. 00429 * If so, return a copy of the remainder path, allocated in @a pool. 00430 * (The remainder is the component which, added to @a path1, yields 00431 * @a path2. The remainder does not begin with a dir separator.) 00432 * 00433 * Both paths must be in canonical form, and must either be absolute, 00434 * or contain no ".." components. 00435 * 00436 * If @a path2 is the same as @a path1, it is not considered a child, so the 00437 * result is @c NULL; an empty string is never returned. 00438 * 00439 * @note In 1.5 this function has been extended to allow a @c NULL @a pool 00440 * in which case a pointer into @a path2 will be returned to 00441 * identify the remainder path. 00442 * 00443 * ### todo: the ".." restriction is unfortunate, and would ideally 00444 * be lifted by making the implementation smarter. But this is not 00445 * trivial: if the path is "../foo", how do you know whether or not 00446 * the current directory is named "foo" in its parent? 00447 */ 00448 const char * 00449 svn_path_is_child(const char *path1, const char *path2, apr_pool_t *pool); 00450 00451 /** Return TRUE if @a path1 is an ancestor of @a path2 or the paths are equal 00452 * and FALSE otherwise. 00453 * 00454 * @since New in 1.3. 00455 */ 00456 svn_boolean_t 00457 svn_path_is_ancestor(const char *path1, const char *path2); 00458 00459 /** 00460 * Check whether @a path is a valid Subversion path. 00461 * 00462 * A valid Subversion pathname is a UTF-8 string without control 00463 * characters. 
"Valid" means Subversion can store the pathname in 00464 * a repository. There may be other, OS-specific, limitations on 00465 * what paths can be represented in a working copy. 00466 * 00467 * ASSUMPTION: @a path is a valid UTF-8 string. This function does 00468 * not check UTF-8 validity. 00469 * 00470 * Return @c SVN_NO_ERROR if valid and @c SVN_ERR_FS_PATH_SYNTAX if 00471 * invalid. 00472 * 00473 * @note Despite returning an @c SVN_ERR_FS_* error, this function has 00474 * nothing to do with the versioned filesystem's concept of validity. 00475 * 00476 * @since New in 1.2. 00477 */ 00478 svn_error_t * 00479 svn_path_check_valid(const char *path, apr_pool_t *pool); 00480 00481 00482 /** URI/URL stuff 00483 * 00484 * @defgroup svn_path_uri_stuff URI/URL conversion 00485 * @{ 00486 */ 00487 00488 /** Return TRUE iff @a path looks like a valid absolute URL. */ 00489 svn_boolean_t 00490 svn_path_is_url(const char *path); 00491 00492 /** Return @c TRUE iff @a path is URI-safe, @c FALSE otherwise. */ 00493 svn_boolean_t 00494 svn_path_is_uri_safe(const char *path); 00495 00496 /** Return a URI-encoded copy of @a path, allocated in @a pool. (@a 00497 path can be an arbitrary UTF-8 string and does not have to be a 00498 canonical path.) */ 00499 const char * 00500 svn_path_uri_encode(const char *path, apr_pool_t *pool); 00501 00502 /** Return a URI-decoded copy of @a path, allocated in @a pool. */ 00503 const char * 00504 svn_path_uri_decode(const char *path, apr_pool_t *pool); 00505 00506 /** Extend @a url by @a component, URI-encoding that @a component 00507 * before adding it to the @a url; return the new @a url, allocated in 00508 * @a pool. If @a component is @c NULL, just return a copy of @a url, 00509 * allocated in @a pool. 00510 * 00511 * @a component need not be a single path segment, but if it contains 00512 * multiple segments, they must be separated by '/'. 
@a component 00513 * should not begin with '/', however; if it does, the behavior is 00514 * undefined. 00515 * 00516 * @a url need not be a canonical path; it may have a trailing '/'. 00517 * 00518 * @note To add a component that is already URI-encoded, use 00519 * <tt>svn_path_join(url, component, pool)</tt> instead. 00520 * 00521 * @note gstein suggests this for when @a component begins with '/': 00522 * 00523 * "replace the path entirely 00524 * https://example.com:4444/base/path joined with /leading/slash, 00525 * should return: https://example.com:4444/leading/slash 00526 * per the RFCs on combining URIs" 00527 * 00528 * We may implement that someday, which is why leading '/' is 00529 * merely undefined right now. 00530 */ 00531 const char * 00532 svn_path_url_add_component(const char *url, 00533 const char *component, 00534 apr_pool_t *pool); 00535 00536 /** 00537 * Convert @a iri (Internationalized URI) to a URI. 00538 * The return value may be the same as @a iri if it was already 00539 * a URI. Else, allocate the return value in @a pool. 00540 * 00541 * @since New in 1.1. 00542 */ 00543 const char * 00544 svn_path_uri_from_iri(const char *iri, apr_pool_t *pool); 00545 00546 /** 00547 * URI-encode certain characters in @a uri that are not valid in a URI, but 00548 * don't have any special meaning in @a uri at their positions. If no 00549 * characters need escaping, just return @a uri. 00550 * 00551 * @note Currently, this function escapes <, >, ", space, {, }, |, \, ^, and `. 00552 * This may be extended in the future to do context-dependent escaping. 00553 * 00554 * @since New in 1.1. 00555 */ 00556 const char * 00557 svn_path_uri_autoescape(const char *uri, apr_pool_t *pool); 00558 00559 /** @} */ 00560 00561 /** Charset conversion stuff 00562 * 00563 * @defgroup svn_path_charset_stuff Charset conversion 00564 * @{ 00565 */ 00566 00567 /** Convert @a path_utf8 from UTF-8 to the internal encoding used by APR. 
*/ 00568 svn_error_t * 00569 svn_path_cstring_from_utf8(const char **path_apr, 00570 const char *path_utf8, 00571 apr_pool_t *pool); 00572 00573 /** Convert @a path_apr from the internal encoding used by APR to UTF-8. */ 00574 svn_error_t * 00575 svn_path_cstring_to_utf8(const char **path_utf8, 00576 const char *path_apr, 00577 apr_pool_t *pool); 00578 00579 00580 /** @} */ 00581 00582 #ifdef __cplusplus 00583 } 00584 #endif /* __cplusplus */ 00585 00586 00587 #endif /* SVN_PATH_H */
1.3.9.1