#ifndef s11n_net_s11n_STRINGTOOL_HPP_INCLUDED
#define s11n_net_s11n_STRINGTOOL_HPP_INCLUDED 1

#include <string>
#include <map>
#include <locale>
#include <iostream>
#include <sstream>

namespace s11n { namespace io {
        /**
           The strtool namespace encapsulates a set of utility functions for
           working with string objects. This mini-lib has unfortunately followed
           me from source tree to source tree like a little virus. While i have
           no special love for this code, it has proven useful time and time again.
        */
        namespace strtool {

                /**
                   The functions in the STPrivate namespace should not be
                   used by client code. Use strtool::to() and
                   strtool::from() instead.
                */
                namespace STPrivate
                {

                        /**
                           Lexically casts str to a value_type by
                           extracting it from a std::istringstream with
                           operator>>. Returns errorVal if the stream
                           cannot be set up or the extraction fails.

                           value_type must be default-constructible,
                           copyable and istream-extractable.

                           TODO: implement the following suggestion from
                           Kai Unger <kai.unger@hacon.de> (21 Sept 2004):

                           When the cast is done, you should check if there
                           are unread characters left. For example, casting
                           "1.2this_definitely_is_not_a_number" to double will
                           not result in returning the error value, because
                           conversion of "1.2" to 1.2d succeeds and the rest
                           of the string is ignored.
                        */
                        template <typename value_type>
                        value_type from_string( const std::string & str, const value_type & errorVal ) throw()
                        {
                                std::istringstream is( str );
                                if ( !is )
                                        return errorVal;
                                value_type foo = value_type();
                                if ( is >> foo )
                                        return foo;
                                return errorVal;
                        }

                        /**
                           Returns a string representation of the given
                           object, which must be ostreamable (i.e. usable
                           with operator<< on a std::ostream). The stream's
                           default formatting flags are used.
                        */
                        template <typename value_type>
                        std::string to_string( const value_type & obj ) throw()
                        {
                                std::ostringstream os;
                                // os << std::fixed;
                                os << obj;
                                return os.str();
                        }

                        // Disabled overload, kept for reference:
                        // inline std::string to_string( double d ) throw()
                        // {
                        //         std::ostringstream os;
                        //         os << std::fixed << d;
                        //         return os.str();
                        // }

                        /**
                           Convenience/efficiency overload: a string needs
                           no conversion, so str is returned as-is and the
                           error value is never used.
                        */
                        inline std::string from_string( const std::string & str, const std::string & /*errorVal*/ ) throw()
                        {
                                return str;
                        }

                        /**
                           Convenience/efficiency overload for C strings.
                           The error value is never used. A null str is
                           treated as an empty string, mirroring
                           to_string(const char *): constructing a
                           std::string from a null pointer is undefined
                           behaviour.
                        */
                        inline std::string from_string( const char *str, const char * /*errorVal*/ ) throw()
                        {
                                return str ? str : "";
                        }

                        /**
                           Convenience/efficiency overload for C strings.
                           A null obj yields an empty string.
                        */
                        inline std::string to_string( const char *obj ) throw()
                        {
                                return obj ? obj : "";
                        }

                        /**
                           Convenience/efficiency overload: returns a copy
                           of obj without going through a stream.
                        */
                        inline std::string to_string( const std::string & obj ) throw()
                        {
                                return obj;
                        }

                } // end STPrivate namespace

                /**
                   Convenience typedef for use with translate_entities()
                   and the other map-driven functions in this namespace.
                */
                typedef std::map<std::string,std::string> entity_map;

                /**
                   For each entry in the input string, the characters are
                   mapped to string sequences using the given
                   translation_map. Where no mappings exist, the input
                   sequence is left as-is.

                   It returns the number of translations made.

                   If reverse_translation == true then a reverse mapping is
                   done: map values are treated as keys.

                   This is useful, for example, for doing XML-entity-to-char
                   conversions.

                   Complexity is essentially linear, based on a combination of
                   buffer.size() and translation_map.size(). Best used with
                   small maps on short strings! The speed can be increased
                   significantly, but probably only if we restrict keys and
                   values to 1 character each.

                   Design note: this really should be a function template,
                   accepting any lexically-castable key/val types, but the
                   function is quite long, and therefore not really suitable
                   for inclusion in the header.
                */
                size_t translate_entities( std::string & buffer, const entity_map & translation_map, bool reverse_translation = false );

                /**
                   A policy enum used by trim_string(). The values are
                   bit flags, so TrimAll is the union of the other two.
                */
                enum TrimPolicy {
                        /**
                           Trim only leading spaces.
                        */
                        TrimLeading = 0x01,
                        /**
                           Trim only trailing spaces.
                        */
                        TrimTrailing = 0x02,
                        /**
                           Trim leading and trailing spaces.
                        */
                        TrimAll = TrimLeading | TrimTrailing
                };

                /**
                   Trims whitespace from the input string, in place, as
                   selected by the policy argument (leading and trailing
                   by default), and returns the number of whitespace
                   characters removed.
                */
                size_t trim_string( std::string &, TrimPolicy = TrimAll );

                /**
                   Trims whitespace from a copy of the input string, as
                   selected by the policy argument (leading and trailing
                   by default), and returns the trimmed string.
                */
                std::string trim_string( const std::string &, TrimPolicy = TrimAll );

                /**
                   Attempts to remove all backslash-escaped chars from str.

                   Removes backslash-escaped newlines from the input string, including
                   any whitespace immediately following each backslash.

                   The optional slash parameter defines the escape character.
                */
                size_t strip_slashes( std::string &str, const char slash = '\\' );

                /**
                   Adds an escape sequence in front of any characters in
                   instring which are also in the list of chars_to_escape.
                   Returns the number of escapes added.

                   e.g., to escape (with a single backslash) all $, % and \ in
                   mystring with a backslash:

                   <pre>
                   escape_string( mystring, "$%\\", "\\" );
                   </pre>

                   (WARNING: the doxygen-generated HTML version of these docs
                   may incorrectly show single backslashes in the above example!)
                */
                size_t escape_string( std::string & instring, const std::string & chars_to_escape, const std::string & escape_seq = "\\" );

                /**
                   normalize_string() is like trim_string() and
                   strip_slashes(), combined, plus it removes leading/trailing
                   quotes:

                   <pre>
                   "this is a \
                   sample multi-line, backslash-escaped \
                   string."
                   </pre>

                   Will translate to:
                   <pre>
                   this is a sample multi-line, backslash-escaped string.
                   </pre>
                */
                void normalize_string( std::string & );

                /**
                   Returns the first whitespace-delimited token from the given
                   string, or an empty string if there is no such token.
                */
                std::string first_token( const std::string & );

                /**
                   Returns the passed-in string, minus the first
                   whitespace-delimited token. An empty string is returned if
                   there is no second token.
                */
                std::string after_first_token( const std::string & );

                /**
                   Returns int values for chars '0'-'9', 'a'-'f' and 'A'-'F',
                   else -1.
                */
                int int4hexchar( char character );

                /**
                   Returns decimal value of wd, which is assumed to be a
                   hex-encoded number. wd may optionally be prefixed with '#',
                   as in \#ff00ff. Case is insignificant.

                   On error -1 is returned, but -1 is also potentially a valid
                   number, so there is really no way of knowing if it fails or
                   not. :/
                */
                int hex2int( const std::string & wd );

                /**
                   Lexically casts v to a string by forwarding to
                   STPrivate::to_string().
                */
                template <typename ValueT>
                std::string to( const ValueT & v )
                {
                        return STPrivate::to_string(v);
                }

                /**
                   Lexically casts v to a ValueT by forwarding to
                   STPrivate::from_string(), or returns dflt if the
                   conversion fails. dflt defaults to a
                   default-constructed ValueT.
                */
                template <typename ValueT>
                ValueT from( const std::string & v, const ValueT & dflt = ValueT() )
                {
                        return STPrivate::from_string( v, dflt );
                }

                /**
                   YAGNI!

                   A functor for translating entities in a set of strings.
                   Designed for use with std::for_each().

                   Only a pointer to the map is stored, so the map passed
                   to the constructor must outlive the functor.
                */
                struct entity_translator
                {
                        /**
                           Sets the map and reverse options to be used from
                           calls to operator().
                        */
                        entity_translator( const entity_map & map, bool reverse )
                                : m_map(&map),m_rev(reverse)
                        {
                        }

                        /**
                           Calls translate_entities( str, MAP, REVERSE ),
                           where MAP and REVERSE are the flags set via the
                           ctor. str is modified in place.
                        */
                        inline void operator()( std::string & str ) const
                        {
                                translate_entities( str, *(this->m_map), this->m_rev );
                        }
                private:
                        const entity_map * m_map; // not owned
                        bool m_rev;

                };

                /**
                   Internal-use initializer for setting up an entity
                   translation map for default quote-escaping behaviour.
                */
                struct default_escapes_initializer
                {
                        /**
                           Adds the following escape sequences to map:

                           - 1x backslash (\) == 2x backslash.

                           - 1x apostrophe == 1x backslash 1x apostrophe

                           - 1x double-quote == 1x backslash 1x double-quote
                        */
                        void operator()( entity_map & map );
                };

                /** Internal marker type. */
                template <typename ContextT> struct strtool_sharing_context {};

                /**
                   Returns the default entity translation map, which can be used to
                   [un]slash-escape the following entities: '\\', '\'', '"'.
                */
                const entity_map & default_escapes_translations();

                /**
                   Converts v to a string, applies translate_entities(...,trans,reverse ),
                   and returns the resulting string.
                */
                template <typename ValueT>
                std::string translate( const ValueT & v,
                                       const entity_map & trans,
                                       bool reverse )
                {
                        std::string val = to( v );
                        translate_entities( val, trans, reverse );
                        return val;
                }

                /**
                   Calls translate( v, trans, false ). trans defaults to
                   default_escapes_translations().
                */
                template <typename ValueT>
                std::string escape( const ValueT & v, const entity_map & trans = default_escapes_translations() )
                {
                        return translate( v, trans, false );
                }

                /**
                   Calls translate( v, trans, true ). trans defaults to
                   default_escapes_translations().
                */
                template <typename ValueT>
                std::string unescape( const ValueT & v, const entity_map & trans = default_escapes_translations() )
                {
                        return translate( v, trans, true );
                }

                /**
                   Returns v, lexically cast to a string, with the given
                   quote string prepended and appended. The default quote
                   is a single apostrophe. No escaping of v's content is
                   performed.
                */
                template <typename ValueT>
                std::string quote( const ValueT & v, const std::string & quote = "\'" )
                {
                        return quote + to( v ) + quote;
                }

                /**
                   Exactly like expand_dollar_refs_inline() but returns a new string
                   which results from the expansions. The returned string may
                   be the same as the original.
                */
                std::string expand_dollar_refs( const std::string & text, const entity_map & src );

                /**
                   Parses env vars out of buffer, replacing them with their
                   values, as defined in the src map. Accepts variables
                   in the format ${VAR} and $VAR.

                   e.g., ${foo} corresponds to the value set in src["foo"].

                   Referencing a variable which is not set does not
                   expand the variable to an empty value: it is left
                   as-is. Thus expanding ${FOO} when "FOO" is not set
                   will result in "${FOO}".

                   To get a dollar sign into the resulting string, escape
                   it with a single backslash: this keeps it from being
                   parsed as a ${variable}.

                   Returns the number of variables expanded.

                   Note that this function is much *more* efficient than using
                   translate_entities() to perform a similar operation.
                   Because of its stricter format we can do a single pass
                   through the string and may not even have to reference the
                   source map.

                   Complexity depends on the number of ${vars} parts which are
                   expanded in buffer: overall runtime depends on buffer length,
                   plus a non-determinate amount of time per ${var} expanded.

                   Design note: this really should be a function template,
                   accepting any lexically-castable key/val types, but the
                   function is quite long, and therefore not really suitable
                   for inclusion in the header.

                   Known misgivings:

                   - When buffer contains dollar signs which are preceded by
                   a slash, the slash is stripped even if the $ does not
                   expand to anything. This is arguably undesirable behaviour.
                */
                size_t expand_dollar_refs_inline( std::string & buffer, const entity_map & src );

} } } // namespaces


#endif // s11n_net_s11n_STRINGTOOL_HPP_INCLUDED