Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

strtool.hpp

Go to the documentation of this file.
00001 #ifndef s11n_net_s11n_STRINGTOOL_HPP_INCLUDED
00002 #define s11n_net_s11n_STRINGTOOL_HPP_INCLUDED 1
00003 
00004 #include <string>
00005 #include <map>
00006 #include <locale>
00007 #include <iostream>
00008 #include <sstream>
00009 
00010 namespace s11n { namespace io {
00011 /**
00012 The strtool namespace encapsulates a set of utility functions for
00013 working with string objects. This mini-lib has unfortunately followed
00014 me from source tree to source tree like a little virus. While i have
00015 no special love for this code, it has proven useful time and time again.
00016 */
00017 namespace strtool {
00018 
00019         /**
00020            The functions in the STPrivate namespace should not be used
00021            by client code.
00022         */
00023         namespace STPrivate
00024         {
00025 
00026 
/**
   Converts str to a value_type by extracting the value from
   an input stream (operator>>). errorVal is returned if the
   extraction fails.

   Note that only the leading part of str has to be
   convertible: characters left unread after the extraction
   are ignored.

   TODO: implement the following suggestion from
   Kai Unger <kai.unger@hacon.de> (21 Sept 2004):

   When the cast is done, you should check if there
   are unread characters left. For example, casting
   "1.2this_definitely_is_not_a_number" to double will
   not result in returning the error value, because
   conversion of "1.2" to 1.2d succeeds and the rest
   of the string is ignored.
*/
template <typename value_type>
value_type from_string( const std::string & str, const value_type & errorVal ) throw()
{
        std::istringstream input( str );
        if ( input.fail() )
        {
                return errorVal;
        }
        value_type parsed = value_type();
        if ( !( input >> parsed ) )
        {
                return errorVal;
        }
        return parsed;
}
00052 
/**
   Streams obj into a string (via operator<<) and returns the
   result. value_type must therefore be insertable into a
   std::ostream.
*/
template <typename value_type>
std::string to_string( const value_type & obj ) throw()
{
        std::ostringstream buffer;
        // Note: std::fixed is not set, so the stream's default
        // formatting applies.
        buffer << obj;
        return buffer.str();
}
00065 
00066 //                 inline std::string to_string( double d ) throw()
00067 //                 {
00068 //                         std::ostringstream os;
00069 //                         os << std::fixed << d;
00070 //                         return os.str();
00071 //                 }
00072 
/**
   Overload for string-to-string "conversion": returns a copy
   of str, unchanged. The error value is never needed and is
   ignored.
*/
inline std::string from_string( const std::string & str, const std::string & /*errorVal*/ ) throw()
{
        return std::string( str );
}
00080 
/**
   Convenience/efficiency overload for C-style strings.

   Returns a copy of str. If str is a null pointer then
   errorVal is returned instead, or an empty string if
   errorVal is null as well. (Constructing a std::string
   from a null pointer is undefined behaviour, so null
   input must never reach the string constructor.)
*/
inline std::string from_string( const char *str, const char * errorVal ) throw()
{
        if ( str )
        {
                return str;
        }
        return errorVal ? errorVal : "";
}
00088 
/**
   Overload for C-style strings: returns a copy of obj, or
   an empty string if obj is a null pointer.
*/
inline std::string to_string( const char *obj ) throw()
{
        if ( !obj )
        {
                return std::string( "" );
        }
        return std::string( obj );
}
00096 
/**
   Overload for std::string: no conversion is necessary, so
   a copy of obj is returned as-is.
*/
inline std::string to_string( const std::string & obj ) throw()
{
        return std::string( obj );
}
00104 
00105 
00106         } // end STPrivate namespace
00107 
00108         /**
00109            Convenience typedef for use with translate_entities().
00110          */
00111         typedef std::map<std::string,std::string> entity_map;
00112 
00113         /**
00114            For each entry in the input string, the characters are
00115            mapped to string sequences using the given
00116            translation_map. Where no mappings exist, the input
00117            sequence is left as-is.
00118 
00119            It returns the number of translations made.
00120 
00121            If reverse_translation == true then a reverse mapping is
00122            done: map values are treated as keys.
00123 
00124            This is useful, for example, for doing XML-entity-to-char
00125            conversions.
00126 
00127        Complexity is essentially linear, based on a combination of
00128        buffer.size() and translation_map.size(). Best used with
00129        small maps on short strings! The speed can be increased
00130        significantly, but probably only if we restrict keys and
00131        values to 1 character each.
00132 
00133        Design note: this really should be a function template,
00134        accepting any lexically-castable key/val types, but the
00135        function is quite long, and therefore not really suitable
00136        for inclusion in the header.
00137         */
00138         size_t translate_entities( std::string & buffer, const entity_map & translation_map, bool reverse_translation = false );
00139 
00140 
/**
   Policy flags for trim_string(), selecting which end(s) of
   the string are trimmed. The values are bit flags: TrimAll
   is the combination of the other two.
*/
enum TrimPolicy {
        /** Trim only leading spaces. */
        TrimLeading = 0x01,
        /** Trim only trailing spaces. */
        TrimTrailing = 0x02,
        /** Trim leading and trailing spaces. */
        TrimAll = TrimLeading | TrimTrailing
};
00158 
00159         /**
00160            Trims leading and trailing whitespace from the input string
00161            and returns the number of whitespace characters removed.
00162          */
00163         size_t trim_string( std::string &, TrimPolicy = TrimAll );
00164         /**
00165            Trims leading and trailing whitespace from the input string
00166            and returns the trimmed string.
00167          */
00168         std::string trim_string( const std::string &, TrimPolicy = TrimAll );
00169 
00170 
00171         /**
00172            Attempts to remove all backslash-escaped chars from str.
00173 
00174            Removes backslash-escaped newlines from the input string, including
00175            any whitespace immediately following each backslash.
00176 
00177            The optional slash parameter defines the escape character.
00178         */
00179         size_t strip_slashes( std::string &str, const char slash = '\\' );
00180 
00181         /**
00182            Adds an escape sequence in front of any characters in
00183            instring which are also in the list of chars_to_escape.
00184            Returns the number of escapes added.
00185 
00186            e.g., to escape (with a single backslash) all $, % and \ in
00187            mystring with a backslash:
00188 
00189            <pre>
00190            escape_string( mystring, "$%\\", "\\" );
00191            </pre>
00192 
00193            (WARNING: the doxygen-generated HTML version of these docs
00194            may incorrectly show single backslashes in the above example!)
00195 
00196 
00197         */
00198         size_t escape_string( std::string & instring, const std::string & chars_to_escape, const std::string & escape_seq = "\\" );
00199 
00200         /**
00201            normalize_string() is like trim_string() and
00202            strip_slashes(), combined, plus it removes leading/trailing
00203            quotes:
00204 
00205            <pre>
00206            "this is a \
00207            sample multi-line, backslash-escaped \
00208            string."
00209            </pre>
00210            
00211            Will translate to:
00212            <pre>
00213            this is a sample multi-line, backslash-escaped string.
00214            </pre>
00215         */
00216         void normalize_string( std::string & );
00217 
00218 
00219         /**
00220            Returns the first whitespace-delimited token from the given
00221            string, or an empty string if there is no such token.
00222         */
00223         std::string first_token( const std::string & );
00224 
00225         /**
00226            Returns the passed-in string, minus the first
00227            whitespace-delimited token. An empty string is returned if
00228            there is no second token.
00229          */
00230         std::string after_first_token( const std::string & );
00231 
00232 
00233 
00234         /**
00235            Returns int values for chars '0'-'9', 'a'-'f' and 'A'-'F',
00236            else -1.
00237         */
00238         int int4hexchar( char character );
00239 
00240         /**
00241            Returns decimal value of wd, which is assumed to be a
00242            hex-encoded number. wd may optionally be prefixed with '#',
00243            as in \#ff00ff. Case is insignificant.
00244 
00245            On error -1 is returned, but -1 is also potentially a valid
00246            number, so there is really no way of knowing if it fails or
00247            not. :/
00248         */
00249         int hex2int( const std::string & wd );
00250 
00251 
00252         /**
00253            Lexically casts v to a string.
00254         */
00255         template <typename ValueT>
00256         std::string to( const ValueT & v )
00257         {
00258         return STPrivate::to_string(v);
00259         }
00260 
00261         /**
00262            Lexically casts v to a ValueT, or returns dflt if
00263            conversion fails.
00264         */
00265         template <typename ValueT>
00266         ValueT from( const std::string & v, const ValueT & dflt = ValueT() )
00267         {
00268                 return STPrivate::from_string( v, dflt );
00269         }
00270 
00271 
00272         /**
00273            See translate_entities() for details.
00274         */
00275         typedef std::map<std::string,std::string> entity_map;
00276 
00277 
00278     /**
00279        YAGNI!
00280 
00281        A functor for translating entities in a set of strings.
00282        Designed for use with std::for_each().
00283     */
00284         struct entity_translator
00285         {
00286         /**
00287            Sets the map and reverse options to be used from
00288            calls to operator().
00289         */
00290                 entity_translator( const entity_map & map, bool reverse )
00291             : m_map(&map),m_rev(reverse)
00292                 {
00293                 }
00294 
00295         /**
00296            Calls translate_entities( str, MAP, REVERSE ),
00297            where MAP and REVERSE are the flags set via the
00298            ctor.
00299         */
00300                 inline void operator()( std::string & str ) const
00301                 {
00302                         translate_entities( str, *(this->m_map), this->m_rev );
00303                 }
00304         private:
00305                 const entity_map * m_map;
00306                 bool m_rev;
00307                          
00308         };
00309 
00310         /**
00311            Internal-use initializer for setting up an entity
00312            translation map for default quote-escaping behaviour.
00313         */
00314         struct default_escapes_initializer
00315         {
00316         /**
00317            Adds the following escape sequences to map:
00318 
00319            - 1x backslash (\) == 2x backslash.
00320 
00321            - 1x apostrophe  == 1x backslash 1x apostrophe
00322 
00323            - 1x double-quote  == 1x backslash 1x double-quote  
00324         */
00325                 void operator()( entity_map & map );
00326         };
00327 
00328 
00329         /** Internal marker type. */
00330         template <typename ContextT> struct strtool_sharing_context {};
00331 
00332         /**
00333            Returns the default entity translation map, which can be used to
00334            [un]slash-escape the following entities: '\\', '\'', '"'.
00335         */
00336         const entity_map & default_escapes_translations();
00337 
00338         /**
00339            Converts v to a string, applies translate_entities(...,trans,reverse ),
00340            and returns the resulting string.
00341         */
00342         template <typename ValueT>
00343         std::string translate( const ValueT & v,
00344                                const entity_map & trans,
00345                                bool reverse )
00346         {
00347                 std::string val = to( v );
00348                 translate_entities( val, trans, reverse );
00349                 return val;
00350         }
00351 
00352 
00353         /**
00354            Calls translate( v,trans, false);
00355         */
00356         template <typename ValueT>
00357         std::string escape( const ValueT & v, const entity_map & trans = default_escapes_translations() )
00358         {
00359                 return translate( v, trans, false );
00360         }
00361 
00362 
00363         /**
00364            Calls translate( v, trans, true );
00365         */
00366         template <typename ValueT>
00367         std::string unescape( const ValueT & v, const entity_map & trans = default_escapes_translations() )
00368         {
00369                 return translate( v, trans, true );
00370         }
00371 
/**
   Lexically casts v to a string and returns it wrapped in
   the given quote string (one copy before, one after). The
   default quote is a single apostrophe.
*/
template <typename ValueT>
std::string quote( const ValueT & v, const std::string & quote = "\'" )
{
        std::string quoted( quote );
        quoted += to( v );
        quoted += quote;
        return quoted;
}
00381 
00382         /**
00383            Exactly like expand_dollar_refs_inline() but returns a new string
00384            which results from the expansions. The returned string may
00385            be the same as the original.
00386  
00387          */
00388         std::string expand_dollar_refs( const std::string & text, const entity_map & src );
00389 
00390         /**
00391            Parses env vars out of buffer, replacing them with their
00392            values, as defined in the src map. Accepts variables
00393            in the format ${VAR} and $VAR.
00394 
00395            e.g., ${foo} corresponds to the value set in src["foo"].
00396 
00397            Referencing a variable which is not set does not
00398            expand the variable to an empty value: it is left
00399            as-is. Thus expanding ${FOO} when "FOO" is not set
00400            will result in "${FOO}".
00401 
00402            To get a dollar sign into the resulting string, escape
00403            it with a single backslash: this keeps it from being
00404            parsed as a ${variable}.
00405 
00406        Returns the number of variables expanded.
00407 
00408        Note that this function is much *more* efficient than using
00409        translate_entities() to perform a similar operation.
00410        Because of its stricter format we can do a single pass
00411        through the string and may not even have to reference the
00412        source map.
00413 
00414        Complexity depends on the number of ${vars} parts which are expanded
00415        in buffer: overall runtime depends on buffer length,
00416        plus a non-determinate amount of time per ${var} expanded.
00417 
00418        Design note: this really should be a function template,
00419        accepting any lexically-castable key/val types, but the
00420        function is quite long, and therefore not really suitable
00421        for inclusion in the header.
00422 
00423 
00424        Known misgivings:
00425 
00426        - When buffer contains dollar signs which are preceded by
00427        a slash, the slash is stripped even if the $ does not
00428        expand to anything. This is arguably incorrect behaviour.
00429         */
00430         size_t expand_dollar_refs_inline( std::string & buffer, const entity_map & src );
00431 
00432 
00433 } } } // namespaces
00434 
00435 
00436 #endif // s11n_net_s11n_STRINGTOOL_HPP_INCLUDED

Generated on Mon Dec 26 15:53:17 2005 for libs11n-1.2.3 by  doxygen 1.4.4