Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

data_node_io.hpp

Go to the documentation of this file.
00001 #ifndef s11n_DATA_NODE_IO_H_INCLUDED
00002 #define s11n_DATA_NODE_IO_H_INCLUDED
00003 
00004 ////////////////////////////////////////////////////////////////////////
00005 // data_node_io.hpp
00006 // some i/o interfaces & helpers for s11n
00007 // License: Public Domain
00008 // Author: stephan@s11n.net
00009 ////////////////////////////////////////////////////////////////////////
00010 
00011 
00012 #include <string>
00013 #include <sstream>
00014 #include <list>
00015 #include <map>
00016 #include <deque>
00017 #include <iostream>
00018 #include <memory>// auto_ptr
00019 
00020 #include <cassert>
00021 #include <typeinfo>
00022 
00023 
00024 
00025 // #include <s11n.net/cl/cllite.hpp> // for opening DLLs
00026 
00027 #include <s11n.net/s11n/phoenix.hpp> // phoenix class
00028 
00029 #include <s11n.net/s11n/exception.hpp>
00030 #include <s11n.net/s11n/s11n_debuggering_macros.hpp> // COUT/CERR
00031 #include <s11n.net/s11n/classload.hpp> // classloader()
00032 #include <s11n.net/s11n/serialize.hpp> // unfortunately dep
00033 #include <s11n.net/s11n/traits.hpp> // s11n_traits & node_traits
00034 
00035 #include <s11n.net/s11n/export.hpp> // for exporting symbols to DLL
00036 
00037 ////////////////////////////////////////////////////////////////////////////////
00038 // NO DEPS ON s11n_node.hpp ALLOWED!
00039 ////////////////////////////////////////////////////////////////////////////////
00040 
00041 
00042 namespace s11n {
00043 
00044         namespace io {
00045 
00046         /**
00047            Returns an output stream for the given file
00048            name. Caller owns the returned pointer, which may
00049            be 0.
00050 
00051            The returned stream supports zlib and libbz2 if your
00052            libs11n is built with libs11n_zfstream support, meaning
00053            it can write files compressed with zlib/gzip or bz2lib.
00054         */
00055         std::ostream * get_ostream( const std::string name );
00056 
00057         /**
00058            Returns an input stream for the given file
00059            name. Caller owns the returned pointer, which may
00060            be 0.
00061 
00062            The returned stream supports zlib and libbz2 if
00063            your libs11n is built with libs11n_zfstream
00064            support. That means that if
00065            zfstream::compression_policy() is set, then the
00066            returned stream might be a (de)compressing stream.
00067 
00068            If ExternalData is false then name is assumed to be
00069            a string containing input, and a string-reading stream
00070            is returned.
00071         */
00072         std::istream * get_istream( const std::string name, bool ExternalData = true );
00073 
00074                 /**
00075                    Convenience function for grabbing the first line of a file.
00076 
00077                    If ExternalData == true then src is treated as a file name and
00078                    its first line is returned, else returns up to the first newline of src.
00079 
00080                    See get_magic_cookie( istream & ) for notes on a minor functional
00081                    change introduced in version 1.2.1.
00082                 */
00083                 std::string get_magic_cookie( const std::string & src, bool ExternalData = true );
00084 
00085                 /**
00086                    Convenience function for grabbing the first line of a
00087                    stream.
00088 
00089                    Returns the first line of the given stream, or an
00090                    empty string on error.
00091 
00092                    As of version 1.2.1, this function behaves slightly
00093                    differently from prior versions: the returned
00094                    string is the first run of consecutive non-control
00095                    characters in the line.  This allows us to properly
00096                    read some binary formats which use a string
00097                    identifier as a magic cookie (e.g.  sqlite
00098                    databases). In this context "control characters"
00099                    are anything outside the range of ASCII values
00100                    [32..126]. This change "shouldn't" affect any
00101                    pre-1.2.1 behaviours, which were never tested/used
00102                    with binary file formats.
00103                 */
00104                 std::string get_magic_cookie( std::istream & is );
00105 
00106                 /**
00107                    data_node_serializer provides an interface for
00108                    saving/loading a given abstract data node type
00109                    to/from streams.
00110 
00111                    It is designed for containers which comply with
00112                    s11n's Data Node interface and conventions.
00113 
00114 
00115                    Conventions:
00116 
00117                    Must provide:
00118 
00119                    typedef NodeT node_type
00120 
00121                    Two de/serialize functions, following the
00122                    stream-based interface shown here (filename-based
00123                    variants are optional, but convenient for clients).
00124 
00125 
00126            Potential TODOs for 1.3/1.4:
00127 
00128            - Add cancel() and cancelled() to set/query the
00129            read state. This is to support Cancel operations in
00130            UIs which load slow-loading (sqlite3) formats or
00131            large sets and want to safely cancel. Once
00132            cancelled, a read is not restartable (or this is
00133            not guaranteed). All we can really do is provide a
00134            flag and conventions and hope implementations
00135            respect them.
00136 
00137            - Provide some sort of progress feedback mechanism,
00138            at least for reading, again to support users of
00139            slow Serializers. This is complicated by the
00140            unknown-read-size nature of Serializers.
00141                 */
00142                 template <typename NodeT>
00143                 class S11N_EXPORT_API data_node_serializer
00144                 {
00145                 public:
00146                         /**
00147                            The underlying data type used to store
00148                            serialized data.
00149                         */
00150                         typedef NodeT node_type;
00151 
00152                         /** Installs a placeholder cookie and names the metadata node. */
00153                         data_node_serializer()
00154                         {
00155                                 this->magic_cookie( "WARNING: magic_cookie() not set!" );
00156                                 // ^^^ placeholder only: subclasses must set their real cookie.
00157                                 typedef ::s11n::node_traits<node_type> NTR;
00158                                 NTR::name( this->metadata(), "serializer_metadata" );
00159                                 // this->metadata().name( "serializer_metadata" );
00160                         }
00161                         virtual ~data_node_serializer() {}
00162 
00163 
00164                         /**
00165                            A convenience typedef, mainly for subclasses.
00166                         */
00167                         typedef std::map<std::string,std::string> translation_map;
00168 
00169                         /**
00170                            Returns a map intended for use with
00171                            ::s11n::io::strtool::translate_entities().
00172 
00173                            The default implementation returns an empty map,
00174                            obtained from a phoenix<> instance.
00175                            Subclasses should override this to return a translation
00176                            map, if they need one.
00177 
00178                            Be aware that this may very well be called
00179                            post-main(), so subclasses should take that into
00180                            account and provide post-main()-safe maps!
00181                         */
00182                         virtual const translation_map & entity_translations() const
00183                         {
00184                                 typedef ::s11n::Detail::phoenix<translation_map,data_node_serializer<node_type> > TMap;
00185                                 return TMap::instance();
00186                         }
00187 
00188 
00189 
00190                         /**
00191                            Must be implemented to format node_type to the given ostream.
00192 
00193                            It should return true on success, false on error.
00194 
00195                            The default implementation always returns false.
00196 
00197                            Note that this function does not use
00198                            s11n::serialize() in any way, and is only
00199                            coincidentally related to it.
00200                         */
00201                         virtual bool serialize( const node_type & /*src*/, std::ostream & /*dest*/ )
00202                         {
00203                                 return false;
00204                         }
00205 
00206                         /**
00207                            Overloaded to save src to the given filename.
00208 
00209                            The default implementation treats destfile
00210                            as a file name and passes the call on to
00211                            serialize(node_type,ostream).  The output
00212                            file is compressed if zfstream::compression_policy()
00213                            has been set to enable it.
00214 
00215                            Returns true on success, false on error
00216                            (including an empty destfile, as of 1.0.2, or
00217                            a stream which could not be obtained).
00218 
00219                            The stream is held in an auto_ptr so that it is
00220                            released even if serialize(node_type,ostream)
00221                            throws.  This function is virtual so that
00222                            Serializers which do not deal with i/ostreams
00223                            (e.g., those which use a database connection)
00224                            can override it to interpret destfile as, e.g.,
00225                            a database-related string (connection, db
00226                            object name, or whatever).
00227                         */
00228                         virtual bool serialize( const node_type & src, const std::string & destfile )
00229                         {
00230                                 if( destfile.empty() ) return false;
00231                                 typedef std::auto_ptr<std::ostream> AP;
00232                                 AP os( ::s11n::io::get_ostream( destfile ) );
00233                                 if( ! os.get() ) return false;
00234                                 // os is deleted on every exit path, including exceptions.
00235                                 return this->serialize( src, *os );
00236                         }
00237 
00238                         /**
00239                            Must be implemented to parse a node_type from the given istream.
00240 
00241                            It should return a new node on success, or 0 on error.
00242 
00243                            The default implementation always returns 0 and does nothing.
00244 
00245                            Note that this function does not use
00246                            s11n::deserialize() in any way, and is only
00247                            coincidentally related to it.
00248 
00249                            Subclasses should try not to have to buffer
00250                            the whole stream before parsing, because
00251                            object trees can be arbitrarily large and a
00252                            buffered copy effectively doubles the
00253                            memory needed to store the tree during the
00254                            deserialization process. Buffering
00255                            behaviour is unspecified by this interface,
00256                            however, and subclasses may pre-buffer the
00257                            whole stream content if they need to.
00258                         */
00259                         virtual node_type * deserialize( std::istream & )
00260                         {
00261                                 return 0;
00262                         }
00263 
00264 
00265                         /**
00266                            Overloaded to load a node from the given filename.
00267 
00268                            It supports zlib/bz2lib decompression for
00269                            files if your s11n lib supports them.
00270 
00271                            This is virtual for the same reason as
00272                            serialize(string).  Returns 0 if no input
00273                            stream could be obtained for src.
00274                         */
00275                         virtual node_type * deserialize( const std::string & src )
00276                         {
00277                                 typedef std::auto_ptr<std::istream> AP;
00278                                 AP is( ::s11n::io::get_istream( src ) );
00279                                 if( ! is.get() ) return 0;
00280                                 return this->deserialize( *is );
00281                         }
00282 
00283 
00284                         /**
00285                            Gets this object's magic cookie.
00286 
00287                            Cookies are registered with
00288                            <code>class_loader< data_node_serializer<NodeType> ></code>
00289                            types to map files to file input parsers.
00290                         */
00291                         std::string magic_cookie() const
00292                         {
00293                                 return this->m_cookie;
00294                         }
00295 
00296                 protected:
00297                         /**
00298                            Sets the magic cookie for this type.
00299                         */
00300                         void magic_cookie( const std::string & c )
00301                         {
00302                                 this->m_cookie = c;
00303                         }
00304 
00305                         /**
00306                            metadata is an experimental feature
00307                            allowing serializers to store arbitrary
00308                            serializer-specific information in their
00309                            data streams.
00310                          */
00311                         node_type & metadata()
00312                         { return this->m_meta; }
00313                         /**
00314                            A const overload of metadata().
00315                          */
00316                         const node_type & metadata() const
00317                         { return this->m_meta; }
00318                 private:
00319                         std::string m_cookie;
00320                         node_type m_meta;
00321                 }; // data_node_serializer<>
00322 
00323         /**
00324            Tries to guess which Serializer can be used to read
00325            is, using the magic cookie returned by
00326            get_magic_cookie(is). Returns an instance of that
00327            type on success or 0 on error (an empty cookie counts
00328            as an error). It may propagate exceptions.
00329 
00330            Two "directive" prefixes are recognized:
00331 
00332            #s11n::io::serializer CLASSNAME
00333 
00334            #!/s11n/io/serializer CLASSNAME  (added in 1.1.0)
00335 
00336            If the cookie starts with one of them, only the text
00337            after the directive is passed to
00338            s11n::cl::classload<>(); otherwise the whole
00339            cookie is passed.
00340 
00341            Added in 1.2.1.
00342         */
00343         template <typename NodeType>
00344         data_node_serializer<NodeType> * guess_serializer( std::istream & is )
00345         {
00346             typedef data_node_serializer<NodeType> ST;
00347             std::string cookie( get_magic_cookie( is ) );
00348             if( cookie.empty() ) return 0;
00349 
00350             // Look for the classic directive first; the 1.1.0
00351             // spelling is only consulted when the classic one
00352             // does not occur anywhere in the cookie.
00353             std::string directive( "#s11n::io::serializer " );
00354             std::string::size_type pos = cookie.find( directive );
00355             if( std::string::npos == pos )
00356             {
00357                 directive = "#!/s11n/io/serializer ";
00358                 pos = cookie.find( directive );
00359             }
00360 
00361             // A directive counts only at the very start of the
00362             // cookie; anywhere else the cookie is taken verbatim.
00363             if( 0 != pos )
00364             {
00365                 return ::s11n::cl::classload<ST>( cookie );
00366             }
00367 
00368             std::string classname( cookie.substr( directive.size() ) );
00369             return ::s11n::cl::classload<ST>( classname );
00370         }
00371 
00372         /**
00373            Overload taking the name of a local file; returns 0
00374            if no input stream can be obtained for it.
00375            Added in 1.2.1.
00376         */
00377         template <typename NodeType>
00378         data_node_serializer<NodeType> * guess_serializer( std::string const & infile )
00379         {
00380             typedef std::auto_ptr<std::istream> StreamPtr;
00381             StreamPtr input( get_istream( infile.c_str() ) );
00382             if( ! input.get() ) return 0;
00383             return guess_serializer<NodeType>( *input );
00384         }
00385 
00386                 /**
00387                 Tries to load a NodeType object from the given
00388                 stream. It uses the cookie from the input stream (the
00389                 first line) and uses
00390                 s11n::cl::classload<SerializerBaseType>() to find a
00391                 matching Serializer (see guess_serializer()).
00392 
00393                 On error 0 is returned or an exception is thrown,
00394                 else a new pointer, which the caller owns. The
00395                 Serializer itself is destroyed before returning.
00396                 An s11n_exception is rethrown unchanged; any other
00397                 exception is converted to an s11n::io_exception.
00398 
00399                 Achtung: the first line of input from the input stream
00400                 is consumed by this function (to find the cookie), and
00401                 the cookie is not passed on to the handler! The only
00402                 reliable way around this [that i know of] is to buffer
00403                 the whole input as a string, and i don't wanna do that
00404                 (it's really bad for massive data files).
00405 
00406                 ACHTUNG: Only usable for loading ROOT nodes.
00407 
00408                 Special feature:
00409 
00410                 If the first line of the stream is
00411                 "#s11n::io::serializer CLASSNAME" then the CLASSNAME
00412                 token is expected to be a Serializer class name. This
00413                 function will try to classload that object.  If
00414                 successful it will use that type to deserialize the
00415                 input stream. If that fails, it will return 0.
00416                 This lets non-C++ tools which generate s11n data (e.g.,
00417                 perl scripts) avoid having to know the exact cookies.
00418                 */
00419                 template <typename NodeType>
00420                 NodeType *
00421                 load_node_classload_serializer( std::istream & is )
00422                 {
00423                         typedef data_node_serializer<NodeType> ST;
00424                         std::auto_ptr<ST> ser( guess_serializer<NodeType>( is ) ); // owns the Serializer: freed on every exit path
00425                         if( ! ser.get() ) return 0;
00426                         try
00427                         {
00428                                 return ser->deserialize( is );
00429                         }
00430                         catch( const s11n_exception & )
00431                         {
00432                                 throw; // rethrow the original object; "throw sex" would copy it as a plain s11n_exception
00433                         }
00434                         catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions
00435                         {
00436                                 throw ::s11n::io_exception( ex.what(), __FILE__, __LINE__ );
00437                         }
00438                         catch( ... )
00439                         {
00440                                 throw ::s11n::io_exception( std::string("Stream-level deserialization failed for unknown reason."),
00441                                                 __FILE__, __LINE__ );
00442                         }
00443                         return 0; // not reached
00444                 }
00445 
00446                 /**
00447                    Overloaded to take a filename, which is passed to
00448                    both guess_serializer(string) and the Serializer's
00449                    deserialize(string). Kept separate from the stream
00450                    overload because some Serializers (e.g. db-based
00451                    ones) can't deal with streams. Returns 0 or throws
00452                    on error; the temporary Serializer is always freed.
00453                    Added in 1.2.1.
00454                 */
00455                 template <typename NodeType>
00456                 NodeType *
00457                 load_node_classload_serializer( std::string const & src )
00458                 {
00459                         typedef data_node_serializer<NodeType> ST;
00460                         std::auto_ptr<ST> ser( guess_serializer<NodeType>( src ) ); // owns the Serializer: freed on every exit path
00461                         if( ! ser.get() ) return 0;
00462                         try
00463                         {
00464                                 return ser->deserialize( src );
00465                         }
00466                         catch( const s11n_exception & )
00467                         {
00468                                 throw; // rethrow the original object; "throw sex" would copy it as a plain s11n_exception
00469                         }
00470                         catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions
00471                         {
00472                                 throw ::s11n::io_exception( ex.what(), __FILE__, __LINE__ );
00473                         }
00474                         catch( ... )
00475                         {
00476                                 throw ::s11n::io_exception( std::string("Stream-level deserialization failed for unknown reason."),
00477                                                 __FILE__, __LINE__ );
00478                         }
00479                         return 0; // not reached
00480                 }
00481 
00482 
00483                 /**
00484                    Parses a root node from the given stream, using
00485                    <code>s11n::io::data_node_serializer<NodeType></code>
00486                    as the base type for looking up a stream handler.
00487                    ACHTUNG: Only usable for loading ROOT nodes.
00488                 */
00489                 template <typename NodeType>
00490                 NodeType * load_node( std::istream & is )
00491                 {
00492                         NodeType * const root = load_node_classload_serializer< NodeType >( is );
00493                         return root;
00494                 }
00495 
00496                 /**
00497                    Overloaded form of load_node( istream ), provided for
00498                    convenience.
00499 
00500                    If ExternalData is true, src is treated as a file,
00501                    otherwise it is treated as a string containing the
00502                    input to parse. Returns 0 on error.
00503 
00504                    ACHTUNG: Only usable for loading ROOT nodes.
00505 
00506                    Behaviour change in 1.2.1:
00507 
00508                    If (ExternalData) then this call is eventually
00509                    passed to ASerializer->deserialize(src). In
00510                    previous versions, src was "converted" to a stream
00511                    and passed to ASerializer->deserialize(istream),
00512                    which does not work for some Serializers. This was
00513                    fixed in 1.2.1 to allow the sqlite3 add-on to play
00514                    along more transparently with s11nconvert and s11nbrowser.
00515                 */
00516                 template <typename NodeType>
00517                 NodeType * load_node( const std::string & src, bool ExternalData = true )
00518                 {
00519                         if( ExternalData )
00520                         {
00521                                 return load_node_classload_serializer<NodeType>( src );
00522                         }
00523                         // src holds the input itself: read it through a stream.
00524                         std::auto_ptr<std::istream> input( ::s11n::io::get_istream( src, ExternalData ) );
00525                         if( ! input.get() ) return 0;
00526                         return load_node<NodeType>( *input );
00527                 }
00528 
00529                 /**
00530                    Loads a root node from the given stream and
00531                    deserializes a SerializableT from it.
00532                    On success returns a new object, else 0.
00533 
00534                    The caller owns the returned pointer.
00535                    ACHTUNG: Only usable for loading ROOT nodes.
00536                 */
00537                 template <typename NodeT,typename SerializableT>
00538                 SerializableT * load_serializable( std::istream & src )
00539                 {
00540                         std::auto_ptr<NodeT> root( load_node<NodeT>( src ) );
00541                         if( 0 == root.get() )
00542                         {
00543                                 // Nothing could be parsed: report it and signal failure.
00544                                 CERR << "load_serializable<>(istream) Could not load a root node from the input.\n";
00545                                 return 0;
00546                         }
00547                         return ::s11n::deserialize<NodeT,SerializableT>( *root );
00548                 }
00549 
00550                 /**
00551                    An overloaded form which takes an input string. If
00552                    ExternalData is true the string is treated as a file
00553                    name, otherwise it is processed as an input stream.
00554                    Returns a new object on success, else 0.
00555 
00556                    ACHTUNG: Only usable for loading ROOT nodes.
00557 
00558                    Behaviour change in 1.2.1 when (ExternalData):
00559                    load_node(string) is used to load the node tree,
00560                    as opposed to load_node(stream). This change was to
00561                    allow non-stream-friendly Serializers (e.g.,
00562                    DB-based) to integrate more fully into s11n. */
00563                 template <typename NodeT,typename SerializableT>
00564                 SerializableT * load_serializable( const std::string & src, bool ExternalData = true )
00565                 {
00566                         if( ExternalData )
00567                         {
00568                                 // src is a file name (or other Serializer-specific
00569                                 // source): load the node tree via load_node(string)
00570                                 // so that non-stream Serializers can take part.
00571                                 std::auto_ptr<NodeT> root( load_node<NodeT>( src ) );
00572                                 if( ! root.get() ) return 0;
00573                                 return ::s11n::deserialize<NodeT,SerializableT>( *root );
00574                         }
00575 
00576                         // src is the input itself: read it through a stream
00577                         // and defer to the istream overload.
00578                         typedef std::auto_ptr<std::istream> StreamPtr;
00579                         StreamPtr input( ::s11n::io::get_istream( src, ExternalData ) );
00580                         if( ! input.get() )
00581                         {
00582                                 return 0;
00583                         }
00584                         return load_serializable<NodeT,SerializableT>( *input );
00585                 }
00586 
00587         } // namespace io
00588 
00589 } // namespace s11n
00590 
00591 #endif // s11n_DATA_NODE_IO_H_INCLUDED

Generated on Sat Dec 10 13:38:25 2005 for libs11n-1.2.1 by  doxygen 1.4.4