data_node_io.hpp

Go to the documentation of this file.
00001 #ifndef s11n_DATA_NODE_IO_H_INCLUDED
00002 #define s11n_DATA_NODE_IO_H_INCLUDED
00003 
00004 ////////////////////////////////////////////////////////////////////////
00005 // data_node_io.hpp
00006 // some i/o interfaces & helpers for s11n
00007 // License: Public Domain
00008 // Author: stephan@s11n.net
00009 ////////////////////////////////////////////////////////////////////////
00010 
00011 
00012 #include <string>
00013 #include <sstream>
00014 #include <list>
00015 #include <map>
00016 #include <deque>
00017 #include <iostream>
00018 #include <memory>// auto_ptr
00019 
00020 #include <cassert>
00021 #include <typeinfo>
00022 
00023 
00024 
00025 // #include <s11n.net/cl/cllite.hpp> // for opening DLLs
00026 
00027 #include <s11n.net/s11n/phoenix.hpp> // phoenix class
00028 
00029 #include <s11n.net/s11n/exception.hpp>
00030 #include <s11n.net/s11n/s11n_debuggering_macros.hpp> // COUT/CERR
00031 #include <s11n.net/s11n/classload.hpp> // classloader()
00032 #include <s11n.net/s11n/serialize.hpp> // unfortunately dep
00033 #include <s11n.net/s11n/traits.hpp> // s11n_traits & node_traits
00034 
00035 #include <s11n.net/s11n/export.hpp> // for exporting symbols to DLL
00036 
00037 ////////////////////////////////////////////////////////////////////////////////
00038 // NO DEPS ON s11n_node.hpp ALLOWED!
00039 ////////////////////////////////////////////////////////////////////////////////
00040 
00041 
00042 #define s11n_SERIALIZER_ENABLE_INTERACTIVE 0 /* an experiment. */
00043 
00044 namespace s11n {
00045 
00046         namespace io {
00047 
00048         /**
00049            Returns an output stream for the given file
00050            name. Caller owns the returned pointer, which may
00051            be 0.
00052 
00053            The returned stream supports zlib and bz2lib if your
00054            libs11n is built with libs11n_zfstream support, meaning
00055            it can write files compressed with zlib/gzip or bz2lib.
00056         */
00057         std::ostream * get_ostream( const std::string name );
00058 
00059         /**
00060            Returns an input stream for the given file
00061            name. Caller owns the returned pointer, which may
00062            be 0.
00063 
00064            The returned stream supports zlib and bz2lib if
00065            your libs11n is built with libs11n_zfstream
00066            support. That means that if
00067            zfstream::compression_policy() is set, then the
00068            returned stream might be a decompressing stream.
00069 
00070            If ExternalData is false then name is assumed to be
00071            a string containing input, and a string-reading stream
00072            is returned.
00073         */
00074         std::istream * get_istream( const std::string name, bool ExternalData = true );
00075 
00076                 /**
00077                    Convenience function for grabbing the first line of a file.
00078 
00079                    If ExternalData == true then returns the first line of the
00080                    file, else returns up to the first newline of src.
00081 
00082            See get_magic_cookie( istream & ) for notes on a minor functional
00083            change introduced in version 1.2.1.
00084                 */
00085                 std::string get_magic_cookie( const std::string & src, bool ExternalData = true );
00086 
00087                 /**
00088                    Convenience function for grabbing the first line of a
00089                    stream.
00090 
00091                    Returns the first line of the given stream, or an
00092                    empty string on error.
00093 
00094            As of version 1.2.1, this function behaves slightly
00095            differently than prior versions: the returned
00096            string will be the first consecutive non-control
00097            characters in the line.  This allows us to properly
00098            read some binary formats which use a string
00099            identifier as a magic cookie (e.g.  sqlite
00100            databases). In this context "control characters"
00101            are anything outside the range of ASCII values
00102            [32..126]. This change "shouldn't" affect any
00103            pre-1.2.1 behaviours, which were never tested/used
00104            with binary file formats.
00105                 */
00106                 std::string get_magic_cookie( std::istream & is );
00107 
00108 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00109         struct progress_reporter // Callback interface handed to the progress-reporting de/serialize overloads (experimental).
00110         {
00111             progress_reporter() {}
00112             virtual ~progress_reporter() {} // virtual: serializers hold implementations through a base-class pointer
00113             virtual void operator()( size_t pos, size_t total ) = 0; // called with a position and a total; NOTE(review): units are up to the Serializer - confirm
00114         };
00115 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00116 
00117 
00118                 /**
00119                    data_node_serializer provides an interface for
00120                    saving/loading a given abstract data node type
00121                    to/from streams.
00122 
00123                    It is designed for containers which comply with
00124                    s11n's Data Node interface and conventions.
00125 
00126                    Conventions:
00127 
00128                    Must provide:
00129 
00130                    typedef NodeT node_type
00131 
00132                    Two de/serialize functions, following the
00133                    stream-based interface shown here (filename-based
00134                    variants are optional, but convenient for clients).
00135 
00136                    The cancel/progress members are an experiment and are compiled
00137                    only when s11n_SERIALIZER_ENABLE_INTERACTIVE is non-zero.
00138 
00139            Potential TODOs for 1.3/1.4:
00140            - Add cancel() and cancelled() to set/query the
00141            read state. This is to support Cancel operations in
00142            UIs which load slow-loading (sqlite3) formats or
00143            large sets and want to safely cancel. Once
00144            cancelled, a read is not restartable (or this is
00145            not guaranteed). All we can really do is provide a
00146            flag and conventions and hope implementations
00147            respect them.
00148 
00149            - Provide some sort of progress feedback mechanism,
00150            at least for reading, again to support users of
00151            slow Serializers. This is complicated by the
00152            unknown-read-size nature of Serializers.
00153                 */
00154                 template <typename NodeT>
00155                 class S11N_EXPORT_API data_node_serializer
00156                 {
00157                 public:
00158                         /**
00159                            The underlying data type used to store
00160                            serialized data.
00161                         */
00162                         typedef NodeT node_type;
00163                         /** Installs a placeholder cookie, names the metadata node "serializer_metadata" and,
00164                             when the interactive experiment is enabled, resets the cancel/progress state. */
00165                         data_node_serializer()
00166                                 : m_cookie( "WARNING: magic_cookie() not set!" ) // subclasses must replace this
00167 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00168                                 , m_cancelled( false ) // these two were previously left uninitialized
00169                                 , m_prog( 0 )
00170 #endif
00171                         {
00172                                 ::s11n::node_traits<node_type>::name( this->m_meta, "serializer_metadata" );
00173                                 using namespace s11n::debug;
00174                                 S11N_TRACE(TRACE_CTOR) << "data_node_serializer()\n";
00175                         }
00176                         virtual ~data_node_serializer()
00177                         {
00178                                 using namespace s11n::debug;
00179                                 S11N_TRACE(TRACE_DTOR) << "~data_node_serializer() ["<<this->magic_cookie()<<"]\n";
00180                         }
00181 
00182 
00183                         /**
00184                            A convenience typedef, mainly for subclasses.
00185                         */
00186                         typedef std::map<std::string,std::string> translation_map;
00187 
00188                         /**
00189                            Returns a map intended for use with
00190                            ::s11n::io::strtool::translate_entities().
00191                            
00192                            The default implementation returns an empty map.
00193                            
00194                            Subclasses should override this to return a translation
00195                            map, if they need one. The default map is empty.
00196 
00197                            Be aware that this may very well be called
00198                            post-main(), so subclasses should take that into
00199                            account and provide post-main()-safe maps!
00200                         */
00201                         virtual const translation_map & entity_translations() const
00202                         {
00203                                 typedef ::s11n::Detail::phoenix<translation_map,data_node_serializer<node_type> > TMap;
00204                                 return TMap::instance();
00205                         }
00206 
00207 
00208 
00209                         /**
00210                            Must be implemented to format node_type to the given ostream.
00211 
00212                            It should return true on success, false on error.
00213 
00214                            The default implementation always returns false.
00215 
00216                            Note that this function does not use
00217                            s11n::serialize() in any way, and is only
00218                            coincidentally related to it.
00219                         */
00220                         virtual bool serialize( const node_type & /*src*/, std::ostream & /*dest*/ )
00221                         {
00222                                 return false;
00223                         }
00224 
00225                        /**
00226                            Overloaded to save src to the given filename.
00227 
00228                            The default implementation treats destfile
00229                            as a file name and passes the call on to
00230                            serialize(node_type,ostream).  The output
00231                            file is compressed if zfstream::compression_policy()
00232                            has been set to enable it.
00233 
00234                            Returns true on success, false on error.
00235 
00236                            This function is virtual so that
00237                            Serializers which do not deal with
00238                            i/ostreams (e.g., those which use a
00239                            database connection) can override it to
00240                            interpret destfile as, e.g., a
00241                            database-related string (e.g., connection,
00242                            db object name, or whatever).
00243 
00244                Fixed in 1.0.2: returns false when destfile
00245                is empty.
00246                         */
00247                         virtual bool serialize( const node_type & src, const std::string & destfile )
00248                         {
00249                                 if( destfile.empty() ) return false;
00250                                 // Own the stream through auto_ptr (not a bare delete) so that
00251                                 // it is released even if the stream-level serialize() throws.
00252                                 std::auto_ptr<std::ostream> os( ::s11n::io::get_ostream( destfile ) );
00253                                 if( ! os.get() ) return false;
00254                                 return this->serialize( src, *os );
00255                         }
00256 
00257                         /**
00258                            Must be implemented to parse a node_type from the given istream.
00259 
00260                            It should return a new node (which the caller owns) on
00261                            success, 0 on error.
00262                            The default implementation always returns 0 and does nothing.
00263 
00264                            Note that this function does not use
00265                            s11n::deserialize() in any way, and is only
00266                            coincidentally related to it.
00267 
00268                Subclasses should try not to have to buffer
00269                the whole stream before parsing, because
00270                object trees can be arbitrarily large and a
00271                buffered copy effectively doubles the
00272                memory needed to store the tree during the
00273                deserialization process. Buffering
00274                behaviour is unspecified by this interface,
00275                however, and subclasses may pre-buffer the
00276                whole stream content if they need to.
00277                         */
00278                         virtual node_type * deserialize( std::istream & )
00279                         {
00280                                 return 0;
00281                         }
00282 
00283 
00284                         /**
00285                            Overloaded to load a node from the given filename.
00286 
00287                            It supports zlib/bz2lib decompression for
00288                            files if your s11n lib supports them.
00289 
00290                            This is virtual for the same reason as
00291                            serialize(string). Returns 0 if no input
00292                            stream can be obtained for src.
00293                         */
00294                         virtual node_type * deserialize( const std::string & src )
00295                         {
00296                                 typedef std::auto_ptr<std::istream> AP;
00297                                 AP is( ::s11n::io::get_istream( src ) );
00298                                 if( ! is.get() ) return 0;
00299                                 return this->deserialize( *is );
00300                         }
00301 
00302 
00303                         /**
00304                            Gets this object's magic cookie.
00305 
00306                            Cookies are registered with
00307                            <code>class_loader< data_node_serializer<NodeType> ></code>
00308                            types to map files to file input parsers.
00309                         */
00310                         std::string magic_cookie() const
00311                         {
00312                                 return this->m_cookie;
00313                         }
00314 
00315 
00316 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00317                         bool is_cancelled() const { return m_cancelled; }
00318                         void cancel() { this->m_cancelled = true; }
00319                         // Each overload below installs p as the progress callback for one call and detaches it afterwards, also on exception.
00320                         node_type * deserialize( std::string const & src, progress_reporter & p )
00321                         {
00322                                 this->m_prog = &p;
00323                                 node_type * n = 0;
00324                                 try
00325                                 {
00326                                         n = this->deserialize( src );
00327                                         this->m_prog = 0;
00328                                 }
00329                                 catch(...)
00330                                 {
00331                                         this->m_prog = 0;
00332                                         throw;
00333                                 }
00334                                 return n;
00335                         }
00336 
00337                         node_type * deserialize( std::istream & src, progress_reporter & p )
00338                         {
00339                                 this->m_prog = &p;
00340                                 node_type * n = 0;
00341                                 try
00342                                 {
00343                                         n = this->deserialize( src );
00344                                         this->m_prog = 0;
00345                                 }
00346                                 catch(...)
00347                                 {
00348                                         this->m_prog = 0;
00349                                         throw;
00350                                 }
00351                                 return n;
00352                         }
00353 
00354                         bool serialize( const node_type & src, std::ostream & dest, progress_reporter & p )
00355                         {
00356                                 this->m_prog = &p;
00357                                 bool b = false;
00358                                 try
00359                                 {
00360                                         b = this->serialize( src, dest );
00361                                         this->m_prog = 0;
00362                                 }
00363                                 catch(...)
00364                                 {
00365                                         this->m_prog = 0;
00366                                         throw;
00367                                 }
00368                                 return b;
00369                         }
00370 
00371                         bool serialize( const node_type & src, std::string const & dest, progress_reporter & p )
00372                         {
00373                                 this->m_prog = &p;
00374                                 bool b = false;
00375                                 try
00376                                 {
00377                                         b = this->serialize( src, dest );
00378                                         this->m_prog = 0;
00379                                 }
00380                                 catch(...)
00381                                 {
00382                                         this->m_prog = 0;
00383                                         throw;
00384                                 }
00385                                 return b;
00386                         }
00387 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00388 
00389                 protected:
00390                         /**
00391                            Sets the magic cookie for this type.
00392                         */
00393                         void magic_cookie( const std::string & c )
00394                         {
00395                                 this->m_cookie = c;
00396                         }
00397 
00398                         /**
00399                            metadata is an experimental feature
00400                            allowing serializers to store arbitrary
00401                            serializer-specific information in their
00402                            data streams.
00403                          */
00404                         node_type & metadata()
00405                         { return this->m_meta; }
00406                         /**
00407                            A const overload of metadata().
00408                          */
00409                         const node_type & metadata() const
00410                         { return this->m_meta;}
00411 
00412 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00413                         void progress( size_t pos, size_t total ) // forwards to the installed reporter, if there is one
00414                         {
00415                                 if( this->m_prog )
00416                                 {
00417                                         this->m_prog->operator()( pos, total );
00418                                 }
00419                         }
00420                         void clear_cancel() { this->m_cancelled = false; }
00421                         void assert_not_cancelled() // throws s11n_exception once cancel() has been called
00422                         {
00423                                 if( this->is_cancelled() )
00424                                 {
00425                                         throw ::s11n::s11n_exception("Serializer operation was cancelled.");
00426                                 }
00427                         }
00428 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00429 
00430                 private:
00431                         std::string m_cookie; // magic cookie identifying this Serializer's format
00432                         node_type m_meta; // experimental serializer-specific metadata node
00433 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00434                         bool m_cancelled; // set by cancel(), cleared by clear_cancel()
00435                         progress_reporter * m_prog; // not owned; non-zero only during a progress-reporting call
00436 #endif
00437                 }; // data_node_serializer<>
00438 
00439         /**
00440            Tries to guess which Serializer can be used to read
00441            is. Returns an instance of that type on success or
00442            0 on error. The caller owns the returned object. It
00443            may propagate exceptions.
00444 
00445            Achtung: the first line of input from the input
00446            stream is consumed by this function (to find the
00447            cookie), which means that if this stream is handed
00448            off to the object which this function returns, it
00449            won't get its own cookie.  The only reliable way
00450            around this [that i know of] is to buffer the whole
00451            input as a string, and i don't wanna do that (it's
00452            really bad for massive data files).
00453 
00454            Special feature:
00455 
00456            If the first line of the stream starts with
00457            "#s11n::io::serializer CLASSNAME" or
00458            "#!/s11n/io/serializer CLASSNAME" then the CLASSNAME
00459            token is expected to be a Serializer class
00460            name. This function will try to classload that
00461            object and return it. If that fails, it
00462            will return 0.  Blanks before and after CLASSNAME
00463            are ignored. The intention of this feature is to
00464            simplify creation of non-C++ tools which generate
00465            s11n data (e.g., perl scripts), so that they don't
00466            need to know the exact cookies.
00467 
00468            In all other cases the first line itself is used
00469            as the classloader key.
00470 
00471            Added in 1.2.1.
00472         */
00473         template <typename NodeType>
00474         data_node_serializer<NodeType> * guess_serializer( std::istream & is )
00475         {
00476             typedef data_node_serializer<NodeType> ST;
00477             const std::string cookie( get_magic_cookie( is ) );
00478             if( cookie.empty() ) return 0;
00479             // The two accepted spellings of the class-name directive.
00480             const char * const directives[] = {
00481                 "#s11n::io::serializer ",
00482                 "#!/s11n/io/serializer " // added in 1.1.0
00483             };
00484             for( int i = 0; i < 2; ++i )
00485             {
00486                 const std::string prefix( directives[i] );
00487                 // Only a directive at the very start of the line counts, so test each one as a prefix.
00488                 if( 0 != cookie.compare( 0, prefix.size(), prefix ) ) continue;
00489                 // Drop blanks around CLASSNAME; a missing or all-blank name is passed on as "".
00490                 const std::string::size_type from = cookie.find_first_not_of( ' ', prefix.size() );
00491                 if( std::string::npos == from ) return ::s11n::cl::classload<ST>( std::string() );
00492                 const std::string::size_type to = cookie.find_last_not_of( ' ' );
00493                 return ::s11n::cl::classload<ST>( cookie.substr( from, to - from + 1 ) );
00494             }
00495             // No directive: the cookie itself is the lookup key.
00496             return ::s11n::cl::classload<ST>( cookie );
00497         }
00498 
00499         /**
00500            An overload which assumes infile is a local file. Returns 0
00501            if no input stream can be obtained for infile. The stream is
00502            closed before returning. Added in 1.2.1.
00503         */
00504         template <typename NodeType>
00505         data_node_serializer<NodeType> * guess_serializer( std::string const & infile )
00506         {
00507             // Pass the string itself: going through c_str() would cut the name at an embedded NUL.
00508             std::auto_ptr<std::istream> is( get_istream( infile ) );
00509             if( ! is.get() ) return 0;
00510             return guess_serializer<NodeType>( *is );
00511         }
00512 
00513                 /**
00514 
00515                 Tries to load a NodeType object from the given
00516                 stream. It uses the cookie from the input stream (the
00517                 first line) and uses
00518                 s11n::cl::classload<SerializerBaseType>() to find a
00519                 matching Serializer.
00520 
00521                 On error 0 is returned or an exception is thrown,
00522                 else a new pointer, which the caller owns. An
00523                 s11n_exception is passed on unchanged; anything else
00524                 is reported as an io_exception.
00525 
00526                 ACHTUNG: Only usable for loading ROOT nodes.
00527 
00528         See guess_serializer( std::istream & is ) for more
00529         information, as that function is used to dispatch the stream.
00530                 */
00531                 template <typename NodeType>
00532                 NodeType *
00533                 load_node_classload_serializer( std::istream & is )
00534                 {
00535             try
00536             {
00537                 typedef data_node_serializer<NodeType> ST;
00538                 std::auto_ptr<ST> ser( guess_serializer<NodeType>( is ) );
00539                 if( ! ser.get() ) return 0;
00540                 return ser->deserialize( is );
00541             }
00542             catch( const s11n_exception & )
00543             {
00544                 // Bare rethrow: "throw sex;" would copy the object as a plain s11n_exception and slice any derived type.
00545                 throw;
00546             }
00547             catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions
00548             {
00549                 throw ::s11n::io_exception( ex.what(), __FILE__, __LINE__ );
00550             }
00551             catch( ... )
00552             {
00553                 throw ::s11n::io_exception( std::string("Stream-level deserialization failed for unknown reason."),
00554                                 __FILE__, __LINE__ );
00555             }
00556             return 0; // not reached; kept for compilers which want a return on every path
00557                 }
00558 
00559         /**
00560            Overloaded to take a filename. This is handled
00561            separately from the stream overload because some
00562            Serializers must behave differently in the face of
00563            streams. e.g., db-based Serializers typically can't
00564            deal with streams. Returns 0 if no Serializer is found.
00565            An s11n_exception is passed on unchanged; anything
00566            else is reported as an io_exception. Added in 1.2.1.
00567         */
00568                 template <typename NodeType>
00569                 NodeType *
00570                 load_node_classload_serializer( std::string const & src )
00571                 {
00572             try
00573             {
00574                 typedef data_node_serializer<NodeType> ST;
00575                 std::auto_ptr<ST> ser( guess_serializer<NodeType>( src ) );
00576                 if( ! ser.get() ) return 0;
00577                 return ser->deserialize( src );
00578             }
00579             catch( const s11n_exception & )
00580             {
00581                 // Bare rethrow: "throw sex;" would copy the object as a plain s11n_exception and slice any derived type.
00582                 throw;
00583             }
00584             catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions
00585             {
00586                 throw ::s11n::io_exception( ex.what(), __FILE__, __LINE__ );
00587             }
00588             catch( ... )
00589             {
00590                 throw ::s11n::io_exception( std::string("Stream-level deserialization failed for unknown reason."),
00591                                 __FILE__, __LINE__ );
00592             }
00593             return 0; // not reached; kept for compilers which want a return on every path
00594                 }
00595 
00596 
00597                 /**
00598                    Parses a node from the stream is and returns it. The stream handler is
00599                    looked up with <code>s11n::io::data_node_serializer<NodeType></code>
00600                    as the base type. ACHTUNG: Only usable for loading ROOT nodes.
00601                 */
00602                 template <typename NodeType>
00603                 NodeType * load_node( std::istream & is )
00604                 {
00605                         typedef NodeType * NodePtr;
00606                         NodePtr const root = load_node_classload_serializer< NodeType >( is );
00607                         return root;
00608                 }
00609 
00610                 /**
00611                    String-based counterpart of load_node( istream ),
00612                    provided for convenience.
00613 
00614                    If ExternalData is true, src is treated as a file,
00615                    otherwise it is treated as a string containing the
00616                    input to parse. Returns 0 if, in the latter case,
00617                    no input stream can be obtained.
00618 
00619                    ACHTUNG: Only usable for loading ROOT nodes.
00620 
00621            Behaviour change in 1.2.1:
00622            If (ExternalData) then this call is eventually
00623            passed to ASerializer->deserialize(src). In
00624            previous versions, src was "converted" to a stream
00625            and passed to ASerializer->deserialize(istream),
00626            which does not work for some Serializers. This was
00627            fixed in 1.2.1 to allow the sqlite3 add-on to play
00628            along more transparently with s11nconvert and s11nbrowser.
00629                 */
00630                 template <typename NodeType>
00631                 NodeType * load_node( const std::string & src, bool ExternalData = true )
00632                 {
00633                         if( ExternalData )
00634                         {
00635                                 // External source: hand src itself to the Serializer lookup.
00636                                 return load_node_classload_serializer<NodeType>( src );
00637                         }
00638                         // src holds the input itself: read it back through a stream.
00639                         std::auto_ptr<std::istream> input( ::s11n::io::get_istream( src, false ) );
00640                         return input.get() ? load_node<NodeType>( *input ) : 0;
00641                 }
00642 
00643                 /**
00644                    Loads a root node from the stream src and deserializes
00645                    a SerializableT from it. On success returns a new
00646                    object, else 0.
00647 
00648                    The caller owns the returned pointer.
00649                    ACHTUNG: Only usable for loading ROOT nodes.
00650                 */
00651                 template <typename NodeT,typename SerializableT>
00652                 SerializableT * load_serializable( std::istream & src )
00653                 {
00654                         std::auto_ptr<NodeT> root( load_node<NodeT>( src ) );
00655                         if( root.get() )
00656                         {
00657                                 // The intermediate node is freed when root goes out of scope.
00658                                 return ::s11n::deserialize<NodeT,SerializableT>( *root );
00659                         }
00660                         CERR << "load_serializable<>(istream) Could not load a root node from the input.\n";
00661                         return 0;
00662                 }
00663 
00664                 /**
00665                    An overloaded form which takes an input string. If
00666                    ExternalData is true the string is treated as a file
00667                    name, otherwise it is processed as an input stream.
00668                    Returns 0 on error; the caller owns the result.
00669 
00670                    ACHTUNG: Only usable for loading ROOT nodes.
00671 
00672            Behaviour change in 1.2.1 when (ExternalData):
00673            load_node(string) is used to load the node tree,
00674            as opposed to load_node(stream). This change was to allow
00675            non-stream-friendly Serializers (e.g., DB-based) to integrate more fully.
00676                 */
00677                 template <typename NodeT,typename SerializableT>
00678                 SerializableT * load_serializable( const std::string & src, bool ExternalData = true )
00679                 {
00680                         if( ExternalData )
00681                         {
00682                                 // External source: load the node tree via load_node(string),
00683                                 // then build the Serializable from it.
00684                                 std::auto_ptr<NodeT> root( load_node<NodeT>( src ) );
00685                                 if( ! root.get() ) return 0;
00686                                 return ::s11n::deserialize<NodeT,SerializableT>( *root );
00687                         }
00688 
00689                         // src holds the serialized data itself: wrap it in a
00690                         // stream and defer to the stream-based overload.
00691                         std::auto_ptr<std::istream> input( ::s11n::io::get_istream( src, false ) );
00692                         if( ! input.get() )
00693                         {
00694                                 // No stream could be created; fail quietly.
00695                                 return 0;
00696                         }
00697 
00698                         return load_serializable<NodeT,SerializableT>( *input );
00699                 }
00700 
00701         } // namespace io
00702 
00703 } // namespace s11n
00704 
00705 #endif // s11n_DATA_NODE_IO_H_INCLUDED

Generated on Thu Feb 8 10:25:27 2007 for libs11n-1.2.5 by  doxygen 1.5.0