data_node_io.hpp

Go to the documentation of this file.
00001 #ifndef s11n_DATA_NODE_IO_H_INCLUDED
00002 #define s11n_DATA_NODE_IO_H_INCLUDED
00003 
00004 ////////////////////////////////////////////////////////////////////////
00005 // data_node_io.hpp
00006 // some i/o interfaces & helpers for s11n
00007 // License: Public Domain
00008 // Author: stephan@s11n.net
00009 ////////////////////////////////////////////////////////////////////////
00010 
00011 
00012 #include <string>
00013 #include <sstream>
00014 #include <list>
00015 #include <map>
00016 #include <deque>
00017 #include <iostream>
00018 #include <memory>// auto_ptr
00019 
00020 #include <cassert>
00021 #include <typeinfo>
00022 
00023 
00024 
00025 // #include <s11n.net/cl/cllite.hpp> // for opening DLLs
00026 
00027 #include <s11n.net/s11n/phoenix.hpp> // phoenix class
00028 
00029 #include <s11n.net/s11n/exception.hpp>
00030 #include <s11n.net/s11n/s11n_debuggering_macros.hpp> // COUT/CERR
00031 #include <s11n.net/s11n/classload.hpp> // classloader()
00032 #include <s11n.net/s11n/serialize.hpp> // unfortunately dep
00033 #include <s11n.net/s11n/traits.hpp> // s11n_traits & node_traits
00034 
00035 #include <s11n.net/s11n/export.hpp> // for exporting symbols to DLL
00036 
00037 ////////////////////////////////////////////////////////////////////////////////
00038 // NO DEPS ON s11n_node.hpp ALLOWED!
00039 ////////////////////////////////////////////////////////////////////////////////
00040 
00041 
00042 #define s11n_SERIALIZER_ENABLE_INTERACTIVE 0 /* an experiment. */
00043 
00044 namespace s11n {
00045 
00046         namespace io {
00047 
00048         /**
00049            Returns an output stream for the given file
00050            name. Caller owns the returned pointer, which may
00051            be 0.
00052 
00053            The returned stream supports zlib and libbz2 if your
00054            libs11n is built with libs11n_zfstream support, meaning
00055            it can write files compressed with zlib/gzip or bz2lib.
00056         */
00057         std::ostream * get_ostream( const std::string name );
00058 
00059         /**
00060            Returns an input stream for the given file
00061            name. Caller owns the returned pointer, which may
00062            be 0.
00063 
00064            The returned stream supports zlib and libbz2 if
00065            your libs11n is built with libs11n_zfstream
00066            support. That means that if
00067            zfstream::compression_policy() is set, then the
00068            returned stream might be a compressing stream.
00069 
00070            If ExternalData is false then name is assumed to be
00071            a string containing input, and a string-reading stream
00072            is returned.
00073         */
00074         std::istream * get_istream( const std::string name, bool ExternalData = true );
00075 
00076                 /**
00077                    Convenience function for grabbing the first line of a file.
00078 
00079                    If ExternalData == true then returns the first line of the
00080                    file, else returns up to the first newline of src.
00081 
00082            See get_magic_cookie( istream & ) for notes on a minor functional
00083            change introduced in version 1.2.1.
00084                 */
00085                 std::string get_magic_cookie( const std::string & src, bool ExternalData = true );
00086 
00087                 /**
00088                    Convenience function for grabbing the first line of a
00089                    stream.
00090 
00091                    Returns the first line of the given stream, or an
00092                    empty string on error.
00093 
00094            As of version 1.2.1, this function behaves slightly
00095            differently than prior versions: the returned
00096            string will be the first consecutive non-control
00097            characters in the line.  This allows us to properly
00098            read some binary formats which use a string
00099            identifier as a magic cookie (e.g.  sqlite
00100            databases). In this context "control characters"
00101            are anything outside the range of ASCII values
00102            [32..126]. This change "shouldn't" affect any
00103            pre-1.2.1 behaviours, which were never tested/used
00104            with binary file formats.
00105                 */
00106                 std::string get_magic_cookie( std::istream & is );
00107 
00108 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
/**
   Abstract callback interface used by the (experimental)
   interactive Serializer API. Implementations override
   operator() to receive a (pos, total) pair. The units of
   pos and total are not defined by this interface.
*/
struct progress_reporter
{
    /**
       Called with the current position and the total.
       Must be implemented by subclasses.
    */
    virtual void operator()( size_t pos, size_t total ) = 0;

    /** Virtual so that subclasses can be destroyed via a base pointer. */
    virtual ~progress_reporter() {}

    /** Does nothing. */
    progress_reporter() {}
};
00115 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00116 
00117 
00118                 /**
00119                    data_node_serializer provides an interface for
00120                    saving/loading a given abstract data node type
00121                    to/from streams.
00122 
00123                    It is designed for containers which comply with
00124                    s11n's Data Node interface and conventions.
00125 
00126 
00127                    Conventions:
00128 
00129                    Must provide:
00130 
00131                    typedef NodeT node_type
00132 
00133                    Two de/serialize functions, following the
00134                    stream-based interface shown here (filename-based
00135                    variants are optional, but convenient for clients).
00136 
00137 
00138            Potential TODOs for 1.3/1.4:
00139 
00140            - Add cancel() and cancelled() to set/query the
00141            read state. This is to support Cancel operations in
00142            UIs which load slow-loading (sqlite3) formats or
00143            large sets and want to safely cancel. Once
00144            cancelled, a read is not restartable (or this is
00145            not guaranteed). All we can really do is provide a
00146            flag and conventions and hope implementations
00147            respect them.
00148 
00149            - Provide some sort of progress feedback mechanism,
00150            at least for reading, again to support users of
00151            slow Serializers. This is complicated by the
00152            unknown-read-size nature of Serializers.
00153                 */
00154                 template <typename NodeT>
00155                 class S11N_EXPORT_API data_node_serializer
00156                 {
00157                 public:
00158                         /**
00159                            The underlying data type used to store
00160                            serialized data.
00161                         */
00162                         typedef NodeT node_type;
00163 
00164 
00165                         data_node_serializer()
00166                         {
00167                                 this->magic_cookie( "WARNING: magic_cookie() not set!" );
00168                                 // ^^^ subclasses must do this.
00169                 typedef ::s11n::node_traits<node_type> NTR;
00170                 NTR::name( this->metadata(), "serializer_metadata" );
00171                                 // this->metadata().name( "serializer_metadata" );
00172                 using namespace s11n::debug;
00173                 S11N_TRACE(TRACE_CTOR) << "data_node_serialier()\n";
00174 
00175                         };
00176                         virtual ~data_node_serializer()
00177             {
00178                 using namespace s11n::debug;
00179                 S11N_TRACE(TRACE_DTOR) << "~data_node_serialier() ["<<this->magic_cookie()<<"]\n";
00180             }
00181 
00182 
00183                         /**
00184                            A convenience typedef, mainly for subclasses.
00185                         */
00186                         typedef std::map<std::string,std::string> translation_map;
00187 
00188                         /**
00189                            Returns a map intended for use with
00190                            ::s11n::io::strtool::translate_entities().
00191                            
00192                            The default implementation returns an empty map.
00193                            
00194                            Subclasses should override this to return a translation
00195                            map, if they need one. The default map is empty.
00196 
00197                            Be aware that this may very well be called
00198                            post-main(), so subclasses should take that into
00199                            account and provide post-main()-safe maps!
00200                         */
00201                         virtual const translation_map & entity_translations() const
00202                         {
00203                                 typedef ::s11n::Detail::phoenix<translation_map,data_node_serializer<node_type> > TMap;
00204                                 return TMap::instance();
00205                         }
00206 
00207 
00208 
00209                         /**
00210                            Must be implemented to format node_type to the given ostream.
00211 
00212                            It should return true on success, false on error.
00213 
00214                            The default implementation always returns false.
00215 
00216                            Note that this function does not use
00217                            s11n::serialize() in any way, and is only
00218                            coincidentally related to it.
00219                         */
00220                         virtual bool serialize( const node_type & /*src*/, std::ostream & /*dest*/ )
00221                         {
00222                                 return false;
00223                         }
00224 
00225                        /**
00226                            Overloaded to save dest to the given filename.
00227 
00228                            The default implementation treats destfile
00229                            as a file name and passes the call on to
00230                            serialize(node_type,ostream).  The output
00231                            file is compressed if zfstream::compression_policy()
00232                            has been set to enable it.
00233 
00234                            Returns true on success, false on error.
00235 
00236                            This function is virtual so that
00237                            Serializers which do not deal with
00238                            i/ostreams (e.g., those which use a
00239                            database connection) can override it to
00240                            interpret destfile as, e.g., a
00241                            database-related string (e.g., connection,
00242                            db object name, or whatever).
00243 
00244                Fixed in 1.0.2: returns false when destfile
00245                is empty.
00246 
00247                Fixed in 1.2.6: if this->serialize(src,outstream) throws then
00248                this function no longer leaks.
00249                         */
00250                         virtual bool serialize( const node_type & src, const std::string & destfile )
00251                         {
00252                 if( destfile.empty() ) return false;
00253                                 std::auto_ptr<std::ostream> os( ::s11n::io::get_ostream( destfile ) );
00254                                 if( ! os.get() ) return false;
00255                                 bool b = this->serialize( src, *os );
00256                                 return b;
00257                         }
00258 
00259                         /**
00260                            Must be implemented to parse a node_type from the given istream.
00261 
00262                            It should return true on success, false on error.
00263 
00264                            The default implementation always returns 0 and does nothing.
00265 
00266                            Note that this function does not use
00267                            s11n::deserialize() in any way, and is only
00268                            coincidentally related to it.
00269 
00270                Subclasses should try not to have to buffer
00271                the whole stream before parsing, because
00272                object trees can be arbitrarily large and a
00273                buffered copy effectively doubles the
00274                memory needed to store the tree during the
00275                deserialization process. Buffering
00276                behaviour is unspecified by this interface,
00277                however, and subclasses may pre-buffer the
00278                whole stream content if they need to.
00279                         */
00280                         virtual node_type * deserialize( std::istream & )
00281                         {
00282                                 return 0;
00283                         }
00284 
00285 
00286                         /**
00287                            Overloaded to load dest from the given filename.
00288 
00289                            It supports zlib/bz2lib decompression for
00290                            files if your s11n lib supports them.
00291 
00292                            This is virtual for the same reason as
00293                            serialize(string).
00294 
00295                         */
00296                         virtual node_type * deserialize( const std::string & src )
00297                         {
00298                                 typedef std::auto_ptr<std::istream> AP;
00299                                 AP is = AP( ::s11n::io::get_istream( src ) );
00300                                 if( ! is.get() ) return 0;
00301                                 return this->deserialize( *is );
00302                         }
00303 
00304 
00305                         /**
00306                            Gets this object's magic cookie.
00307 
00308                            Cookies are registered with
00309                            <code>class_loader< data_node_serializer<NodeType> ></code>
00310                            types to map files to file input parsers.
00311                         */
00312                         std::string magic_cookie() const
00313                         {
00314                                 return this->m_cookie;
00315                         }
00316 
00317 
00318 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00319             bool is_cancelled() const { return m_cancelled; }
00320             void cancel() { this->m_cancelled = true; }
00321 
00322             node_type * deserialize( std::string const & src, progress_reporter & p )
00323             {
00324                 this->m_prog = &p;
00325                 node_type * n = 0;
00326                 try
00327                 {
00328                     n = this->deserialize( src );
00329                     this->m_prog = 0;
00330                 }
00331                 catch(...)
00332                 {
00333                     this->m_prog = 0;
00334                     throw;
00335                 }
00336                 return n;
00337             }
00338 
00339             node_type * deserialize( std::istream & src, progress_reporter & p )
00340             {
00341                 this->m_prog = &p;
00342                 node_type * n = 0;
00343                 try
00344                 {
00345                     n = this->deserialize( src );
00346                     this->m_prog = 0;
00347                 }
00348                 catch(...)
00349                 {
00350                     this->m_prog = 0;
00351                     throw;
00352                 }
00353                 return n;
00354             }
00355 
00356                         bool serialize( const node_type & src, std::ostream & dest, progress_reporter & p )
00357                         {
00358                 this->m_prog = &p;
00359                 bool b = false;
00360                 try
00361                 {
00362                     b = this->serialize( src, dest );
00363                     this->m_prog = 0;
00364                 }
00365                 catch(...)
00366                 {
00367                     this->m_prog = 0;
00368                     throw;
00369                 }
00370                 return b;
00371                         }
00372 
00373                         bool serialize( const node_type & src, std::string const & dest, progress_reporter & p )
00374                         {
00375                 this->m_prog = &p;
00376                 bool b = false;
00377                 try
00378                 {
00379                     b = this->serialize( src, dest );
00380                     this->m_prog = 0;
00381                 }
00382                 catch(...)
00383                 {
00384                     this->m_prog = 0;
00385                     throw;
00386                 }
00387                 return b;
00388                         }
00389 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00390 
00391                 protected:
00392                         /**
00393                            Sets the magic cookie for this type.
00394                         */
00395                         void magic_cookie( const std::string & c )
00396                         {
00397                                 this->m_cookie = c;
00398                         }
00399 
00400                         /**
00401                            metadata is an experimental feature
00402                            allowing serializers to store arbitrary
00403                            serializer-specific information in their
00404                            data steams.
00405                          */
00406                         node_type & metadata()
00407                         { return this->m_meta; }
00408                         /**
00409                            A const overload of metadata().
00410                          */
00411                         const node_type & metadata() const
00412                         { return this->m_meta;}
00413 
00414 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00415             void progress( size_t pos, size_t total )
00416             {
00417                 if( this->m_prog )
00418                 {
00419                     this->m_prog->operator()( pos, total );
00420                 }
00421             }
00422             void clear_cancel() { this->m_cancelled = false; }
00423             void assert_not_cancelled()
00424             {
00425                 if( this->is_cancelled() )
00426                 {
00427                     throw ::s11n::s11n_exception("Serializer operation was cancelled.");
00428                 }
00429             }
00430 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00431 
00432                 private:
00433                         std::string m_cookie;
00434                         node_type m_meta;
00435 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00436             bool m_cancelled;
00437             progress_reporter * m_prog;
00438 #endif
00439                 }; // data_node_serializer<>
00440 
00441         /**
00442            Tries to guess which Serializer can be used to read
00443            is. Returns an instance of that type on success or
00444            0 on error. The caller owns the returned object. It
00445            may propagate exceptions.
00446 
00447            Achtung: the first line of input from the input
00448            stream is consumed by this function (to find the
00449            cookie), which means that if this stream is handed
00450            off to the object which this function returns, it
00451            won't get its own cookie.  The only reliable way
00452            around this [that i know of] is to buffer the whole
00453            input as a string, and i don't wanna do that (it's
00454            really bad for massive data files).
00455 
00456            Special feature:
00457            
00458            If the first line of the stream is
00459            "#s11n::io::serializer CLASSNAME" or
00460            "#!/s11n/io/serialize CLASSNAME" then the CLASSNAME
00461            token is expected to be a Serializer class
00462            name. This function will try to classload that
00463            object.  If successful it will use that type to
00464            deserialize the input stream. If that fails, it
00465            will return 0.  The intention of this feature is to
00466            simplify creation of non-C++ tools which generate
00467            s11n data (e.g., perl scripts), so that they don't
00468            need to know the exact cookies.
00469 
00470            Added in 1.2.1.
00471         */
00472         template <typename NodeType>
00473         data_node_serializer<NodeType> * guess_serializer( std::istream & is )
00474         {
00475             typedef data_node_serializer<NodeType> ST;
00476             ST * ser = 0;
00477                         std::string cookie;
00478                         // CERR << "cookie="<<cookie<<std::endl;
00479             cookie = get_magic_cookie( is );
00480             if( cookie.empty() ) return 0;
00481             std::string opencmd = "#s11n::io::serializer ";
00482             std::string::size_type at = cookie.find( opencmd );
00483             if( std::string::npos == at )
00484             { // try new approach, added in 1.1.0:
00485                 opencmd = "#!/s11n/io/serializer ";
00486                 at = cookie.find( opencmd );
00487             }
00488 
00489             if( 0 == at )
00490             {
00491                 std::string dll = cookie.substr( opencmd.size() );
00492                 ser = ::s11n::cl::classload<ST>( dll );
00493             }
00494             else
00495             {
00496                 ser =  ::s11n::cl::classload<ST>( cookie );
00497             }
00498             return ser;
00499         }
00500 
00501         /**
00502            An overload which assumes infile is a local file.
00503 
00504            Added in 1.2.1.
00505         */
00506         template <typename NodeType>
00507         data_node_serializer<NodeType> * guess_serializer( std::string const & infile )
00508         {
00509             std::auto_ptr<std::istream> is( get_istream( infile.c_str() ) );
00510             return is.get()
00511                 ? guess_serializer<NodeType>( *is )
00512                 : 0;
00513         }
00514 
00515                 /**
00516 
00517                 Tries to load a NodeType object from the given
00518                 node. It uses the cookie from the input stream (the
00519                 first line) and uses
00520                 s11n::cl::classload<SerializerBaseType>() to find a
00521                 matching Serializer.
00522 
00523                 On error 0 is returned or an exception is thrown,
00524                 else a new pointer, which the caller owns.
00525 
00526                 ACHTUNG: Only usable for loading ROOT nodes.
00527 
00528         See guess_serializer( std::istream & is ) for more
00529         information, as that function is used to dispatch the
00530         stream.
00531 
00532                 */
00533                 template <typename NodeType>
00534                 NodeType *
00535                 load_node_classload_serializer( std::istream & is )
00536                 {
00537             try
00538             {
00539                 typedef data_node_serializer<NodeType> ST;
00540                 std::auto_ptr<ST> ser( guess_serializer<NodeType>( is ) );
00541                 return ser.get()
00542                     ? ser->deserialize( is )
00543                     : 0;
00544             }
00545             catch( const s11n_exception & sex )
00546             {
00547                 throw sex;
00548             }
00549             catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions
00550             {
00551                 throw ::s11n::io_exception( "%s:%d: forwarded exception: %s", 
00552                                 __FILE__, __LINE__, ex.what() );
00553             }
00554             catch( ... )
00555             {
00556                 throw ::s11n::io_exception( "%s:%d: Stream-level deserialization failed for unknown reason.",
00557                                 __FILE__, __LINE__ );
00558             }
00559             return 0;
00560                 }
00561 
00562         /**
00563            Overloaded to take a filename. This is handled
00564            separately from the stream overload because some
00565            Serializers must behave differently in the face of
00566            streams. e.g., db-based Serializers typically can't
00567            deal with streams.
00568 
00569            Added in 1.2.1.
00570         */
00571                 template <typename NodeType>
00572                 NodeType *
00573                 load_node_classload_serializer( std::string const & src )
00574                 {
00575             try
00576             {
00577                 typedef data_node_serializer<NodeType> ST;
00578                 std::auto_ptr<ST> ser( guess_serializer<NodeType>( src ) );
00579                 return ser.get()
00580                     ? ser->deserialize( src )
00581                     : 0;
00582             }
00583             catch( const s11n_exception & sex )
00584             {
00585                 throw sex;
00586             }
00587             catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions
00588             {
00589                 throw ::s11n::io_exception( "%s:%d: forwarded exception: %s", 
00590                                 __FILE__, __LINE__, ex.what() );
00591             }
00592             catch( ... )
00593             {
00594                 throw ::s11n::io_exception( "%s:%d: Stream-level deserialization failed for unknown reason.",
00595                                 __FILE__, __LINE__ );
00596             }
00597             return 0;
00598                 }
00599 
00600 
00601                 /**
00602                    Returns a node pointer, parsed from the given stream, using
00603                    <code>s11n::io::data_node_serializer<NodeType></code>
00604                    as the base type for looking up a stream handler.
00605 
00606                    ACHTUNG: Only usable for loading ROOT nodes.
00607                 */
00608                 template <typename NodeType>
00609                 NodeType * load_node( std::istream & is )
00610                 {
00611                         return load_node_classload_serializer< NodeType >( is );
00612                 }
00613 
00614                 /**
00615                    Overloaded form of load_node( istream ), provided for
00616                    convenience.
00617 
00618                    If ExternalData is true, input is treated as a file,
00619                    otherwise it is treated as a string containing input
00620                    to parse.
00621 
00622                    ACHTUNG: Only usable for loading ROOT nodes.
00623 
00624            Behaviour change in 1.2.1:
00625 
00626            If (ExternalData) then this call is eventually
00627            passed to ASerializer->deserialize(src). In
00628            previous versions, src was "converted" to a stream
00629            and passed to ASerializer->deserialize(istream),
00630            which does not work for some Serializers. This was
00631            fixed in 1.2.1 to allow the sqlite3 add-on to play
00632            along more transparently with s11nconvert and s11nbrowser.
00633                 */
00634                 template <typename NodeType>
00635                 NodeType * load_node( const std::string & src, bool ExternalData = true )
00636                 {
00637             if( ! ExternalData )
00638             {
00639                 typedef std::auto_ptr<std::istream> AP;
00640                 AP is( ::s11n::io::get_istream( src, ExternalData ) );
00641                 if( ! is.get() ) return 0;
00642                 return load_node<NodeType>( *is );
00643             }
00644             return load_node_classload_serializer<NodeType>( src );
00645                 }
00646 
00647                 /**
00648                    Tries to load a SerializableT from the given stream.
00649                    On success returns a new object, else 0.
00650 
00651                    The caller owns the returned pointer.
00652 
00653                    ACHTUNG: Only usable for loading ROOT nodes.
00654                 */
00655                 template <typename NodeT,typename SerializableT>
00656                 SerializableT * load_serializable( std::istream & src )
00657                 {
00658                         typedef std::auto_ptr<NodeT> AP;
00659                         AP node( load_node<NodeT>( src ) );
00660                         if( ! node.get() )
00661                         {
00662                                 CERR << "load_serializable<>(istream) Could not load a root node from the input.\n";
00663                                 return 0;
00664                         }
00665                         return ::s11n::deserialize<NodeT,SerializableT>( *node );
00666                 }
00667 
00668                 /**
00669                    An overloaded form which takes an input string. If
00670                    ExternalData is true the string is treated as a file
00671                    name, otherwise it is processed as an input stream.
00672 
00673                    ACHTUNG: Only usable for loading ROOT nodes.
00674 
00675            Behaviour chagne in 1.2.1 when (ExternalData):
00676            load_node(string) is used to load the snode tree,
00677            as opposed to load_node(stream). This change was to
00678            allow non-stream-friendly Serializers (e.g.,
00679            DB-based) to integrate more fully into s11n.
00680                 */
00681                 template <typename NodeT,typename SerializableT>
00682                 SerializableT * load_serializable( const std::string & src, bool ExternalData = true )
00683                 {
00684             if( ! ExternalData )
00685             {
00686                 typedef std::auto_ptr<std::istream> AP;
00687                 AP is( ::s11n::io::get_istream( src, ExternalData ) );
00688                 if( ! is.get() )
00689                 {
00690                     // CERR << "load_serializable<>(string) Could not load a root node from the input.\n";
00691                     return 0;
00692                 }
00693                 return load_serializable<NodeT,SerializableT>( *is );
00694             }
00695                         typedef std::auto_ptr<NodeT> AP;
00696                         AP node( load_node<NodeT>( src ) );
00697                         if( ! node.get() )
00698                         {
00699                                 // CERR << "load_serializable<>(string) Could not load a root node from the input.\n";
00700                                 return 0;
00701                         }
00702                         return ::s11n::deserialize<NodeT,SerializableT>( *node );
00703         }
00704 
00705         } // namespace io
00706 
00707 } // namespace s11n
00708 
00709 #endif // s11n_DATA_NODE_IO_H_INCLUDED

Generated on Sun Apr 27 11:48:19 2008 for libs11n-1.2.6 by  doxygen 1.5.3