00001 #ifndef s11n_DATA_NODE_IO_H_INCLUDED 00002 #define s11n_DATA_NODE_IO_H_INCLUDED 00003 00004 //////////////////////////////////////////////////////////////////////// 00005 // data_node_io.hpp 00006 // some i/o interfaces & helpers for s11n 00007 // License: Public Domain 00008 // Author: stephan@s11n.net 00009 //////////////////////////////////////////////////////////////////////// 00010 00011 00012 #include <string> 00013 #include <sstream> 00014 #include <list> 00015 #include <map> 00016 #include <deque> 00017 #include <iostream> 00018 #include <memory>// auto_ptr 00019 00020 #include <cassert> 00021 #include <typeinfo> 00022 00023 00024 00025 // #include <s11n.net/cl/cllite.hpp> // for opening DLLs 00026 00027 #include <s11n.net/s11n/phoenix.hpp> // phoenix class 00028 00029 #include <s11n.net/s11n/exception.hpp> 00030 #include <s11n.net/s11n/s11n_debuggering_macros.hpp> // COUT/CERR 00031 #include <s11n.net/s11n/classload.hpp> // classloader() 00032 #include <s11n.net/s11n/serialize.hpp> // unfortunately dep 00033 #include <s11n.net/s11n/traits.hpp> // s11n_traits & node_traits 00034 00035 #include <s11n.net/s11n/export.hpp> // for exporting symbols to DLL 00036 00037 //////////////////////////////////////////////////////////////////////////////// 00038 // NO DEPS ON s11n_node.hpp ALLOWED! 00039 //////////////////////////////////////////////////////////////////////////////// 00040 00041 00042 namespace s11n { 00043 00044 namespace io { 00045 00046 /** 00047 Returns an output stream for the given file 00048 name. Caller owns the returned pointer, which may 00049 be 0. 00050 00051 The returned stream supports libzl and libbz2 if your 00052 libs11n is built with libs11n_zfstream support, meaning 00053 it can read files compressed with zlib/gzip or bz2lib. 00054 */ 00055 std::ostream * get_ostream( const std::string name ); 00056 00057 /** 00058 Returns an input stream for the given file 00059 name. Caller owns the returned pointer, which may 00060 be 0. 00061 00062 The returned stream supports libzl and libbz2 if 00063 your libs11n is built with libs11n_zfstream 00064 support. That means that if 00065 zfstream::compression_policy() is set, then the 00066 returned string might be a compressing stream. 00067 00068 If ExternalData is false then name is assumed to be 00069 a string containing input, and a string-reading stream 00070 is returned. 00071 */ 00072 std::istream * get_istream( const std::string name, bool ExternalData = true ); 00073 00074 /** 00075 Convenience function for grabbing the first line of a file. 00076 00077 If ExternalData == true then returns the first line of the 00078 file, else returns up to the first newline of src. 00079 00080 See get_magic_cookie( istream & ) for notes on a minor functional 00081 change introduced in version 1.2.1. 00082 */ 00083 std::string get_magic_cookie( const std::string & src, bool ExternalData = true ); 00084 00085 /** 00086 Convenience function for grabbing the first line of a 00087 stream. 00088 00089 Returns the first line of the given stream, or an 00090 empty string on error. 00091 00092 As of version 1.2.1, this function behaves slightly 00093 differently than prior versions: the returned 00094 string will be the first consecutive non-control 00095 characters in the line. This allows us to properly 00096 read some binary formats which use a string 00097 identifier as a magic cookie (e.g. sqlite 00098 databases). In this context "control characters" 00099 are anything outside the range of ASCII values 00100 [32..126]. This change "shouldn't" affect any 00101 pre-1.2.1 behaviours, which were never tested/used 00102 with binary file formats. 00103 */ 00104 std::string get_magic_cookie( std::istream & is ); 00105 00106 /** 00107 data_node_serializer provides an interface for 00108 saving/loading a given abstract data node type 00109 to/from streams. 00110 00111 It is designed for containers which comply with 00112 s11n's Data Node interface and conventions. 00113 00114 00115 Conventions: 00116 00117 Must provide: 00118 00119 typedef NodeT node_type 00120 00121 Two de/serialize functions, following the 00122 stream-based interface shown here (filename-based 00123 variants are optional, but convenient for clients). 00124 00125 00126 Potential TODOs for 1.3/1.4: 00127 00128 - Add cancel() and cancelled() to set/query the 00129 read state. This is to support Cancel operations in 00130 UIs which load slow-loading (sqlite3) formats or 00131 large sets and want to safely cancel. Once 00132 cancelled, a read is not restartable (or this is 00133 not guaranteed). All we can really do is provide a 00134 flag and conventions and hope implementations 00135 respect them. 00136 00137 - Provide some sort of progress feedback mechanism, 00138 at least for reading, again to support users of 00139 slow Serializers. This is complicated by the 00140 unknown-read-size nature of Serializers. 00141 */ 00142 template <typename NodeT> 00143 class S11N_EXPORT_API data_node_serializer 00144 { 00145 public: 00146 /** 00147 The underlying data type used to store 00148 serialized data. 00149 */ 00150 typedef NodeT node_type; 00151 00152 00153 data_node_serializer() 00154 { 00155 this->magic_cookie( "WARNING: magic_cookie() not set!" ); 00156 // ^^^ subclasses must do this. 00157 typedef ::s11n::node_traits<node_type> NTR; 00158 NTR::name( this->metadata(), "serializer_metadata" ); 00159 // this->metadata().name( "serializer_metadata" ); 00160 }; 00161 virtual ~data_node_serializer(){}; 00162 00163 00164 /** 00165 A convenience typedef, mainly for subclasses. 00166 */ 00167 typedef std::map<std::string,std::string> translation_map; 00168 00169 /** 00170 Returns a map intended for use with 00171 ::s11n::io::strtool::translate_entities(). 00172 00173 The default implementation returns an empty map. 00174 00175 Subclasses should override this to return a translation 00176 map, if they need one. The default map is empty. 00177 00178 Be aware that this may very well be called 00179 post-main(), so subclasses should take that into 00180 account and provide post-main()-safe maps! 00181 */ 00182 virtual const translation_map & entity_translations() const 00183 { 00184 typedef ::s11n::Detail::phoenix<translation_map,data_node_serializer<node_type> > TMap; 00185 return TMap::instance(); 00186 } 00187 00188 00189 00190 /** 00191 Must be implemented to format node_type to the given ostream. 00192 00193 It should return true on success, false on error. 00194 00195 The default implementation always returns false. 00196 00197 Note that this function does not use 00198 s11n::serialize() in any way, and is only 00199 coincidentally related to it. 00200 */ 00201 virtual bool serialize( const node_type & /*src*/, std::ostream & /*dest*/ ) 00202 { 00203 return false; 00204 } 00205 00206 /** 00207 Overloaded to save dest to the given filename. 00208 00209 The default implementation treats destfile 00210 as a file name and passes the call on to 00211 serialize(node_type,ostream). The output 00212 file is compressed if zfstream::compression_policy() 00213 has been set to enable it. 00214 00215 Returns true on success, false on error. 00216 00217 This function is virtual so that 00218 Serializers which do not deal with 00219 i/ostreams (e.g., those which use a 00220 database connection) can override it to 00221 interpret destfile as, e.g., a 00222 database-related string (e.g., connection, 00223 db object name, or whatever). 00224 00225 Fixed in 1.0.2: returns false when destfile 00226 is empty. 00227 */ 00228 virtual bool serialize( const node_type & src, const std::string & destfile ) 00229 { 00230 if( destfile.empty() ) return false; 00231 std::ostream * os = ::s11n::io::get_ostream( destfile ); 00232 if( ! os ) return false; 00233 bool b = this->serialize( src, *os ); 00234 delete( os ); 00235 return b; 00236 } 00237 00238 /** 00239 Must be implemented to parse a node_type from the given istream. 00240 00241 It should return true on success, false on error. 00242 00243 The default implementation always returns 0 and does nothing. 00244 00245 Note that this function does not use 00246 s11n::deserialize() in any way, and is only 00247 coincidentally related to it. 00248 00249 Subclasses should try not to have to buffer 00250 the whole stream before parsing, because 00251 object trees can be arbitrarily large and a 00252 buffered copy effectively doubles the 00253 memory needed to store the tree during the 00254 deserialization process. Buffering 00255 behaviour is unspecified by this interface, 00256 however, and subclasses may pre-buffer the 00257 whole stream content if they need to. 00258 */ 00259 virtual node_type * deserialize( std::istream & ) 00260 { 00261 return 0; 00262 } 00263 00264 00265 /** 00266 Overloaded to load dest from the given filename. 00267 00268 It supports zlib/bz2lib decompression for 00269 files if your s11n lib supports them. 00270 00271 This is virtual for the same reason as 00272 serialize(string). 00273 00274 */ 00275 virtual node_type * deserialize( const std::string & src ) 00276 { 00277 typedef std::auto_ptr<std::istream> AP; 00278 AP is = AP( ::s11n::io::get_istream( src ) ); 00279 if( ! is.get() ) return 0; 00280 return this->deserialize( *is ); 00281 } 00282 00283 00284 /** 00285 Gets this object's magic cookie. 00286 00287 Cookies are registered with 00288 <code>class_loader< data_node_serializer<NodeType> ></code> 00289 types to map files to file input parsers. 00290 */ 00291 std::string magic_cookie() const 00292 { 00293 return this->m_cookie; 00294 } 00295 00296 protected: 00297 /** 00298 Sets the magic cookie for this type. 00299 */ 00300 void magic_cookie( const std::string & c ) 00301 { 00302 this->m_cookie = c; 00303 } 00304 00305 /** 00306 metadata is an experimental feature 00307 allowing serializers to store arbitrary 00308 serializer-specific information in their 00309 data steams. 00310 */ 00311 node_type & metadata() 00312 { return this->m_meta; } 00313 /** 00314 A const overload of metadata(). 00315 */ 00316 const node_type & metadata() const 00317 { return this->m_meta;} 00318 private: 00319 std::string m_cookie; 00320 node_type m_meta; 00321 }; // data_node_serializer<> 00322 00323 /** 00324 Tries to guess which Serializer can be used to read 00325 is. Returns an instance of that type on success or 00326 0 on error. It may propagate exceptions. 00327 00328 Added in 1.2.1. 00329 */ 00330 template <typename NodeType> 00331 data_node_serializer<NodeType> * guess_serializer( std::istream & is ) 00332 { 00333 typedef data_node_serializer<NodeType> ST; 00334 ST * ser = 0; 00335 std::string cookie; 00336 // CERR << "cookie="<<cookie<<std::endl; 00337 #if 1 00338 cookie = get_magic_cookie( is ); 00339 #else 00340 if( ! std::getline( is, cookie ) ) 00341 { 00342 CERR << "Odd: got a null cookie from the istream.\n"; 00343 return 0; // happens post-main() on valid streams sometimes!?!?! 00344 } 00345 #endif 00346 if( cookie.empty() ) return 0; 00347 std::string opencmd = "#s11n::io::serializer "; 00348 std::string::size_type at = cookie.find( opencmd ); 00349 if( std::string::npos == at ) 00350 { // try new approach, added in 1.1.0: 00351 opencmd = "#!/s11n/io/serializer "; 00352 at = cookie.find( opencmd ); 00353 } 00354 00355 if( 0 == at ) 00356 { 00357 std::string dll = cookie.substr( opencmd.size() ); 00358 ser = ::s11n::cl::classload<ST>( dll ); 00359 } 00360 else 00361 { 00362 ser = ::s11n::cl::classload<ST>( cookie ); 00363 } 00364 // if( ! ser ) 00365 // { 00366 // CERR << "Did not find serializer for cookie ["<<cookie<<"]."<<std::endl; 00367 // return 0; 00368 // } 00369 return ser; 00370 } 00371 00372 /** 00373 An overload which assumes infile is a local file. 00374 00375 Added in 1.2.1. 00376 */ 00377 template <typename NodeType> 00378 data_node_serializer<NodeType> * guess_serializer( std::string const & infile ) 00379 { 00380 std::auto_ptr<std::istream> is( get_istream( infile.c_str() ) ); 00381 return is.get() 00382 ? guess_serializer<NodeType>( *is ) 00383 : 0; 00384 } 00385 00386 /** 00387 00388 Tries to load a NodeType object from the given 00389 node. It uses the cookie from the input stream (the 00390 first line) and uses 00391 s11n::cl::classload<SerializerBaseType>() to find a 00392 matching Serializer. 00393 00394 On error 0 is returned or an exception is thrown, 00395 else a new pointer, which the caller owns. 00396 00397 Achtung: the first line of input from the input stream 00398 is consumed by this function (to find the cookie), and 00399 the cookie is not passed on to the handler! The only 00400 reliable way around this [that i know of] is to buffer 00401 the whole input as a string, and i don't wanna do that 00402 (it's really bad for massive data files). 00403 00404 ACHTUNG: Only usable for loading ROOT nodes. 00405 00406 Special feature: 00407 00408 If the first line of the stream is 00409 "#s11n::io::serializer CLASSNAME" then the CLASSNAME 00410 token is expected to be a Serializer class name. This 00411 function will try to classload that object. If 00412 successful it will use that type to deserialize the 00413 input stream. If that fails, it will return 0. 00414 The intention of this feature is to simplify creation 00415 of non-C++ tools which generate s11n data (e.g., perl 00416 scripts), so that they don't need to know the exact 00417 cookies. 00418 */ 00419 template <typename NodeType> 00420 NodeType * 00421 load_node_classload_serializer( std::istream & is ) 00422 { 00423 typedef data_node_serializer<NodeType> ST; 00424 ST * ser = guess_serializer<NodeType>( is ); 00425 if( ! ser ) return 0; 00426 try 00427 { 00428 return ser->deserialize( is ); 00429 } 00430 catch( const s11n_exception & sex ) 00431 { 00432 throw sex; 00433 } 00434 catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions 00435 { 00436 throw ::s11n::io_exception( ex.what(), __FILE__, __LINE__ ); 00437 } 00438 catch( ... ) 00439 { 00440 throw ::s11n::io_exception( std::string("Stream-level deserialization failed for unknown reason."), 00441 __FILE__, __LINE__ ); 00442 } 00443 return 0; 00444 } 00445 00446 /** 00447 Overloaded to take a filename. This is handled 00448 separately from the stream overload because some 00449 Serializers must behave differently in the face of 00450 streams. e.g., db-based Serializers typically can't 00451 deal with streams. 00452 00453 Added in 1.2.1. 00454 */ 00455 template <typename NodeType> 00456 NodeType * 00457 load_node_classload_serializer( std::string const & src ) 00458 { 00459 typedef data_node_serializer<NodeType> ST; 00460 ST * ser = guess_serializer<NodeType>( src ); 00461 if( ! ser ) return 0; 00462 try 00463 { 00464 return ser->deserialize( src ); 00465 } 00466 catch( const s11n_exception & sex ) 00467 { 00468 throw sex; 00469 } 00470 catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions 00471 { 00472 throw ::s11n::io_exception( ex.what(), __FILE__, __LINE__ ); 00473 } 00474 catch( ... ) 00475 { 00476 throw ::s11n::io_exception( std::string("Stream-level deserialization failed for unknown reason."), 00477 __FILE__, __LINE__ ); 00478 } 00479 return 0; 00480 } 00481 00482 00483 /** 00484 Returns a node pointer, parsed from the given stream, using 00485 <code>s11n::io::data_node_serializer<NodeType></code> 00486 as the base type for looking up a stream handler. 00487 00488 ACHTUNG: Only usable for loading ROOT nodes. 00489 */ 00490 template <typename NodeType> 00491 NodeType * load_node( std::istream & is ) 00492 { 00493 return load_node_classload_serializer< NodeType >( is ); 00494 } 00495 00496 /** 00497 Overloaded form of load_node( istream ), provided for 00498 convenience. 00499 00500 If ExternalData is true, input is treated as a file, 00501 otherwise it is treated as a string containing input 00502 to parse. 00503 00504 ACHTUNG: Only usable for loading ROOT nodes. 00505 00506 Behaviour change in 1.2.1: 00507 00508 If (ExternalData) then this call is eventually 00509 passed to ASerializer->deserialize(src). In 00510 previous versions, src was "converted" to a stream 00511 and passed to ASerializer->deserialize(istream), 00512 which does not work for some Serializers. This was 00513 fixed in 1.2.1 to allow the sqlite3 add-on to play 00514 along more transparently with s11nconvert and s11nbrowser. 00515 */ 00516 template <typename NodeType> 00517 NodeType * load_node( const std::string & src, bool ExternalData = true ) 00518 { 00519 if( ! ExternalData ) 00520 { 00521 typedef std::auto_ptr<std::istream> AP; 00522 AP is( ::s11n::io::get_istream( src, ExternalData ) ); 00523 if( ! is.get() ) return 0; 00524 return load_node<NodeType>( *is ); 00525 } 00526 return load_node_classload_serializer<NodeType>( src ); 00527 } 00528 00529 /** 00530 Tries to load a SerializableT from the given stream. 00531 On success returns a new object, else 0. 00532 00533 The caller owns the returned pointer. 00534 00535 ACHTUNG: Only usable for loading ROOT nodes. 00536 */ 00537 template <typename NodeT,typename SerializableT> 00538 SerializableT * load_serializable( std::istream & src ) 00539 { 00540 typedef std::auto_ptr<NodeT> AP; 00541 AP node( load_node<NodeT>( src ) ); 00542 if( ! node.get() ) 00543 { 00544 CERR << "load_serializable<>(istream) Could not load a root node from the input.\n"; 00545 return 0; 00546 } 00547 return ::s11n::deserialize<NodeT,SerializableT>( *node ); 00548 } 00549 00550 /** 00551 An overloaded form which takes an input string. If 00552 ExternalData is true the string is treated as a file 00553 name, otherwise it is processed as an input stream. 00554 00555 ACHTUNG: Only usable for loading ROOT nodes. 00556 00557 Behaviour chagne in 1.2.1 when (ExternalData): 00558 load_node(string) is used to load the snode tree, 00559 as opposed to load_node(stream). This change was to 00560 allow non-stream-friendly Serializers (e.g., 00561 DB-based) to integrate more fully into s11n. 00562 */ 00563 template <typename NodeT,typename SerializableT> 00564 SerializableT * load_serializable( const std::string & src, bool ExternalData = true ) 00565 { 00566 if( ! ExternalData ) 00567 { 00568 typedef std::auto_ptr<std::istream> AP; 00569 AP is( ::s11n::io::get_istream( src, ExternalData ) ); 00570 if( ! is.get() ) 00571 { 00572 // CERR << "load_serializable<>(string) Could not load a root node from the input.\n"; 00573 return 0; 00574 } 00575 return load_serializable<NodeT,SerializableT>( *is ); 00576 } 00577 typedef std::auto_ptr<NodeT> AP; 00578 AP node( load_node<NodeT>( src ) ); 00579 if( ! node.get() ) 00580 { 00581 // CERR << "load_serializable<>(string) Could not load a root node from the input.\n"; 00582 return 0; 00583 } 00584 return ::s11n::deserialize<NodeT,SerializableT>( *node ); 00585 } 00586 00587 } // namespace io 00588 00589 } // namespace s11n 00590 00591 #endif // s11n_DATA_NODE_IO_H_INCLUDED