00001 #ifndef s11n_io_URL_HPP_INCLUDED 00002 #define s11n_io_URL_HPP_INCLUDED 1 00003 00004 #include <string> 00005 #include <s11n.net/s11n/export.hpp> 00006 #include <s11n.net/s11n/refcount.hpp> 00007 #include <s11n.net/s11n/factory.hpp> 00008 00009 // Reminder: RFC1738: http://www.ietf.org/rfc/rfc1738.txt 00010 00011 namespace s11n { namespace io { 00012 00013 /** 00014 url_parser is a basic implementation for 00015 parsing a URL string into its atomic components. 00016 It is not a full-featured parser, for example it does 00017 not parse key=value arguments at the end of a URL. 00018 00019 This type uses reference-counted internal data and 00020 copy-on-write, so copying it is cheap. 00021 */ 00022 class S11N_EXPORT_API url_parser 00023 { 00024 public: 00025 /** 00026 Parses the given URL. good() reveals the status 00027 of the parse. 00028 */ 00029 url_parser( std::string const & ); 00030 /** 00031 Creates an empty (!good()) parser. 00032 */ 00033 url_parser(); 00034 // Rely on default copy/assign ops: 00035 // url_parser & url_parser( url_parser const & ); 00036 // url_parser & operator=( url_parser const & ); 00037 /** 00038 Functions the same as the string-argument ctor. 00039 */ 00040 url_parser & operator=( std::string const & ); 00041 ~url_parser(); 00042 /** 00043 Returns true if the last parse() got a "valid" URL. 00044 */ 00045 bool good() const; 00046 00047 /** 00048 Parses URLs of the following forms: 00049 00050 scheme://[user[:password]@]host[:[port[:]]][/path/to/resource] 00051 00052 Note that host may have an optional ':' after it 00053 without a port number, and that a port number may be followed 00054 by an optional ':' character. This is to accommodate ssh 00055 URLs and the like: 00056 00057 ssh://user\@host:/path 00058 00059 ssh://user\@host:33:/path 00060 00061 This function returns the same as good(). 00062 00063 If this function returns false then the contents of this 00064 objects are in an undefined state. They should not be used 00065 before a call to parse() succeeds. 00066 */ 00067 bool parse( std::string const & ); 00068 00069 /** Returns the URL most recently passed to parse(). */ 00070 std::string url() const; 00071 /** Returns the scheme part of url(). */ 00072 std::string scheme() const; 00073 /** Returns the user name part of url(), which may be empty. */ 00074 std::string user() const; 00075 /** Returns the user password part of url(), which may be empty. */ 00076 std::string password() const; 00077 /** Returns the host part of url(). */ 00078 std::string host() const; 00079 /** 00080 Returns the resource path part of url(), which may be empty. 00081 00082 Contrary to RFC1738, a leading slash in a URL *is* considered 00083 to be part of the path. 00084 00085 In some protocols (e.g. http) an empty path can be 00086 considered the same as '/', but on others 00087 (e.g. file) such interpretation is not appropriate. 00088 */ 00089 std::string path() const; 00090 00091 /** 00092 If the URL path has a '?' in it, anything after the '?' 00093 is assumed to be a list of arguments, e.g. as those passed 00094 to HTTP GET requests. This string does not contain the leading 00095 '?'. 00096 */ 00097 std::string args_str() const; 00098 00099 typedef std::map<std::string,std::string> args_map_type; 00100 args_map_type const & args_map() const; 00101 00102 /** Returns the port number part of url(), or 0 if no port was specified. */ 00103 unsigned short port() const; 00104 private: 00105 /** 00106 impl holds the private data for a url_parser. 00107 00108 PS: i hate that this has to be in the public 00109 header, but rcptr<impl> needs impl to be a complete 00110 type. 00111 */ 00112 struct impl 00113 { 00114 std::string url; 00115 std::string proto; 00116 std::string user; 00117 std::string pass; 00118 std::string host; 00119 unsigned short port; 00120 std::string path; 00121 std::string args_str; 00122 bool good; 00123 args_map_type args_map; 00124 impl(); 00125 }; 00126 s11n::refcount::rcptr<impl> pimpl; 00127 }; 00128 00129 /** 00130 A factory type intended to be subclassed to provide 00131 protocol-specific i/o streams. 00132 00133 Subclasses must reimplement the virtual functions and 00134 register with the classloader like so: 00135 00136 <pre> 00137 #define S11N_FACREG_TYPE my_subclass_type 00138 #define S11N_FACREG_INTERFACE_TYPE s11n::io::url_stream_factory 00139 #define S11N_FACREG_TYPE_NAME "my_subclass_type" 00140 #include <s11n.net/s11n/factory_reg.hpp> 00141 </pre> 00142 00143 They may also want to set up classloader aliases during 00144 the static initialization phase, as demonstrated for 00145 the file:// protocol in url.cpp. 00146 00147 Note for subclasser: NEVER EVER call 00148 s11n::io::get_i/ostream() from this class, because those 00149 functions dispatch to url_stream_factory when possible, and 00150 callint those from here can cause an endless loop. 00151 00152 */ 00153 class S11N_EXPORT_API url_stream_factory 00154 { 00155 protected: 00156 url_stream_factory() {} 00157 00158 /** 00159 Default implementation returns 0. Subclasses.should return an instance 00160 of a stream capable of writing to the given URL. On error they should 00161 return 0 or throw an exception. 00162 00163 The caller owns the returned pointer, which may be 0. 00164 */ 00165 virtual std::ostream * do_get_ostream( url_parser const & url ) const 00166 { 00167 return 0; 00168 } 00169 /** 00170 Default implementation returns 0. Subclasses.should return an instance 00171 of a stream capable of reading from the given URL. On error they should 00172 return 0 or throw an exception. 00173 00174 The caller owns the returned pointer, which may be 0. 00175 */ 00176 virtual std::istream * do_get_istream( url_parser const & url ) const 00177 { 00178 return 0; 00179 } 00180 00181 public: 00182 virtual ~url_stream_factory() {} 00183 00184 /** 00185 See do_get_ostream(). 00186 */ 00187 std::ostream * get_ostream( url_parser const & url ) const 00188 { 00189 return this->do_get_ostream( url ); 00190 } 00191 /** 00192 See do_get_istream(). 00193 */ 00194 std::istream * get_istream( url_parser const & url ) const 00195 { 00196 return this->do_get_istream( url ); 00197 } 00198 00199 /** 00200 Classloads an instance of url_stream_factory 00201 associated with the given scheme. Caller owns the 00202 returned pointer, which may be 0. 00203 00204 Subclass authors are responsible for registering their 00205 subclasses with the url_stream_factory classloader. 00206 */ 00207 static url_stream_factory * create_factory_for_scheme( std::string const & scheme ); 00208 00209 /** 00210 Registers SubclassT as a subclass of 00211 url_stream_factory such that calling 00212 create_factory_for_scheme(scheme) will return an 00213 instance of SubclassT. SubclassT must be-a 00214 url_stream_factory and must be compatible with the 00215 s11n::fac factory layer. 00216 */ 00217 template <typename SubclassT> 00218 static void register_factory_for_scheme( std::string const & scheme ) 00219 { 00220 s11n::fac::register_subtype< url_stream_factory, SubclassT >( scheme ); 00221 } 00222 00223 }; 00224 00225 /** 00226 Convenience overload. 00227 */ 00228 std::istream * get_url_istream( std::string const & url ); 00229 /** 00230 Classloads an instance of an istream, using a 00231 url_stream_factory to create the stream. Caller owns the 00232 returned pointer, which may be 0. Failure indicates one of: 00233 00234 - !url.good() 00235 00236 - no url_stream_factory was mapped to url.scheme(). 00237 00238 - The factory could not create the required stream. 00239 */ 00240 std::istream * get_url_istream( url_parser const & url ); 00241 00242 /** 00243 Convenience overload. 00244 */ 00245 std::ostream * get_url_ostream( std::string const & url ); 00246 00247 /** 00248 See get_url_istream(). 00249 */ 00250 std::ostream * get_url_ostream( url_parser const & url ); 00251 00252 /** 00253 This factory creates streams for URLs in the following format: 00254 00255 file:[//]/path/to/file 00256 00257 It works for input and output. 00258 00259 If your libs11n is configured/built with 00260 s11n_CONFIG_HAVE_ZFSTREAM set to true then the zfstream 00261 library is used to support bzip2/gzip files. 00262 */ 00263 class S11N_EXPORT_API file_stream_factory : public url_stream_factory 00264 { 00265 public: 00266 file_stream_factory(); 00267 virtual ~file_stream_factory(); 00268 00269 protected: 00270 /** 00271 Creates an ostream for a file:// URL. If your 00272 s11n is built with zfstream support, then 00273 the compressors supported by that library 00274 are supported here. 00275 00276 The caller owns the returned pointer, which may be 00277 0. 00278 */ 00279 virtual std::ostream * do_get_ostream( url_parser const & url ) const; 00280 /** 00281 Creates an istream for a file:// URL. If your 00282 s11n is built with zfstream support, then 00283 the compressors supported by that library 00284 are supported here. 00285 00286 The caller owns the returned pointer, which may be 00287 0. 00288 */ 00289 virtual std::istream * do_get_istream( url_parser const & url ) const; 00290 }; 00291 00292 }} // namespaces 00293 00294 00295 00296 #endif // s11n_io_URL_HPP_INCLUDED