url.hpp

Go to the documentation of this file.
00001 #ifndef s11n_io_URL_HPP_INCLUDED
00002 #define s11n_io_URL_HPP_INCLUDED 1
00003 
00004 #include <string>
00005 #include <s11n.net/s11n/export.hpp>
00006 #include <s11n.net/s11n/refcount.hpp>
00007 #include <s11n.net/s11n/factory.hpp>
00008 
00009 // Reminder: RFC1738: http://www.ietf.org/rfc/rfc1738.txt
00010 
00011 namespace s11n { namespace io {
00012 
00013     /**
00014        url_parser is a basic implementation for
00015        parsing a URL string into its atomic components.
00016        It is not a full-featured parser, for example it does
00017        not parse key=value arguments at the end of a URL.
00018        (NOTE(review): args_str() and args_map() are declared below, so this limitation may be outdated -- confirm in url.cpp.)
00019        This type uses reference-counted internal data and
00020        copy-on-write, so copying it is cheap.
00021     */
00022     class S11N_EXPORT_API url_parser
00023     {
00024     public:
00025         /**
00026            Parses the given URL. good() reveals the status
00027            of the parse. Not declared explicit, so a std::string converts implicitly to a url_parser.
00028         */
00029         url_parser( std::string const & );
00030         /**
00031            Creates an empty (!good()) parser.
00032         */
00033         url_parser();
00034 // Rely on the compiler-generated copy/assign ops:
00035 //      url_parser( url_parser const & );
00036 //      url_parser & operator=( url_parser const & );
00037         /**
00038            Functions the same as the string-argument ctor.
00039          */
00040         url_parser & operator=( std::string const & );
00041         ~url_parser();
00042         /**
00043            Returns true if the last parse() got a "valid" URL.
00044         */
00045         bool good() const;
00046 
00047         /**
00048            Parses URLs of the following forms:
00049 
00050            scheme://[user[:password]@]host[:[port[:]]][/path/to/resource]
00051 
00052            Note that host may have an optional ':' after it
00053            without a port number, and that a port number may be followed
00054            by an optional ':' character. This is to accommodate ssh
00055            URLs and the like:
00056 
00057            ssh://user\@host:/path
00058 
00059            ssh://user\@host:33:/path
00060 
00061            This function returns the same as good().
00062 
00063            If this function returns false then the contents of this
00064            object are in an undefined state. They should not be used
00065            before a call to parse() succeeds.
00066         */
00067         bool parse( std::string const & );
00068 
00069         /** Returns the URL most recently passed to parse(). */
00070         std::string url() const;
00071         /** Returns the scheme part of url(). */
00072         std::string scheme() const;
00073         /** Returns the user name part of url(), which may be empty. */
00074         std::string user() const;
00075         /** Returns the user password part of url(), which may be empty. */
00076         std::string password() const;
00077         /** Returns the host part of url(). */
00078         std::string host() const;
00079         /**
00080            Returns the resource path part of url(), which may be empty.
00081 
00082            Contrary to RFC1738, a leading slash in a URL *is* considered
00083            to be part of the path.
00084 
00085            In some protocols (e.g. http) an empty path can be
00086            considered the same as '/', but on others
00087            (e.g. file) such interpretation is not appropriate.
00088         */
00089         std::string path() const;
00090 
00091         /**
00092            If the URL path has a '?' in it, anything after the '?'
00093            is assumed to be a list of arguments, e.g. as those passed
00094            to HTTP GET requests. This string does not contain the leading
00095            '?'.
00096         */
00097         std::string args_str() const;
00098         /** args_map_type maps string keys to string values; args_map() returns one by const reference. NOTE(review): presumably the parsed key=value pairs of args_str() -- confirm in url.cpp. */
00099         typedef std::map<std::string,std::string> args_map_type;
00100         args_map_type const & args_map() const;
00101 
00102         /** Returns the port number part of url(), or 0 if no port was specified. */
00103         unsigned short port() const;
00104     private:
00105         /**
00106            impl holds the private data for a url_parser.
00107            NOTE(review): the fields appear to mirror the public accessors (e.g. proto for scheme(), pass for password()) -- confirm in url.cpp.
00108            PS: i hate that this has to be in the public
00109            header, but rcptr<impl> needs impl to be a complete
00110            type.
00111         */
00112         struct impl
00113         {
00114             std::string url;
00115             std::string proto;
00116             std::string user;
00117             std::string pass;
00118             std::string host;
00119             unsigned short port;
00120             std::string path;
00121             std::string args_str;
00122             bool good;
00123             args_map_type args_map;
00124             impl();
00125         };
00126         s11n::refcount::rcptr<impl> pimpl; // handle to the private data described by impl
00127     };
00128 
00129     /**
00130        A factory type intended to be subclassed to provide
00131        protocol-specific i/o streams.
00132 
00133        Subclasses must reimplement the virtual functions and
00134        register with the classloader like so:
00135 
00136 <pre>
00137 #define S11N_FACREG_TYPE my_subclass_type
00138 #define S11N_FACREG_INTERFACE_TYPE s11n::io::url_stream_factory
00139 #define S11N_FACREG_TYPE_NAME "my_subclass_type"
00140 #include <s11n.net/s11n/factory_reg.hpp>
00141 </pre>
00142 
00143        They may also want to set up classloader aliases during
00144        the static initialization phase, as demonstrated for
00145        the file:// protocol in url.cpp.
00146 
00147        Note for subclassers: NEVER EVER call
00148        s11n::io::get_i/ostream() from this class, because those
00149        functions dispatch to url_stream_factory when possible, and
00150        calling those from here can cause an endless loop.
00151 
00152     */
00153     class S11N_EXPORT_API url_stream_factory
00154     {
00155     protected:
00156         url_stream_factory() {} // protected: not directly constructible by outside code
00157 
00158         /**
00159            Default implementation ignores url and returns 0. Subclasses should return an instance
00160            of a stream capable of writing to the given URL. On error they should
00161            return 0 or throw an exception.
00162 
00163            The caller owns the returned pointer, which may be 0.
00164         */
00165         virtual std::ostream * do_get_ostream( url_parser const & url ) const
00166         {
00167             return 0;
00168         }
00169         /**
00170            Default implementation ignores url and returns 0. Subclasses should return an instance
00171            of a stream capable of reading from the given URL. On error they should
00172            return 0 or throw an exception.
00173 
00174            The caller owns the returned pointer, which may be 0.
00175         */
00176         virtual std::istream * do_get_istream( url_parser const & url ) const
00177         {
00178             return 0;
00179         }
00180 
00181     public:
00182         virtual ~url_stream_factory() {}
00183 
00184         /**
00185            Forwards to the virtual do_get_ostream(); see that function for the contract.
00186         */
00187         std::ostream * get_ostream( url_parser const & url ) const
00188         {
00189             return this->do_get_ostream( url );
00190         }
00191         /**
00192            Forwards to the virtual do_get_istream(); see that function for the contract.
00193         */
00194         std::istream * get_istream( url_parser const & url ) const
00195         {
00196             return this->do_get_istream( url );
00197         }
00198 
00199         /**
00200            Classloads an instance of url_stream_factory
00201            associated with the given scheme. Caller owns the
00202            returned pointer, which may be 0.
00203 
00204            Subclass authors are responsible for registering their
00205            subclasses with the url_stream_factory classloader.
00206         */
00207         static url_stream_factory * create_factory_for_scheme( std::string const & scheme );
00208 
00209         /**
00210            Registers SubclassT as a subclass of
00211            url_stream_factory such that calling
00212            create_factory_for_scheme(scheme) will return an
00213            instance of SubclassT. SubclassT must be-a
00214            url_stream_factory and must be compatible with the
00215            s11n::fac factory layer (this forwards to s11n::fac::register_subtype).
00216         */
00217         template <typename SubclassT>
00218         static void register_factory_for_scheme( std::string const & scheme )
00219         {
00220             s11n::fac::register_subtype< url_stream_factory, SubclassT >( scheme );
00221         }
00222 
00223     };
00224 
00225     /**
00226        Convenience overload taking the URL as a string; see get_url_istream(url_parser const &). NOTE(review): unlike the classes in this header, the free functions here are not marked S11N_EXPORT_API -- confirm that is intended.
00227     */
00228     std::istream * get_url_istream( std::string const & url );
00229     /**
00230        Classloads an instance of an istream, using a
00231        url_stream_factory to create the stream. Caller owns the
00232        returned pointer, which may be 0. Failure indicates one of:
00233 
00234        - !url.good()
00235 
00236        - no url_stream_factory was mapped to url.scheme().
00237 
00238        - The factory could not create the required stream.
00239     */
00240     std::istream * get_url_istream( url_parser const & url );
00241 
00242     /**
00243        Convenience overload taking the URL as a string; see get_url_ostream(url_parser const &).
00244     */
00245     std::ostream * get_url_ostream( std::string const & url );
00246 
00247     /**
00248        Output counterpart of get_url_istream(url_parser const &); see that function.
00249     */
00250     std::ostream * get_url_ostream( url_parser const & url );
00251 
00252     /**
00253        This factory creates streams for URLs in the following format:
00254 
00255        file:[//]/path/to/file
00256 
00257        It works for input and output.
00258 
00259        If your libs11n is configured/built with
00260        s11n_CONFIG_HAVE_ZFSTREAM set to true then the zfstream
00261        library is used to support bzip2/gzip files.
00262     */
00263     class S11N_EXPORT_API file_stream_factory : public url_stream_factory
00264     {
00265     public:
00266         file_stream_factory();
00267         virtual ~file_stream_factory();
00268 
00269     protected:
00270         /**
00271            Creates an ostream for a file:// URL. If your
00272            s11n is built with zfstream support, then
00273            the compressors supported by that library
00274            are supported here.
00275 
00276            The caller owns the returned pointer, which may be 0.
00277            Overrides url_stream_factory::do_get_ostream().
00278         */
00279         virtual std::ostream * do_get_ostream( url_parser const & url ) const;
00280         /**
00281            Creates an istream for a file:// URL. If your
00282            s11n is built with zfstream support, then
00283            the compressors supported by that library
00284            are supported here.
00285 
00286            The caller owns the returned pointer, which may be 0.
00287            Overrides url_stream_factory::do_get_istream().
00288         */
00289         virtual std::istream * do_get_istream( url_parser const & url ) const;
00290     };
00291 
00292 }} // namespaces
00293 
00294 
00295 
00296 #endif // s11n_io_URL_HPP_INCLUDED

Generated on Sun Apr 27 13:16:04 2008 for libs11n by  doxygen 1.5.3