net/include/pion/net/HTTPParser.hpp

00001 // ------------------------------------------------------------------
00002 // pion-net: a C++ framework for building lightweight HTTP interfaces
00003 // ------------------------------------------------------------------
00004 // Copyright (C) 2007-2008 Atomic Labs, Inc.  (http://www.atomiclabs.com)
00005 //
00006 // Distributed under the Boost Software License, Version 1.0.
00007 // See http://www.boost.org/LICENSE_1_0.txt
00008 //
00009 
00010 #ifndef __PION_HTTPPARSER_HEADER__
00011 #define __PION_HTTPPARSER_HEADER__
00012 
00013 #include <string>
00014 #include <boost/noncopyable.hpp>
00015 #include <boost/logic/tribool.hpp>
00016 #include <pion/PionConfig.hpp>
00017 #include <pion/PionLogger.hpp>
00018 #include <pion/net/HTTPMessage.hpp>
00019 
00020 
00021 namespace pion {    // begin namespace pion
00022 namespace net {     // begin namespace net (Pion Network Library)
00023 
00024 // forward declarations used for finishing HTTP messages
00025 class HTTPRequest;
00026 class HTTPResponse;
00027 
00031 class PION_NET_API HTTPParser :
00032     private boost::noncopyable
00033 {
00034 
00035 public:
00036 
00038     static const std::size_t        DEFAULT_CONTENT_MAX;
00039 
00047     HTTPParser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
00048         : m_logger(PION_GET_LOGGER("pion.net.HTTPParser")), m_is_request(is_request),
00049         m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
00050         m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
00051         m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
00052         m_bytes_content_remaining(0), m_bytes_content_read(0),
00053         m_bytes_last_read(0), m_bytes_total_read(0),
00054         m_max_content_length(max_content_length)
00055     {}
00056 
00058     virtual ~HTTPParser() {}
00059 
00070     boost::tribool parse(HTTPMessage& http_msg);
00071 
00083     boost::tribool parseMissingData(HTTPMessage& http_msg, std::size_t len);
00084 
00090     void finish(HTTPMessage& http_msg) const;
00091 
00098     inline void setReadBuffer(const char *ptr, size_t len) {
00099         m_read_ptr = ptr;
00100         m_read_end_ptr = ptr + len;
00101     }
00102 
00109     inline void loadReadPosition(const char *&read_ptr, const char *&read_end_ptr) const {
00110         read_ptr = m_read_ptr;
00111         read_end_ptr = m_read_end_ptr;
00112     }
00113 
00123     inline bool checkPrematureEOF(HTTPMessage& http_msg) {
00124         if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
00125             return true;
00126         m_message_parse_state = PARSE_END;
00127         http_msg.concatenateChunks();
00128         finish(http_msg);
00129         return false;
00130     }
00131 
00133     inline void reset(void) {
00134         m_message_parse_state = PARSE_START;
00135         m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
00136         m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00137         m_status_code = 0;
00138         m_status_message.erase();
00139         m_method.erase();
00140         m_resource.erase();
00141         m_query_string.erase();
00142         m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
00143     }
00144 
00146     inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
00147 
00149     inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); } 
00150 
00152     inline std::size_t gcount(void) const { return m_bytes_last_read; }
00153 
00155     inline std::size_t getTotalBytesRead(void) const { return m_bytes_total_read; }
00156 
00158     inline std::size_t getContentBytesRead(void) const { return m_bytes_content_read; }
00159 
00161     inline std::size_t getMaxContentLength(void) const { return m_max_content_length; }
00162 
00164     inline bool isParsingRequest(void) const { return m_is_request; }
00165 
00167     inline bool isParsingResponse(void) const { return ! m_is_request; }
00168 
00170     inline void setMaxContentLength(std::size_t n) { m_max_content_length = n; }
00171 
00173     inline void resetMaxContentLength(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
00174 
00176     inline void setLogger(PionLogger log_ptr) { m_logger = log_ptr; }
00177 
00179     inline PionLogger getLogger(void) { return m_logger; }
00180 
00181 
00190     static bool contentTypeIsUrlEncoded(HTTPRequest& http_request);
00191 
00202     static bool parseURLEncoded(HTTPTypes::StringDictionary& dict,
00203                                 const char *ptr, const std::size_t len);
00204 
00215     static bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00216                                   const char *ptr, const std::size_t len);
00217 
00227     static inline bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00228         const std::string& cookie_header)
00229     {
00230         return parseCookieHeader(dict, cookie_header.c_str(), cookie_header.size());
00231     }
00232 
00242     static inline bool parseURLEncoded(HTTPTypes::StringDictionary& dict,
00243         const std::string& query)
00244     {
00245         return parseURLEncoded(dict, query.c_str(), query.size());
00246     }
00247 
00248 
00249 protected:
00250 
00262     boost::tribool parseHeaders(HTTPMessage& http_msg);
00263 
00269     void updateMessageWithHeaderData(HTTPMessage& http_msg) const;
00270 
00282     boost::tribool finishHeaderParsing(HTTPMessage& http_msg);
00283 
00294     boost::tribool parseChunks(HTTPMessage::ChunkCache& chunk_buffers);
00295 
00306     boost::tribool consumeContent(HTTPMessage& http_msg);
00307 
00315     std::size_t consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_buffers);
00316 
00317     // misc functions used by the parsing functions
00318     inline static bool isChar(int c);
00319     inline static bool isControl(int c);
00320     inline static bool isSpecial(int c);
00321     inline static bool isDigit(int c);
00322     inline static bool isHexDigit(int c);
00323 
00324 
00326     static const boost::uint32_t        STATUS_MESSAGE_MAX;
00327 
00329     static const boost::uint32_t        METHOD_MAX;
00330 
00332     static const boost::uint32_t        RESOURCE_MAX;
00333 
00335     static const boost::uint32_t        QUERY_STRING_MAX;
00336 
00338     static const boost::uint32_t        HEADER_NAME_MAX;
00339 
00341     static const boost::uint32_t        HEADER_VALUE_MAX;
00342 
00344     static const boost::uint32_t        QUERY_NAME_MAX;
00345 
00347     static const boost::uint32_t        QUERY_VALUE_MAX;
00348 
00350     static const boost::uint32_t        COOKIE_NAME_MAX;
00351 
00353     static const boost::uint32_t        COOKIE_VALUE_MAX;
00354 
00355 
00357     mutable PionLogger                  m_logger;
00358 
00360     const bool                          m_is_request;
00361 
00363     const char *                        m_read_ptr;
00364 
00366     const char *                        m_read_end_ptr;
00367 
00368 
00369 private:
00370 
00372     enum MessageParseState {
00373         PARSE_START, PARSE_HEADERS, PARSE_CONTENT,
00374         PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
00375     };
00376 
00379     enum HeadersParseState {
00380         PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
00381         PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
00382         PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
00383         PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
00384         PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
00385         PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
00386         PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
00387         PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
00388         PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
00389         PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
00390     };
00391 
00394     enum ChunkedContentParseState {
00395         PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE, 
00396         PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
00397         PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK, 
00398         PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
00399         PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK, 
00400         PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
00401     };
00402 
00403 
00405     MessageParseState                   m_message_parse_state;
00406 
00408     HeadersParseState                   m_headers_parse_state;
00409 
00411     ChunkedContentParseState            m_chunked_content_parse_state;
00412 
00414     boost::uint16_t                     m_status_code;
00415 
00417     std::string                         m_status_message;
00418 
00420     std::string                         m_method;
00421 
00423     std::string                         m_resource;
00424 
00426     std::string                         m_query_string;
00427 
00429     std::string                         m_header_name;
00430 
00432     std::string                         m_header_value;
00433 
00435     std::string                         m_chunk_size_str;
00436 
00438     std::size_t                         m_size_of_current_chunk;
00439 
00441     std::size_t                         m_bytes_read_in_current_chunk;
00442 
00444     std::size_t                         m_bytes_content_remaining;
00445 
00447     std::size_t                         m_bytes_content_read;
00448 
00450     std::size_t                         m_bytes_last_read;
00451 
00453     std::size_t                         m_bytes_total_read;
00454 
00456     std::size_t                         m_max_content_length;
00457 };
00458 
00459 
00460 // inline functions for HTTPParser
00461 
00462 inline bool HTTPParser::isChar(int c)
00463 {
00464     return(c >= 0 && c <= 127);
00465 }
00466 
00467 inline bool HTTPParser::isControl(int c)
00468 {
00469     return( (c >= 0 && c <= 31) || c == 127);
00470 }
00471 
00472 inline bool HTTPParser::isSpecial(int c)
00473 {
00474     switch (c) {
00475     case '(': case ')': case '<': case '>': case '@':
00476     case ',': case ';': case ':': case '\\': case '"':
00477     case '/': case '[': case ']': case '?': case '=':
00478     case '{': case '}': case ' ': case '\t':
00479         return true;
00480     default:
00481         return false;
00482     }
00483 }
00484 
00485 inline bool HTTPParser::isDigit(int c)
00486 {
00487     return(c >= '0' && c <= '9');
00488 }
00489 
00490 inline bool HTTPParser::isHexDigit(int c)
00491 {
00492     return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
00493 }
00494 
00495 }   // end namespace net
00496 }   // end namespace pion
00497 
00498 #endif

Generated on Fri Dec 4 08:54:29 2009 for pion-net by  doxygen 1.4.7