(original) (raw)
---Implements the HTTP client protocol in a standard form that Nmap scripts can
-- take advantage of.
--
-- Because HTTP has so many uses, there are a number of interfaces to this
-- library.
--
-- The most obvious and common ones are simply <code>get</code>,
-- <code>post</code>, and <code>head</code>; or, if more control is required,
-- <code>generic_request</code> can be used. These functions take host and port
-- as their main parameters and they do what one would expect. The
-- <code>get_url</code> helper function can be used to parse and retrieve a full
-- URL.
--
-- HTTPS support is transparent. The library uses <code>comm.tryssl</code> to
-- determine whether SSL is required for a request.
--
-- These functions return a table of values, including:
-- * <code>status-line</code> - A string representing the status, such as "HTTP/1.1 200 OK", followed by a newline. In case of an error, a description will be provided in this line.
-- * <code>status</code> - The HTTP status value; for example, "200". If an error occurs during a request, then this value is going to be nil.
-- * <code>version</code> - HTTP protocol version string, as stated in the status line. Example: "1.1"
-- * <code>header</code> - An associative array representing the header. Keys are all lowercase, and standard headers, such as 'date', 'content-length', etc. will typically be present.
-- * <code>rawheader</code> - A numbered array of the headers, exactly as the server sent them. While header['content-type'] might be 'text/html', rawheader[3] might be 'Content-type: text/html'.
-- * <code>cookies</code> - A numbered array of the cookies the server sent. Each cookie is a table with the expected keys, such as <code>name</code>, <code>value</code>, <code>path</code>, <code>domain</code>, and <code>expires</code>. This table can be sent to the server in subsequent responses in the <code>options</code> table to any function (see below).
-- * <code>rawbody</code> - The full body, as returned by the server. Chunked transfer encoding is handled transparently.
-- * <code>body</code> - The full body, after processing the Content-Encoding header, if any. The Content-Encoding and Content-Length headers are adjusted to stay consistent with the processed body.
-- * <code>incomplete</code> - Partially received response object, in case of an error.
-- * <code>truncated</code> - A flag to indicate that the body has been truncated
-- * <code>decoded</code> - A list of processed named content encodings (like "identity" or "gzip")
-- * <code>undecoded</code> - A list of named content encodings that could not be processed (due to lack of support or the body being corrupted for a given encoding). A body has been successfully decoded if this list is empty (or nil, if no encodings were used in the first place).
-- * <code>location</code> - A numbered array of the locations of redirects that were followed.
--
-- Many of the functions optionally allow an "options" input table, which can
-- modify the HTTP request or its processing in many ways like adding headers or
-- setting the timeout. The following are valid keys in "options"
-- (note: not all options will necessarily affect every function):
-- * <code>timeout</code>: A timeout used for socket operations.
-- * <code>header</code>: A table containing additional headers to be used for the request. For example, <code>options['header']['Content-Type'] = 'text/xml'</code>
-- * <code>content</code>: The content of the message. This can be either a string, which will be directly added as the body of the message, or a table, which will have each key=value pair added (like a normal POST request). (A corresponding Content-Length header will be added automatically. Set header['Content-Length'] to override it).
-- * <code>cookies</code>: A list of cookies as either a string, which will be directly sent, or a table. If it's a table, the following fields are recognized: <code>name</code>, <code>value</code> and <code>path</code>. Only <code>name</code> and <code>value</code> fields are required.
-- * <code>auth</code>: A table containing the keys <code>username</code> and <code>password</code>, which will be used for HTTP Basic authentication.
--   If a server requires HTTP Digest authentication, then there must also be a key <code>digest</code>, with value <code>true</code>.
--   If a server requires NTLM authentication, then there must also be a key <code>ntlm</code>, with value <code>true</code>.
-- * <code>bypass_cache</code>: Do not perform a lookup in the local HTTP cache.
-- * <code>no_cache</code>: Do not save the result of this request to the local HTTP cache.
-- * <code>no_cache_body</code>: Do not save the body of the response to the local HTTP cache.
-- * <code>max_body_size</code>: Limit the received body to specific number of bytes. Overrides script argument <code>http.max-body-size</code>. See the script argument for details.
-- * <code>truncated_ok</code>: Do not treat oversized body as error. Overrides script argument <code>http.truncated-ok</code>.
-- * <code>any_af</code>: Allow connecting to any address family, inet or inet6. By default, these functions will only use the same AF as nmap.address_family to resolve names. (This option is a straight pass-thru to <code>comm.lua</code> functions.)
-- * <code>redirect_ok</code>: Closure that overrides the default redirect_ok used to validate whether to follow HTTP redirects or not. False, if no HTTP redirects should be followed. Alternatively, a number may be passed to change the number of redirects to follow.
--   The following example shows how to write a custom closure that follows 5 consecutive redirects, without the safety checks in the default redirect_ok:
--   <code>
--   redirect_ok = function(host,port)
--     local c = 5
--     return function(url)
--       if ( c==0 ) then return false end
--       c = c - 1
--       return true
--     end
--   end
--   </code>
--
-- If a script is planning on making a lot of requests, the pipelining functions
-- can be helpful. <code>pipeline_add</code> queues requests in a table, and
-- <code>pipeline_go</code> performs the requests, returning the results as an
-- array, with the responses in the same order as the requests were added.
-- As a simple example:
-- <code>
-- -- Start by defining the 'all' variable as nil
-- local all = nil
--
-- -- Add two GET requests and one HEAD to the queue but these requests are
-- -- not performed yet. The second parameter represents the "options" table
-- -- (which we don't need in this example).
-- all = http.pipeline_add('/book', nil, all)
-- all = http.pipeline_add('/test', nil, all)
-- all = http.pipeline_add('/monkeys', nil, all, 'HEAD')
--
-- -- Perform all three requests as parallel as Nmap is able to
-- local results = http.pipeline_go('nmap.org', 80, all)
-- </code>
--
-- At this point, <code>results</code> is an array with three elements.
-- Each element is a table containing the HTTP result, as discussed above.
--
-- One more interface provided by the HTTP library helps scripts determine
-- whether or not a page exists. The <code>identify_404</code> function will
-- try several URLs on the server to determine what the server's 404 pages look
-- like. It will attempt to identify customized 404 pages that may not return
-- the actual status code 404. If successful, the function
-- <code>page_exists</code> can then be used to determine whether or not a page
-- exists.
--
-- Some other miscellaneous functions that can come in handy are
-- <code>response_contains</code>, <code>can_use_head</code>, and
-- <code>save_path</code>. See the appropriate documentation for details.
--
-- @args http.max-cache-size The maximum memory size (in bytes) of the cache.
--
-- @args http.useragent The value of the User-Agent header field sent with
-- requests. By default it is
-- <code>"Mozilla/5.0 (compatible; Nmap Scripting Engine; https://nmap.org/book/nse.html)"</code>.
-- A value of the empty string disables sending the User-Agent header field.
--
-- @args http.pipeline If set, it represents the number of HTTP requests that'll be
-- sent on one connection. This can be set low to make debugging easier, or it
-- can be set high to test how a server reacts (its chosen max is ignored).
-- @args http.max-pipeline If set, it represents the number of outstanding
-- HTTP requests that should be sent together in a single burst. Defaults to
-- <code>http.pipeline</code> (if set), or to what function
-- <code>get_pipeline_limit</code> returns.
--
-- @args http.host The value to use in the Host header of all requests unless
-- otherwise set. By default, the Host header uses the output of
-- <code>stdnse.get_hostname()</code>.
--
-- @args http.max-body-size Limit the received body to specific number of bytes.
-- An oversized body results in an error unless script argument
-- <code>http.truncated-ok</code> or request option
-- <code>truncated_ok</code> is set to true. The default is 2097152 (2MB). Use
-- value -1 to disable the limit altogether. This argument can be overridden
-- case-by-case with request option <code>max_body_size</code>.
--
-- @args http.truncated-ok Do not treat oversized body as error. (Use response
-- object flag <code>truncated</code> to check if the returned body has been
-- truncated.) This argument can be overridden case-by-case with request option
-- <code>truncated_ok</code>.

-- TODO
-- Implement cache system for http pipelines
--

local base64 = require "base64"
local comm = require "comm"
local coroutine = require "coroutine"
local math = require "math"
local nmap = require "nmap"
local os = require "os"
local sasl = require "sasl"
local shortport = require "shortport"
local slaxml = require "slaxml"
local stdnse = require "stdnse"
local string = require "string"
local stringaux = require "stringaux"
local table = require "table"
local tableaux = require "tableaux"
local url = require "url"
local ascii_hostname = url.ascii_hostname
local smbauth = require "smbauth"
local unicode = require "unicode"
_ENV = stdnse.module("http", stdnse.seeall)

--Use ssl if we have it
local have_ssl, openssl = pcall(require,'openssl')

--Use zlib if we have it
local have_zlib, zlib = pcall(require,'zlib')

USER_AGENT = stdnse.get_script_args('http.useragent') or "Mozilla/5.0 (compatible; Nmap Scripting Engine; https://nmap.org/book/nse.html)"
local host_header = stdnse.get_script_args('http.host')
local MAX_REDIRECT_COUNT = 5
local MAX_BODY_SIZE = tonumber(stdnse.get_script_args('http.max-body-size')) or 2*1024*1024
local TRUNCATED_OK = string.lower(stdnse.get_script_args('http.truncated-ok') or "false") ~= "false"
local ERR_OVERSIZED_BODY = "response body too large"

--- Recursively copy into a table any elements from another table whose key it
-- doesn't have.
local function table_augment(to, from)
  for k, v in pairs(from) do
    if type( to[k] ) == 'table' then
      table_augment(to[k], from[k])
    else
      to[k] = from[k]
    end
  end
end

--- Provide the default port for a given scheme.
-- The localization is necessary because functions in http.lua like to use
-- "url" as a local parameter
local get_default_port = url.get_default_port

--- Get a value suitable for the Host header field.
-- See RFC 2616 sections 14.23 and 5.2.
local function get_host_field(host, port, scheme)
  -- If the global header is set by script-arg, use that.
  if host_header then return host_header end
  -- If there's no host, we can't invent a name.
  if not host then return nil end
  local hostname = ascii_hostname(host)
  -- If there's no port, just return hostname.
  if not port then return hostname end
  if type(port) == "string" then
    port = tonumber(port)
    assert(port, "Invalid port: not a number or table")
  end
  if type(port) == "number" then
    port = {number=port, protocol="tcp"}
  end
  local number = port.number
  if scheme then
    -- Caller provided scheme. If it's default, return just the hostname.
    if number == get_default_port(scheme) then
      return hostname
    end
  else
    scheme = url.get_default_scheme(port)
    if scheme then
      -- Caller did not provide scheme, and this port has a default scheme.
      local ssl_port = shortport.ssl(host, port)
      if (ssl_port and scheme == 'https') or (not ssl_port and scheme == 'http') then
        -- If it's SSL and https, or if it's plaintext and http, return just the hostname.
        return hostname
      end
    end
  end
  -- No special cases matched, so include the port number in the host header
  return hostname .. ":" .. number
end

-- Skip *( SP | HT ) starting at offset. See RFC 2616, section 2.2.
-- @return the first index following the spaces.
-- @return the spaces skipped over.
local function skip_space(s, offset)
  local _, i, space = s:find("^([ \t]*)", offset)
  return i + 1, space
end

-- Get a token starting at offset. See RFC 2616, section 2.2.
-- @return the first index following the token, or nil if no token was found.
-- @return the token.
local function get_token(s, offset)
  -- All characters except CTL and separators.
  local _, i, token = s:find("^([^()<>@,;:\\\"/%[%]?={} \0\001-\031\127]+)", offset)
  if i then
    return i + 1, token
  else
    return nil
  end
end

-- Get a quoted-string starting at offset. See RFC 2616, section 2.2. crlf is
-- used as the definition for CRLF in the case of LWS within the string.
-- @return the first index following the quoted-string, or nil if no
-- quoted-string was found.
-- @return the contents of the quoted-string, without quotes or backslash
-- escapes.
local function get_quoted_string(s, offset, crlf)
  local result = {}
  local i = offset
  assert(s:sub(i, i) == "\"")
  i = i + 1
  while i <= s:len() do
    local c = s:sub(i, i)
    if c == "\"" then
      -- Found the closing quote, done.
      return i + 1, table.concat(result)
    elseif c == "\\" then
      -- This is a quoted-pair ("\" CHAR).
      i = i + 1
      c = s:sub(i, i)
      if c == "" then
        -- No character following.
        error("\\ escape at end of input while parsing quoted-string.")
      end
      -- Only CHAR may follow a backslash.
      if c:byte(1) > 127 then
        error(string.format("Unexpected character with value > 127 (0x%02X) in quoted-string.", c:byte(1)))
      end
    else
      -- This is qdtext, which is TEXT except for '"'.
      -- TEXT is "any OCTET except CTLs, but including LWS," however "a CRLF is
      -- allowed in the definition of TEXT only as part of a header field
      -- continuation." So there are really two definitions of quoted-string,
      -- depending on whether it's in a header field or not. This function does
      -- not allow CRLF.
      c = s:sub(i, i)
      if c ~= "\t" and c:match("^[\0\001-\031\127]$") then
        error(string.format("Unexpected control character in quoted-string: 0x%02X.", c:byte(1)))
      end
    end
    result[#result + 1] = c
    i = i + 1
  end
  return nil
end

-- Returns the index just past the end of LWS.
local function skip_lws(s, pos)
  local _, e
  while true do
    while string.match(s, "^[ \t]", pos) do
      pos = pos + 1
    end
    _, e = string.find(s, "^\r?\n[ \t]", pos)
    if not e then
      return pos
    end
    pos = e + 1
  end
end

-- Field names that must be present in a well-formed options.digestauth table.
local digestauth_required = {"username","realm","nonce","digest-uri","response"}

---Validate an 'options' table, which is passed to a number of the HTTP functions. It is
-- often difficult to track down a mistake in the options table, and requires fiddling
-- with the http.lua source, but this should make that a lot easier.
-- Logs (via stdnse.debug1) every problem found and returns false if any field
-- fails validation; unknown top-level keys are reported but do not fail
-- validation on their own.
local function validate_options(options)
  local bad = false

  if(options == nil) then
    return true
  end

  for key, value in pairs(options) do
    if(key == 'timeout') then
      if(type(tonumber(value)) ~= 'number') then
        stdnse.debug1('http: options.timeout contains a non-numeric value')
        bad = true
      end
    elseif(key == 'header') then
      if(type(value) ~= 'table') then
        stdnse.debug1("http: options.header should be a table")
        bad = true
      end
    elseif(key == 'content') then
      if(type(value) ~= 'string' and type(value) ~= 'table') then
        stdnse.debug1("http: options.content should be a string or a table")
        bad = true
      end
    elseif(key == 'cookies') then
      if(type(value) == 'table') then
        for _, cookie in ipairs(value) do
          for cookie_key, cookie_value in pairs(cookie) do
            if(cookie_key == 'name') then
              if(type(cookie_value) ~= 'string') then
                stdnse.debug1("http: options.cookies[i].name should be a string")
                bad = true
              end
            elseif(cookie_key == 'value') then
              if(type(cookie_value) ~= 'string') then
                stdnse.debug1("http: options.cookies[i].value should be a string")
                bad = true
              end
            elseif(cookie_key == 'path') then
              if(type(cookie_value) ~= 'string') then
                stdnse.debug1("http: options.cookies[i].path should be a string")
                bad = true
              end
            elseif(cookie_key == 'expires') then
              if(type(cookie_value) ~= 'string') then
                stdnse.debug1("http: options.cookies[i].expires should be a string")
                bad = true
              end
            elseif(cookie_key == 'max-age') then
              if(type(cookie_value) ~= 'string') then
                stdnse.debug1("http: options.cookies[i].max-age should be a string")
                bad = true
              end
            elseif not (cookie_key == 'httponly' or cookie_key == 'secure') then
              stdnse.debug1("http: Unknown field in cookie table: %s", cookie_key)
              -- Ignore unrecognized attributes (per RFC 6265, Section 5.2)
            end
          end
        end
      elseif(type(value) ~= 'string') then
        stdnse.debug1("http: options.cookies should be a table or a string")
        bad = true
      end
    elseif(key == 'auth') then
      if(type(value) == 'table') then
        if(value['username'] == nil or value['password'] == nil) then
          stdnse.debug1("http: options.auth should contain both a 'username' and a 'password' key")
          bad = true
        end
      else
        stdnse.debug1("http: options.auth should be a table")
        bad = true
      end
    elseif (key == 'digestauth') then
      if(type(value) == 'table') then
        for _,k in ipairs(digestauth_required) do
          if not value[k] then
            stdnse.debug1("http: options.digestauth missing key: %s",k)
            bad = true
            break
          end
        end
      else
        bad = true
        stdnse.debug1("http: options.digestauth should be a table")
      end
    elseif (key == 'ntlmauth') then
      stdnse.debug1("Proceeding with ntlm message")
    elseif(key == 'bypass_cache' or key == 'no_cache' or key == 'no_cache_body' or key == 'any_af' or key == "truncated_ok") then
      if(type(value) ~= 'boolean') then
        stdnse.debug1("http: options.%s must be a boolean value", key)
        bad = true
      end
    elseif(key == 'redirect_ok') then
      if(type(value)~= 'function' and type(value)~='boolean' and type(value) ~= 'number') then
        stdnse.debug1("http: options.redirect_ok must be a function or boolean or number")
        bad = true
      end
    elseif(key == 'scheme') then
      if type(value) ~= 'string' then
        stdnse.debug1("http: options.scheme must be a string")
        bad = true
      end
    elseif(key == 'max_body_size') then
      if type(value) ~= 'number' then
        stdnse.debug1("http: options.max_body_size must be a number")
        bad = true
      end
    else
      stdnse.debug1("http: Unknown key in the options table: %s", key)
    end
  end

  return not(bad)
end

-- The following recv functions, and the function <code>next_response</code>
-- follow a common pattern. They each take a <code>partial</code> argument
-- whose value is data that has been read from the socket but not yet used in
-- parsing, and they return as their second return value a new value for
-- <code>partial</code>. The idea is that, for example, in reading from the
-- socket to get the Status-Line, you will probably read too much and read part
-- of the header. That part (the "partial") has to be retained when you go to
-- parse the header. The common use pattern is this:
-- <code>
-- local partial
-- status_line, partial = recv_line(socket, partial)
-- ...
-- header, partial = recv_header(socket, partial)
-- ...
-- </code>
-- On error, the functions return <code>nil</code>, the second return value
-- is an error message, and the third value is an unfinished fragment of
-- the response body (if any):
-- <code>
-- body, partial, fragment = recv_body(socket, partial)
-- if not body then
--   stdnse.debug1("Error encountered: %s", partial)
--   stdnse.debug1("Only %d bytes of the body received", (#fragment or 0))
-- end
-- ...
-- </code>

-- Receive a single line (up to <code>\n</code>).
local function recv_line(s, partial)
  local _, e
  local status, data
  local pos

  partial = partial or ""

  pos = 1
  while true do
    -- Plain (non-pattern) search for the newline in what we have buffered.
    _, e = string.find(partial, "\n", pos, true)
    if e then
      break
    end
    status, data = s:receive()
    if not status then
      return status, data
    end
    -- Resume the search just before the old end, so a newline arriving at the
    -- buffer boundary is not missed.
    pos = #partial
    partial = partial .. data
  end

  -- Return the line (including the newline) and the leftover bytes.
  return string.sub(partial, 1, e), string.sub(partial, e + 1)
end

-- A blank line terminates the header section (with or without CR).
local function line_is_empty(line)
  return line == "\r\n" or line == "\n"
end

-- Receive up to and including the first blank line, but return everything up
-- to and not including the final blank line.
local function recv_header(s, partial)
  local lines = {}

  partial = partial or ""

  while true do
    local line
    line, partial = recv_line(s, partial)
    if not line then
      return line, partial
    end
    if line_is_empty(line) then
      break
    end
    lines[#lines + 1] = line
  end

  return table.concat(lines), partial
end

-- Receive until the connection is closed.
local function recv_all(s, partial, maxlen)
  local parts = {}
  local part = partial or ""
  repeat
    if maxlen then
      maxlen = maxlen - #part
      if maxlen < 0 then
        -- Over the limit; negative maxlen as a sub() index trims exactly the
        -- excess bytes from the end of this part.
        table.insert(parts, part:sub(1, maxlen - 1))
        return nil, ERR_OVERSIZED_BODY, table.concat(parts)
      end
    end
    table.insert(parts, part)
    local status
    status, part = s:receive()
  until not status
  return table.concat(parts), ""
end

-- Receive exactly <code>length</code> bytes. Returns <code>nil</code> if that
-- many aren't available.
local function recv_length(s, recvlen, partial, maxlen)
  local parts = {}
  local part = partial or ""
  partial = ""
  repeat
    if #part > recvlen then
      -- We read past the requested length; save the surplus as the new partial.
      partial = part:sub(recvlen + 1)
      part = part:sub(1, recvlen)
    end
    if maxlen then
      maxlen = maxlen - #part
      if maxlen < 0 then
        -- Over the limit; negative maxlen trims the excess from this part.
        table.insert(parts, part:sub(1, maxlen - 1))
        return nil, ERR_OVERSIZED_BODY, table.concat(parts)
      end
    end
    table.insert(parts, part)
    recvlen = recvlen - #part
    if recvlen == 0 then
      return table.concat(parts), partial
    end
    local status
    status, part = s:receive()
  until not status
  return nil, part, table.concat(parts)
end

-- Receive until the end of a chunked message body, and return the dechunked
-- body.
local function recv_chunked(s, partial, maxlen)
  local chunks = {}
  repeat
    local line
    line, partial = recv_line(s, partial)
    if not line then
      return nil, "Chunk size not received; " .. partial, table.concat(chunks)
    end

    -- Get the chunk size.
    local pos = skip_space(line)
    local chunklen = line:match("^%x+", pos)
    if not chunklen then
      return nil, ("Chunked encoding didn't find hex; got %q."):format(line:sub(pos, pos + 10)), table.concat(chunks)
    end
    chunklen = tonumber(chunklen, 16)

    -- Ignore chunk-extensions that may follow here.
    -- RFC 2616, section 2.1 ("Implied *LWS") seems to allow *LWS between the
    -- parts of a chunk-extension, but that is ambiguous. Consider this case:
    -- "1234;a\r\n =1\r\n...". It could be an extension with a chunk-ext-name
    -- of "a" (and no value), and a chunk-data beginning with " =", or it could
    -- be a chunk-ext-name of "a" with a value of "1", and a chunk-data
    -- starting with "...". We don't allow *LWS here, only ( SP | HT ), so the
    -- first interpretation will prevail.

    local chunk, fragment
    chunk, partial, fragment = recv_length(s, chunklen, partial, maxlen)
    if not chunk then
      if partial ~= ERR_OVERSIZED_BODY then
        partial = "Incomplete chunk; " .. partial
      end
      table.insert(chunks, fragment)
      return nil, partial, table.concat(chunks)
    end
    table.insert(chunks, chunk)
    if maxlen then
      maxlen = maxlen - chunklen
    end

    line, partial = recv_line(s, partial)
    if not line then
      -- this warning message was initially an error but was adapted
      -- to support broken servers, such as the Citrix XML Service
      stdnse.debug2("Didn't find CRLF after chunk-data.")
    elseif not string.match(line, "^\r?\n") then
      return nil, ("Didn't find CRLF after chunk-data; got %q."):format(line), table.concat(chunks)
    end
  until chunklen == 0

  return table.concat(chunks), partial
end

-- Receive a message body, assuming that the header has already been read by
-- <code>recv_header</code>
-- The handling is sensitive to the request method
-- and the status code of the response.
local function recv_body(s, response, method, partial, maxlen)
  local connection_close, connection_keepalive

  partial = partial or ""

  -- First check for Connection: close and Connection: keep-alive. This is
  -- necessary to handle some servers that don't follow the protocol.
  connection_close = false
  connection_keepalive = false
  if response.header.connection then
    local offset, token
    offset = 0
    while true do
      offset, token = get_token(response.header.connection, offset + 1)
      if not offset then
        break
      end
      if string.lower(token) == "close" then
        connection_close = true
      elseif string.lower(token) == "keep-alive" then
        connection_keepalive = true
      end
    end
  end

  -- See RFC 2616, section 4.4 "Message Length".

  -- 1. Any response message which "MUST NOT" include a message-body (such as
  --    the 1xx, 204, and 304 responses and any response to a HEAD request) is
  --    always terminated by the first empty line after the header fields...
  --
  -- Despite the above, some servers return a body with response to a HEAD
  -- request. So if an HTTP/1.0 server returns a response without Connection:
  -- keep-alive, or any server returns a response with Connection: close, read
  -- whatever's left on the socket (should be zero bytes).
  if string.upper(method) == "HEAD" or (response.status >= 100 and response.status <= 199) or response.status == 204 or response.status == 304 then
    if connection_close or (response.version == "1.0" and not connection_keepalive) then
      return recv_all(s, partial, maxlen)
    else
      return "", partial
    end
  end

  -- 2. If a Transfer-Encoding header field (section 14.41) is present and has
  --    any value other than "identity", then the transfer-length is defined by
  --    use of the "chunked" transfer-coding (section 3.6), unless the message
  --    is terminated by closing the connection.
  if response.header["transfer-encoding"] and response.header["transfer-encoding"] ~= "identity" then
    return recv_chunked(s, partial, maxlen)
  end
  -- The Citrix XML Service sends a wrong "Transfer-Coding" instead of
  -- "Transfer-Encoding".
  if response.header["transfer-coding"] and response.header["transfer-coding"] ~= "identity" then
    return recv_chunked(s, partial, maxlen)
  end

  -- 3. If a Content-Length header field (section 14.13) is present, its decimal
  --    value in OCTETs represents both the entity-length and the
  --    transfer-length. The Content-Length header field MUST NOT be sent if
  --    these two lengths are different (i.e., if a Transfer-Encoding header
  --    field is present). If a message is received with both a
  --    Transfer-Encoding header field and a Content-Length header field, the
  --    latter MUST be ignored.
  if response.header["content-length"] and not response.header["transfer-encoding"] then
    local content_length = tonumber(response.header["content-length"])
    if not content_length then
      return nil, string.format("Content-Length %q is non-numeric", response.header["content-length"])
    end
    return recv_length(s, content_length, partial, maxlen)
  end

  -- 4. If the message uses the media type "multipart/byteranges", and the
  --    transfer-length is not otherwise specified, then this self-delimiting
  --    media type defines the transfer-length. [sic]

  -- Case 4 is unhandled.

  -- 5. By the server closing the connection.
  return recv_all(s, partial, maxlen)
end

-- Sets response["status-line"], response.status, and response.version.
local function parse_status_line(status_line, response)
  response["status-line"] = status_line
  local version, status, reason_phrase = string.match(status_line, "^HTTP/(%d+%.%d+) +(%d+)%f[ \r\n] *(.-)\r?\n$")
  if not version then
    return nil, string.format("Error parsing status-line %q.", status_line)
  end
  -- We don't have a use for the reason_phrase; ignore it.
  response.version = version
  response.status = tonumber(status)
  if not response.status then
    return nil, string.format("Status code is not numeric: %s", status)
  end

  return true
end

local parse_set_cookie -- defined farther down

-- Sets response.header and response.rawheader.
local function parse_header(header, response)
  local pos
  local name, words
  local s, e

  response.header = {}
  response.rawheader = stringaux.strsplit("\r?\n", header)
  pos = 1
  while pos <= #header do
    -- Get the field name.
    e, name = get_token(header, pos)
    -- Do not bail out if the header is malformed. Consume the header line
    -- anyway, getting to the next header, but do not create a new entry in
    -- the "header" table.
    if e then
      if header:sub(e, e) ~= ":" then
        name = nil
      end
      pos = e + 1
    end

    -- Skip initial space.
    pos = skip_lws(header, pos)
    -- Get non-space words separated by LWS, then join them with a single space.
    words = {}
    while pos <= #header and not string.match(header, "^\r?\n", pos) do
      s = pos
      while not string.match(header, "^[ \t]", pos) and not string.match(header, "^\r?\n", pos) do
        pos = pos + 1
      end
      words[#words + 1] = string.sub(header, s, pos - 1)
      pos = skip_lws(header, pos)
    end

    if name then
      -- Set it in our table.
      name = string.lower(name)
      local value = table.concat(words, " ")
      if response.header[name] then
        -- TODO: delay concatenation until return to avoid resource exhaustion
        response.header[name] = response.header[name] .. ", " .. value
      else
        response.header[name] = value
      end

      -- Update the cookie table if this is a Set-Cookie header
      if name == "set-cookie" then
        local cookie, err = parse_set_cookie(value)
        if cookie then
          response.cookies[#response.cookies + 1] = cookie
        else
          -- Ignore any cookie parsing error
        end
      end
    end

    -- Next field, or end of string. (If not it's an error.)
    s, e = string.find(header, "^\r?\n", pos)
    if not e then
      return nil, string.format("Header field named %q didn't end with CRLF", name)
    end
    pos = e + 1
  end

  return true
end

-- Parse the contents of a Set-Cookie header field.
-- The result is a table of the form
--
-- { name = "NAME", value = "VALUE", Comment = "...", Domain = "...", ... }
--
-- Every key except "name" and "value" is optional.
--
-- This function attempts to support the header parser defined in RFC 6265,
-- Section 5.2.
--
-- This parser used to support quoted strings for cookie and attribute values
-- but this behavior was breaking interoperability.
parse_set_cookie = function (s)
  local name, value
  local _, pos
  local cookie = {}
  s = s:gsub(";", "; ")

  -- Get the NAME=VALUE part.
  _, pos, cookie.name, cookie.value = s:find("^[ \t]*(.-)[ \t]*=[ \t]*(.-)[ \t]*%f[;\0]")
  if not (cookie.name or ""):find("^[^;]+$") then
    return nil, "Can't get cookie name."
  end
  pos = pos + 1

  -- Loop over the attributes.
  while s:sub(pos, pos) == ";" do
    _, pos, name = s:find("[ \t]*(.-)[ \t]*%f[=;\0]", pos + 1)
    pos = pos + 1
    if s:sub(pos, pos) == "=" then
      _, pos, value = s:find("[ \t]*(.-)[ \t]*%f[;\0]", pos + 1)
      pos = pos + 1
    else
      value = ""
    end
    -- Attribute names are case-insensitive; "name"/"value" are reserved keys.
    name = name:lower()
    if not (name == "" or name == "name" or name == "value") then
      cookie[name] = value
    end
  end

  return cookie
end

--- Attempt to repeatedly decode HTTP response body according to a given list
-- of named encodings.
--
-- @param body A string representing the raw, undecoded response body.
-- @param encodings A list of encodings (string or table)
-- @param maxlen A size limit for the decoded body
-- @return A decoded body
-- @return A list of encodings that were successfully applied
-- @return A list of encodings that remain to be applied to decode the body
-- completely.
-- @return Error string (if any)
-- @return Partially decoded body. For corrupted encoding, this is the body
-- still undecoded.
-- For oversized body, this is a portion of the decoded
-- body, up to the size limit.
local decode_body = function (body, encodings, maxlen)
  if not encodings then return body end
  if type(encodings) == "string" then
    encodings = stringaux.strsplit("%W+", encodings)
  end
  assert(type(encodings) == "table", "Invalid encoding specification")
  local decoded = {}
  local undecoded = tableaux.tcopy(encodings)
  while #undecoded > 0 do
    local enc = undecoded[1]:lower()
    if enc == "identity" then
      -- do nothing
      table.insert(decoded, table.remove(undecoded, 1))
    elseif enc == "gzip" and have_zlib then
      local stream = zlib.inflate(body)
      -- Read one byte past the limit so an oversized result can be detected.
      local status, newbody = pcall(stream.read, stream, maxlen and (maxlen + 1) or "*a")
      stream:close()
      if not status then
        return nil, decoded, undecoded, ("Corrupted Content-Encoding: " .. enc), body
      end
      table.insert(decoded, table.remove(undecoded, 1))
      newbody = newbody or ""
      if maxlen and #newbody > maxlen then
        return nil, decoded, undecoded, ERR_OVERSIZED_BODY, newbody:sub(1, maxlen)
      end
      body = newbody
    else
      stdnse.debug1("Unsupported Content-Encoding: %s", enc)
      break
    end
  end
  return body, decoded, undecoded
end

-- Read one response from the socket <code>s</code> and return it after
-- parsing.
--
-- In case of an error, an error message and a partially received response
-- (if any) are returned as additional values.
local function next_response(s, method, partial, options)
  local response
  local status_line, header, body, fragment
  local status, err

  partial = partial or ""

  response = {
    status=nil,
    ["status-line"]=nil,
    header={},
    rawheader={},
    cookies={},
    rawbody="",
    body="",
    truncated = nil
  }

  status_line, partial = recv_line(s, partial)
  if not status_line then
    return nil, partial, response
  end

  status, err = parse_status_line(status_line, response)
  if not status then
    return nil, err, response
  end

  header, partial = recv_header(s, partial)
  if not header then
    return nil, partial, response
  end

  status, err = parse_header(header, response)
  if not status then
    return nil, err, response
  end

  options = options or {}
  -- A negative limit (e.g. http.max-body-size=-1) disables the size check.
  local maxlen = math.floor(options.max_body_size or MAX_BODY_SIZE)
  if maxlen < 0 then
    maxlen = nil
  end
  local truncated_ok = options.truncated_ok
  if truncated_ok == nil then
    truncated_ok = TRUNCATED_OK
  end

  body, partial, fragment = recv_body(s, response, method, partial, maxlen)
  response.rawbody = body or fragment
  response.body = response.rawbody
  if not body then
    if partial ~= ERR_OVERSIZED_BODY then
      return nil, partial, response
    end
    -- Body hit the size limit; only an error if truncation is not tolerated.
    response.truncated = true
    if not truncated_ok then
      return nil, ("Received " .. ERR_OVERSIZED_BODY), response
    end
  end

  if response.header["content-encoding"] then
    local dcd, undcd
    body, dcd, undcd, err, fragment = decode_body(body, response.header["content-encoding"], maxlen)
    response.body = body or fragment
    response.decoded = dcd
    response.undecoded = undcd
    if not body then
      if err ~= ERR_OVERSIZED_BODY then
        return nil, err, response
      end
      response.truncated = true
      if not truncated_ok then
        return nil, ("Decoded " .. ERR_OVERSIZED_BODY), response
      end
    else
      -- Keep Content-Length consistent with the decoded body.
      if response.header["content-length"] then
        response.header["content-length"] = tostring(#body)
      end
    end
    -- Leave only the encodings that remain unapplied, or drop the header.
    response.header["content-encoding"] = #undcd > 0 and table.concat(undcd, ", ") or nil
  end

  return response, partial
end

--- Tries to extract the max number of requests that should be made on
-- a keep-alive connection based on "Keep-Alive: timeout=xx,max=yy" response
-- header.
--
-- If the value is not available, an arbitrary value is used. If the connection
-- is not explicitly closed by the server, this same value is attempted.
--
-- @param response The HTTP response table
-- @return The max number of requests on a keep-alive connection
local function get_pipeline_limit(response)
  -- Allow users to override this with a script-arg
  local pipeline = tonumber(stdnse.get_script_args({'http.pipeline', 'pipeline'}))
  if pipeline then
    return pipeline
  end
  if response then
    local hdr = response.header or {}
    local opts = stringaux.strsplit("[,%s]+", (hdr.connection or ""):lower())
    if tableaux.contains(opts, "close") then return 1 end
    if response.version >= "1.1" or tableaux.contains(opts, "keep-alive") then
      return 1 + (tonumber((hdr["keep-alive"] or ""):match("max=(%d+)")) or 39)
    end
  end
  return 1
end

--- Builds a string to be added to the request mod_options table
--
-- @param cookies A cookie jar just like the table returned by parse_set_cookie.
-- @param path If the argument exists, only cookies with this path are included in the request
-- @return A string to be added to the mod_options table
local function buildCookies(cookies, path)
  if type(cookies) == 'string' then return cookies end
  local cookie = {}
  for _, ck in ipairs(cookies or {}) do
    local ckpath = ck["path"]
    -- Include the cookie when no path filter is given, the cookie has no path,
    -- or the cookie path is a prefix (path-match) of the request path.
    if not path or not ckpath
      or ckpath == path
      or ckpath:sub(-1) == "/" and ckpath == path:sub(1, ckpath:len())
      or ckpath .. "/" == path:sub(1, ckpath:len()+1)
      then
      cookie[#cookie+1] = ck["name"] .. "=" .. ck["value"]
    end
  end
  return table.concat(cookie, "; ")
end

-- HTTP cache.
-- Cache of GET and HEAD requests. Uses <"host:port:path", record>.
-- record is in the format:
--   result: The result from http.get or http.head
--   last_used: The time the record was last accessed or made.
--   get: Was the result received from a request to get or recently wiped?
--   size: The size of the record, equal to #record.result.body.
local cache = {size = 0};

-- Order cache records by least-recently-used first (for eviction).
local function cmp_last_used (r1, r2)
  return (r1.last_used or 0) < (r2.last_used or 0);
end

local arg_max_cache_size = tonumber(stdnse.get_script_args({'http.max-cache-size', 'http-max-cache-size'}) or 1e6);

-- Evict bodies from least-recently-used records until the cache fits within
-- arg_max_cache_size. NOTE(review): relies on records also being inserted into
-- the cache's array part elsewhere in the file for table.sort/ipairs to see
-- them — confirm against the insertion code.
local function check_size (cache)
  local size = cache.size;

  if size > arg_max_cache_size then
    stdnse.debug1(
      "Current http cache size (%d bytes) exceeds max size of %d",
      size, arg_max_cache_size);
    table.sort(cache, cmp_last_used);

    for i, record in ipairs(cache) do
      if size <= arg_max_cache_size then break end
      local result = record.result;
      if type(result.body) == "string" then
        size = size - record.size;
        record.size, record.get, result.body = 0, false, "";
      end
    end

    cache.size = size;
  end
  stdnse.debug2("Final http cache size (%d bytes) of max size of %d",
    size, arg_max_cache_size);
  return size;
end

-- Unique value to signal value is being retrieved.
-- Also holds <mutex, thread> pairs, working thread is value
local WORKING = setmetatable({}, {__mode = "v"});

local function lookup_cache (method, host, port, path, options)
  if(not(validate_options(options))) then
    return nil
  end
  options = options or {};
  local bypass_cache = options.bypass_cache; -- do not lookup
  local no_cache = options.no_cache; -- do not save result
  local no_cache_body = options.no_cache_body; -- do not save body

  if type(port) == "table" then port = port.number end

  local key = ascii_hostname(host)..":"..port..":"..path;
  local mutex = nmap.mutex(tostring(lookup_cache)..key);

  local state = {
    mutex = mutex,
    key = key,
    method = method,
    bypass_cache = bypass_cache,
    no_cache = no_cache,
    no_cache_body = no_cache_body,
  };

  while true do
    mutex "lock";
    local record = cache[key];
    if bypass_cache or record == nil or method ~= record.method then
      WORKING[mutex] = coroutine.running();
      cache[key], state.old_record = WORKING, record;
      return nil, state;
    elseif record == WORKING then
      local working = WORKING[mutex];
      if working == nil or coroutine.status(working) == "dead" then
        -- thread died before insert_cache could be called
        cache[key] = nil; -- reset
      end
      mutex "done";
    else
      mutex "done";
      record.last_used = os.time();
      return tableaux.tcopy(record.result), state;
    end
  end
end

local function response_is_cacheable(response)
  -- if response.status is nil, then an error must have occurred during the
  -- request and we probably don't want to cache the response
  if not response.status then
    return false
  end

  -- 206 Partial Content. RFC 2616, 1.34: "...a cache that does not support the
  -- Range and Content-Range headers MUST NOT cache 206 (Partial Content)
  -- responses."
  if response.status == 206 then
    return false
  end

  -- RFC 2616, 13.4. "A response received with any [status code other than 200,
  -- 203, 206, 300, 301 or 410] (e.g. status codes 302 and 307) MUST NOT be
  -- returned in a reply to a subsequent request unless there are cache-control
  -- directives or another header(s) that explicitly allow it."
  -- We violate the standard here and allow these other codes to be cached,
  -- with the exceptions listed below.

  -- 401 Unauthorized. Caching this would prevent us from retrieving it later
  -- with the correct credentials.
  if response.status == 401 then
    return false
  end

  -- It is not desirable to cache a truncated response because it could poison
  -- subsequent requests with different options max-body-size or truncated_ok.
  if response.truncated then
    return false
  end

  return true
end

local function insert_cache (state, response)
  local key = assert(state.key);
  local mutex = assert(state.mutex);

  if response == nil or state.no_cache or not response_is_cacheable(response) then
    cache[key] = state.old_record;
  else
    local record = {
      result = tableaux.tcopy(response),
      last_used = os.time(),
      method = state.method,
      size = type(response.body) == "string" and #response.body or 0,
    };
    response = record.result; -- only modify copy
    cache[key], cache[#cache+1] = record, record;
    if state.no_cache_body then
      response.body = "";
    end
    if type(response.body) == "string" then
      cache.size = cache.size + #response.body;
      check_size(cache);
    end
  end
  mutex "done";
end

-- Return true if the given method requires a body in the request. In case no
-- body was supplied we must send "Content-Length: 0".
local function request_method_needs_content_length(method)
  return method == "POST"
end

-- For each of the following request functions, host may either be a string
-- or a table, and port may either be a number or a table.
--
-- The format of the return value is a table with the following structure:
-- {status = 200, status-line = "HTTP/1.1 200 OK", header = {}, rawheader = {}, body ="..."}
-- The header table has an entry for each received header with the header name
-- being the key. The table also has an entry named "status" which contains the
-- http status code of the request.
-- In case of an error, the status is nil, status-line describes the problem,
-- and member "incomplete" contains a partially received response (if any).

local function http_error(status_line, response)
  stdnse.debug2("HTTP response error: %s", status_line)
  return {
    status = nil,
    ["status-line"] = status_line,
    header = {},
    rawheader = {},
    body = nil,
    rawbody = nil,
    truncated = nil,
    incomplete = response
  }
end

--- Build an HTTP request from parameters and return it as a string.
--
-- @param host The host this request is intended for.
-- @param port The port this request is intended for.
-- @param method The method to use.
-- @param path The path for the request.
-- @param options A table of options, which may include the keys:
-- * header: A table containing additional headers to be used for the request.
-- * content: The content of the message (content-length will be added -- set
--   header['Content-Length'] to override)
-- * cookies: A table of cookies in the form returned by parse_set_cookie.
-- * auth: A table containing the keys username and password.
-- @return A request string.
-- @see generic_request
local function build_request(host, port, method, path, options)
  if(not(validate_options(options))) then
    return nil
  end
  options = options or {}

  -- Private copy of the options table, used to add default header fields.
  local mod_options = {
    header = {
      Connection = "close",
      Host = get_host_field(host, port, options.scheme),
      ["User-Agent"] = USER_AGENT
    }
  }

  if options.cookies then
    local cookies = buildCookies(options.cookies, path)
    if #cookies > 0 then
      mod_options.header["Cookie"] = cookies
    end
  end

  if options.auth and not (options.auth.digest or options.auth.ntlm) then
    local username = options.auth.username
    local password = options.auth.password
    local credentials = "Basic " .. base64.enc(username .. ":" .. password)
    mod_options.header["Authorization"] = credentials
  end

  if options.digestauth then
    local order = {"username", "realm", "nonce", "digest-uri", "algorithm", "response", "qop", "nc", "cnonce"}
    local no_quote = {algorithm=true, qop=true, nc=true}
    local creds = {}
    for _,k in ipairs(order) do
      local v = options.digestauth[k]
      if v then
        if no_quote[k] then
          table.insert(creds, ("%s=%s"):format(k,v))
        else
          if k == "digest-uri" then
            table.insert(creds, ('%s="%s"'):format("uri",v))
          else
            table.insert(creds, ('%s="%s"'):format(k,v))
          end
        end
      end
    end
    local credentials = "Digest "..table.concat(creds, ", ")
    mod_options.header["Authorization"] = credentials
  end

  if options.ntlmauth then
    mod_options.header["Authorization"] = "NTLM " .. base64.enc(options.ntlmauth)
  end

  local body
  -- Build a form submission from a table, like "k1=v1&k2=v2".
  if type(options.content) == "table" then
    local parts = {}
    for k, v in pairs(options.content) do
      parts[#parts + 1] = url.escape(k) .. "=" ..
url.escape(v) end body = table.concat(parts, "&") mod_options.header["Content-Type"] = "application/x-www-form-urlencoded" elseif options.content then body = options.content elseif request_method_needs_content_length(method) then body = "" end if body then mod_options.header["Content-Length"] = #body end -- Add any other header fields into the local copy. table_augment(mod_options, options) -- We concat this string manually to allow null bytes in requests local request_line = method.." "..path.." HTTP/1.1" local header = {} for name, value in pairs(mod_options.header) do -- we concat this string manually to allow null bytes in requests header[#header + 1] = name..": "..value end return request_line .. "\r\n" .. table.concat(header, "\r\n") .. "\r\n\r\n" .. (body or "") end --- A wrapper for comm.tryssl that strictly obeys options.scheme. If it is -- "https" then only SSL connection is attempted. If "http" then there is no -- HTTPS fallback. local function do_connect(host, port, data, options) if options.scheme == "https" or options.scheme == "http" then -- If the scheme is specifically requested (e.g. -- get_url("https://example.com")) then don't fall back. return comm.opencon(host, port, data, { timeout = options.timeout, any_af = options.any_af, proto = (options.scheme == "https" and "ssl" or "tcp"), }) end return comm.tryssl(host, port, data, {timeout = options.timeout, any_af = options.any_af}) end --- Send a string to a host and port and return the HTTP result. This function -- is like generic_request
, to be used when you have a ready-made -- request, not a collection of request parameters. -- -- @param host The host to connect to. -- @param port The port to connect to. -- @param options A table of other parameters. It may have any of these fields: -- * timeout
: A timeout used for socket operations. -- * header
: A table containing additional headers to be used for the request. -- * content
: The content of the message (content-length will be added -- set header['Content-Length'] to override) -- * cookies
: A table of cookies in the form returned by parse_set_cookie
. -- * auth
: A table containing the keys username
and password
. -- @return A response table, see module documentation for description. -- @see generic_request local function request(host, port, data, options) if(not(validate_options(options))) then return http_error("Options failed to validate.") end local method local response options = options or {} if type(port) == 'table' then if port.protocol and port.protocol ~= 'tcp' then stdnse.debug1("http.request() supports the TCP protocol only, your request to %s cannot be completed.", host) return http_error("Unsupported protocol.") end end method = string.match(data, "^(%S+)") local socket, partial, opts = do_connect(host, port, data, options) if not socket then stdnse.debug1("http.request socket error: %s", partial) return http_error("Error creating socket.") end repeat local incomplete response, partial, incomplete = next_response(socket, method, partial, options) if not response then return http_error("Error in next_response function; " .. partial, incomplete) end -- See RFC 2616, sections 8.2.3 and 10.1.1, for the 100 Continue status. -- Sometimes a server will tell us to "go ahead" with a POST body before -- sending the real response. If we got one of those, skip over it. until not (response.status >= 100 and response.status <= 199) socket:close() -- if SSL was used to retrieve the URL mark this in the response response.ssl = ( opts == 'ssl' ) return response end ---Do a single request with a given method. The response is returned as the standard -- response table (see the module documentation). -- -- The get
, head
, and post
functions are simple -- wrappers around generic_request
. -- -- Any 1XX (informational) responses are discarded. -- -- @param host The host to connect to. -- @param port The port to connect to. -- @param method The method to use; for example, 'GET', 'HEAD', etc. -- @param path The path to retrieve. -- @param options [optional] A table that lets the caller control socket timeouts, HTTP headers, and other parameters. For full documentation, see the module documentation (above). -- @return A response table, see module documentation for description. -- @see request function generic_request(host, port, method, path, options) if(not(validate_options(options))) then return http_error("Options failed to validate.") end local digest_auth = options and options.auth and options.auth.digest local ntlm_auth = options and options.auth and options.auth.ntlm if (digest_auth or ntlm_auth) and not have_ssl then stdnse.debug1("http: digest and ntlm auth require openssl.") end if digest_auth and have_ssl then -- If we want to do digest authentication, we have to make an initial -- request to get realm, nonce and other fields. local options_with_auth_removed = tableaux.tcopy(options) options_with_auth_removed["auth"] = nil local r = generic_request(host, port, method, path, options_with_auth_removed) local h = r.header['www-authenticate'] if not (r.status and h and h:lower():find("digest.-realm")) then stdnse.debug1("http: the target doesn't support digest auth or there was an error during request.") return http_error("The target doesn't support digest auth or there was an error during request.") end -- Compute the response hash local dmd5 = sasl.DigestMD5:new(h, options.auth.username, options.auth.password, method, path) local _, digest_table = dmd5:calcDigest() options.digestauth = digest_table end if ntlm_auth and have_ssl then local custom_options = tableaux.tcopy(options) -- to be sent with the type 1 request custom_options["auth"] = nil -- removing the auth options -- let's check if the target supports ntlm with a simple get request. 
-- Setting a timeout here other than nil messes up the authentication if this is the first device sending -- a request to the server. Don't know why. custom_options.timeout = nil local response = generic_request(host, port, method, path, custom_options) local authentication_header = response.header['www-authenticate'] -- get back the timeout option. custom_options.timeout = options.timeout -- cannot deal with truncated responses here custom_options.truncated_ok = false custom_options.header = options.header or {} custom_options.header["Connection"] = "Keep-Alive" -- Keep-Alive headers are needed for authentication. if (not authentication_header) or (not response.status) or (not string.find(authentication_header:lower(), "ntlm")) then stdnse.debug1("http: the target doesn't support NTLM or there was an error during request.") return http_error("The target doesn't support NTLM or there was an error during request.") end -- ntlm works with three messages. we send a request, it sends -- a challenge, we respond to the challenge. local hostname = options.auth.hostname or "localhost" -- the hostname to be sent local workstation_name = options.auth.workstation_name or "NMAP" -- the workstation name to be sent local username = options.auth.username -- the username as specified local auth_blob = "NTLMSSP\x00" .. -- NTLM signature "\x01\x00\x00\x00" .. 
-- NTLM Type 1 message string.pack("<i4", 1="" 40="" 0xa208b207)="" ..="" --="" flags="" 56,="" 128,="" version,="" extended="" security,="" always="" sign,="" workstation="" supplied,="" domain="" ntlm="" key,="" oem,="" unicode="" string.pack("<i2i2i4="" i2i2i4",#workstation_name,="" #workstation_name,="" +="" #hostname,="" 40)="" supplied="" and="" string.pack("bb<i2",="" os="" version="" info="" 5,="" 1,="" 2600)="" 5.1.2600="" "\x00\x00\x00\x0f"="" end="" (static="" 0x0000000f)="" hostname..="" host="" name="" workstation_name="" --workstation="" custom_options.ntlmauth="auth_blob" check="" if="" the="" protocol="" is="" tcp="" type(port)="=" 'table'="" then="" port.protocol="" ~="tcp" stdnse.debug1("ntlm="" authentication="" supports="" only,="" your="" request="" to="" %s="" cannot="" be="" completed.",="" host)="" return="" http_error("unsupported="" protocol.")="" sends="" type="" message.="" local="" socket,="" partial,="" opts="do_connect(host," port,="" build_request(host,="" method,="" path,="" custom_options),="" options)="" not="" socket="" http_error("could="" create="" send="" message.")="" repeat="" incomplete="" response,="" custom_options)="" response="" http_error("there="" was="" error="" in="" receiving="" of="" message;="" "="" incomplete)="" until="" (response.status="">= 100 and response.status <= 199) authentication_header = response.header['www-authenticate'] -- take out the challenge local type2_response = authentication_header:sub(authentication_header:find(' ')+1, -1) local _, message_type, _, _, _, flags_received, challenge= string.unpack(", flags, and OS Version structure are all present. auth_blob = string.pack("= 100 and response.status <= 199) socket:close() response.ssl = ( opts == 'ssl' ) return response end return request(host, port, build_request(host, port, method, path, options), options) end ---Uploads a file using the PUT method and returns a result table. This is a simple wrapper -- around generic_request
--
-- @param host The host to connect to.
-- @param port The port to connect to.
-- @param path The path to retrieve.
-- @param options [optional] A table that lets the caller control socket
--                timeouts, HTTP headers, and other parameters. For full
--                documentation, see the module documentation (above).
-- @param putdata The contents of the file to upload
-- @return A response table, see module documentation for description.
-- @see http.generic_request
function put(host, port, path, options, putdata)
  if(not(validate_options(options))) then
    return http_error("Options failed to validate.")
  end
  if ( not(putdata) ) then
    return http_error("No file to PUT.")
  end
  local mod_options = {
    content = putdata,
  }
  table_augment(mod_options, options or {})
  return generic_request(host, port, "PUT", path, mod_options)
end

-- Lowercased host name reduced to its registrable domain-ish portion
-- (keeps anything with at least two dots; otherwise the whole name).
local function domain (h)
  return (h:match("%..+%..+") or h):lower()
end

-- A battery of tests a URL is subjected to in order to decide if it may be
-- redirected to.
local redirect_ok_rules = {

  -- Check if there's any credentials in the url
  function (url, host, port)
    -- bail if userinfo is present
    return not url.userinfo
  end,

  -- Check if the location is within the domain or host
  --
  -- Notes:
  -- * A domain match must be exact and at least a second-level domain
  -- * ccTLDs are not treated as such. The rule will not stop a redirect
  --   from foo.co.uk to bar.co.uk even though it logically should.
  function (url, host, port)
    local hostname = ascii_hostname(host)
    if hostname == host.ip then
      return url.host == hostname
    end
    return domain(hostname) == domain(url.host)
  end,

  -- Check whether the new location has the same port number
  function (url, host, port)
    -- port fixup, adds default ports 80 and 443 in case no url.port was
    -- defined, we do this based on the url scheme
    local url_port = url.port or get_default_port(url.scheme)
    if not url_port or url_port == port.number then
      return true
    end
    return false
  end,

  -- Check whether the url.scheme matches the port.service
  function (url, host, port)
    -- if url.scheme is present then it must match the scanned port
    if url.scheme and url.port then return true end
    if url.scheme and url.scheme ~= port.service then return false end
    return true
  end,

  -- make sure we're actually being redirected somewhere and not to the same url
  function (url, host, port)
    -- url.path must be set if returning true
    -- path cannot be unchanged unless host has changed
    -- TODO: Since we do not know here what the actual old path was then
    -- the effectiveness of this code is a bit unclear.
    if not url.path then return false end
    if url.path == "/" and url.host == (host.targetname or host.ip) then return false end
    return true
  end,
}

--- Provides the default behavior for HTTP redirects.
--
-- Redirects will be followed unless they:
-- * contain credentials
-- * are on a different domain or host
-- * have a different port number or URI scheme
-- * redirect to the same URI
-- * exceed the maximum number of redirects specified
--
-- @param host table as received by the action function
-- @param port table as received by the action function
-- @param counter number of redirects to follow.
-- @return a default closure suitable for option "redirect_ok"
function redirect_ok(host, port, counter)
  -- convert a numeric port to a table
  if ( "number" == type(port) ) then
    port = { number = port }
  end
  return function(url)
    if ( counter == 0 ) then return false end
    counter = counter - 1
    for i, rule in ipairs( redirect_ok_rules ) do
      if ( not(rule( url, host, port )) ) then
        --stdnse.debug1("Rule failed: %d", i)
        return false
      end
    end
    return true
  end
end

--- Handles a HTTP redirect
-- @param host table as received by the script action function
-- @param port table as received by the script action function
-- @param path string
-- @param response table as returned by http.get or http.head
-- @return url table as returned by url.parse or nil if there's no
--         redirect taking place
function parse_redirect(host, port, path, response)
  if ( not(tostring(response.status):match("^30[01237]$")) or
       not(response.header) or
       not(response.header.location) ) then
    return nil
  end
  port = ( "number" == type(port) ) and { number = port } or port
  local u = url.parse(response.header.location)
  if ( not(u.host) ) then
    -- we're dealing with a relative url
    u.host = ascii_hostname(host)
  end
  -- do port fixup
  u.port = u.port or get_default_port(u.scheme) or port.number
  if ( not(u.path) ) then
    u.path = "/"
  end
  u.path = url.absolute(path, u.path)
  u.path = url.build({
    path = u.path,
    query = u.query,
    params = u.params,
  })
  return u
end

local ret_false = function () return false end

-- Retrieves the correct function to use to validate HTTP redirects
-- @param host table as received by the action function
-- @param port table as received by the action function
-- @param options table as passed to http.get or http.head
-- @return redirect_ok function used to validate HTTP redirects
local function get_redirect_ok(host, port, options)
  if ( options ) then
    if ( options.redirect_ok == false ) then
      return ret_false
    elseif( "function" == type(options.redirect_ok) ) then
      return options.redirect_ok(host, port)
    elseif( type(options.redirect_ok) == "number") then
      return redirect_ok(host, port, options.redirect_ok)
    else
      return redirect_ok(host, port, MAX_REDIRECT_COUNT)
    end
  else
    return redirect_ok(host, port, MAX_REDIRECT_COUNT)
  end
end

---Fetches a resource with a GET request and returns the result as a table.
--
-- This is a simple wrapper around generic_request, with the added benefit of
-- having local caching and support for HTTP redirects. Redirects are followed
-- only if they pass all the validation rules of the redirect_ok function.
-- This function may be overridden by supplying a custom function in the
-- redirect_ok field of the options array. The default function redirects the
-- request if the destination is:
-- * Within the same host or domain
-- * Has the same port number
-- * Stays within the current scheme
-- * Does not exceed MAX_REDIRECT_COUNT count of redirects
--
-- Caching and redirects can be controlled in the options array, see module
-- documentation for more information.
--
-- @param host The host to connect to.
-- @param port The port to connect to.
-- @param path The path to retrieve.
-- @param options [optional] A table that lets the caller control socket
--                timeouts, HTTP headers, and other parameters. For full
--                documentation, see the module documentation (above).
-- @return A response table, see module documentation for description.
-- @see http.generic_request
function get(host, port, path, options)
  if(not(validate_options(options))) then
    return http_error("Options failed to validate.")
  end
  options = options or {}
  local redir_check = get_redirect_ok(host, port, options)
  local response, state, location
  local u = { host = host, port = port, path = path }
  repeat
    response, state = lookup_cache("GET", u.host, u.port, u.path, options);
    if ( response == nil ) then
      response = generic_request(u.host, u.port, "GET", u.path, options)
      insert_cache(state, response);
    end
    u = parse_redirect(host, port, path, response)
    if ( not(u) ) then
      break
    end
    -- Allow redirect to change scheme (e.g. redirect to https)
    options.scheme = u.scheme or options.scheme
    location = location or {}
    table.insert(location, response.header.location)
  until( not(redir_check(u)) )
  response.location = location
  return response
end

---Parses a URL and calls http.get with the result. The URL can contain
-- all the standard fields, protocol://host:port/path
--
-- @param u The URL of the host.
-- @param options [optional] A table that lets the caller control socket
--                timeouts, HTTP headers, and other parameters. For full
--                documentation, see the module documentation (above).
-- @return A response table, see module documentation for description.
-- @see http.get
function get_url( u, options )
  if(not(validate_options(options))) then
    return http_error("Options failed to validate.")
  end
  options = options or {}
  local parsed = url.parse( u )
  local port = {}

  port.service = parsed.scheme
  port.number = parsed.port or get_default_port(parsed.scheme) or 80

  options.scheme = options.scheme or parsed.scheme

  local path = parsed.path or "/"
  if parsed.query then
    path = path .. "?" .. parsed.query
  end

  return get( parsed.ascii_host or parsed.host, port, path, options )
end

---Fetches a resource with a HEAD request.
--
-- Like get, this is a simple wrapper around generic_request with response
-- caching. This function also has support for HTTP redirects. Redirects are
-- followed only if they pass all the validation rules of the redirect_ok
-- function. This function may be overridden by supplying a custom function in
-- the redirect_ok field of the options array. The default function redirects
-- the request if the destination is:
-- * Within the same host or domain
-- * Has the same port number
-- * Stays within the current scheme
-- * Does not exceed MAX_REDIRECT_COUNT count of redirects
--
-- Caching and redirects can be controlled in the options array, see module
-- documentation for more information.
--
-- @param host The host to connect to.
-- @param port The port to connect to.
-- @param path The path to retrieve.
-- @param options [optional] A table that lets the caller control socket
--                timeouts, HTTP headers, and other parameters. For full
--                documentation, see the module documentation (above).
-- @return A response table, see module documentation for description.
-- @see http.generic_request
function head(host, port, path, options)
  if(not(validate_options(options))) then
    return http_error("Options failed to validate.")
  end
  options = options or {}
  local redir_check = get_redirect_ok(host, port, options)
  local response, state, location
  local u = { host = host, port = port, path = path }
  repeat
    response, state = lookup_cache("HEAD", u.host, u.port, u.path, options);
    if response == nil then
      response = generic_request(u.host, u.port, "HEAD", u.path, options)
      insert_cache(state, response);
    end
    u = parse_redirect(host, port, path, response)
    if ( not(u) ) then
      break
    end
    -- Allow redirect to change scheme (e.g. redirect to https)
    options.scheme = u.scheme or options.scheme
    location = location or {}
    table.insert(location, response.header.location)
  until( not(redir_check(u)) )
  response.location = location
  return response
end

---Fetches a resource with a POST request.
--
-- Like get, this is a simple wrapper around generic_request except that
-- postdata is handled properly.
--
-- @param host The host to connect to.
-- @param port The port to connect to.
-- @param path The path to retrieve.
-- @param options [optional] A table that lets the caller control socket
--                timeouts, HTTP headers, and other parameters. For full
--                documentation, see the module documentation (above).
-- @param ignored Ignored for backwards compatibility.
-- @param postdata A string or a table of data to be posted. If a table, the
--                 keys and values must be strings, and they will be encoded
--                 into an application/x-www-form-encoded form submission.
-- @return A response table, see module documentation for description.
-- @see http.generic_request
function post( host, port, path, options, ignored, postdata )
  if(not(validate_options(options))) then
    return http_error("Options failed to validate.")
  end
  local mod_options = {
    content = postdata,
  }
  table_augment(mod_options, options or {})
  return generic_request(host, port, "POST", path, mod_options)
end

-- Deprecated pipeline functions
function pGet( host, port, path, options, ignored, allReqs )
  stdnse.debug1("WARNING: pGet() is deprecated. Use pipeline_add() instead.")
  return pipeline_add(path, options, allReqs, 'GET')
end

function pHead( host, port, path, options, ignored, allReqs )
  stdnse.debug1("WARNING: pHead() is deprecated. Use pipeline_add instead.")
  return pipeline_add(path, options, allReqs, 'HEAD')
end

function addPipeline(host, port, path, options, ignored, allReqs, method)
  stdnse.debug1("WARNING: addPipeline() is deprecated! Use pipeline_add instead.")
  return pipeline_add(path, options, allReqs, method)
end

function pipeline(host, port, allReqs)
  stdnse.debug1("WARNING: pipeline() is deprecated. Use pipeline_go() instead.")
  return pipeline_go(host, port, allReqs)
end

---Adds a pending request to the HTTP pipeline.
--
-- The HTTP pipeline is a set of requests that will all be sent at the same
-- time, or as close as the server allows.
-- This allows more efficient code, since requests are automatically buffered
-- and sent simultaneously.
--
-- The all_requests argument contains the current list of queued requests (if
-- this is the first time calling pipeline_add, it should be nil). After
-- adding the request to end of the queue, the queue is returned and can be
-- passed to the next pipeline_add call.
--
-- When all requests have been queued, call pipeline_go with the all_requests
-- table that has been built.
--
-- @param path The path to retrieve.
-- @param options [optional] A table that lets the caller control socket
--                timeouts, HTTP headers, and other parameters. For full
--                documentation, see the module documentation (above).
-- @param all_requests [optional] The current pipeline queue (returned from a
--                     previous add_pipeline call), or nil if it's the first
--                     call.
-- @param method [optional] The HTTP method ('GET', 'HEAD', 'POST', etc).
--               Default: 'GET'.
-- @return Table with the pipeline requests (plus this new one)
-- @see http.pipeline_go
function pipeline_add(path, options, all_requests, method)
  if not validate_options(options) then
    return nil
  end
  options = tableaux.tcopy(options or {})
  method = method or 'GET'
  all_requests = all_requests or {}
  table.insert(all_requests, {method=method, path=path, options=options})
  return all_requests
end

---Makes sure that a given header is set to a given value. Any existing values
-- of this header are removed.
--
-- @param headers A table of existing headers or nil.
-- @param header to set
-- @param value to set the header to
-- @return An in-place modified table of headers
local function force_header (headers, header, value)
  local headers = headers or {}
  local header_lc = header:lower()
  for h in pairs(headers) do
    if h:lower() == header_lc then
      headers[h] = nil
    end
  end
  headers[header] = value
  return headers
end

local pipeline_comm_opts = {recv_before=false, request_timeout=10000}

---Performs all queued requests in the all_requests variable (created by the
-- pipeline_add
function).
--
-- Returns an array of responses, each of which is a table as defined in the
-- module documentation above.
--
-- @param host The host to connect to.
-- @param port The port to connect to.
-- @param all_requests A table with all the previously built pipeline requests
-- @return A list of responses, in the same order as the requests were queued.
--         Each response is a table as described in the module documentation.
--         The response list may be either nil or shorter than expected (up to
--         and including being completely empty) due to communication issues or
--         other errors.
function pipeline_go(host, port, all_requests)
  local responses = {}

  -- Check for an empty set
  if (not all_requests or #all_requests == 0) then
    stdnse.debug1("Warning: empty set of requests passed to http.pipeline_go()")
    return responses
  end
  stdnse.debug1("HTTP pipeline: Total number of requests: %d", #all_requests)

  -- We'll try a first request with keep-alive, just to check if the server
  -- supports it and how many requests we can send into one socket
  local req = all_requests[1]
  req.options.header = force_header(req.options.header, "Connection", "keep-alive")
  local reqstr = build_request(host, port, req.method, req.path, req.options)
  local socket, partial, bopt = comm.tryssl(host, port, reqstr, tableaux.tcopy(pipeline_comm_opts))
  if not socket then
    return nil
  end
  local resp
  resp, partial = next_response(socket, req.method, partial, req.options)
  if not resp then
    return responses
  end
  table.insert(responses, resp)

  local connsent = 1
  -- how many requests to send on one connection
  local connlimit = get_pipeline_limit(resp)
  -- how many requests should be sent in a single batch
  local batchlimit = tonumber(stdnse.get_script_args("http.max-pipeline")) or connlimit
  stdnse.debug3("HTTP pipeline: connlimit=%d, batchlimit=%d", connlimit, batchlimit)

  while #responses < #all_requests do
    -- reconnect if necessary
    if connsent >= connlimit or resp.truncated or not socket:get_info() then
      socket:close()
      stdnse.debug3("HTTP pipeline: reconnecting")
      -- BUGFIX: socket:connect() returns a status boolean; the socket object
      -- itself remains non-nil on failure, so the previous "if not socket"
      -- guard could never detect a failed reconnect. Check the returned
      -- status instead.
      local status = socket:connect(host, port, bopt)
      if not status then
        return nil
      end
      socket:set_timeout(pipeline_comm_opts.request_timeout)
      partial = ""
      connsent = 0
    end
    -- Never plan to send more requests on this connection than remain
    -- unanswered overall.
    if connlimit > connsent + #all_requests - #responses then
      connlimit = connsent + #all_requests - #responses
    end

    -- determine the current batch size
    local batchsize = connlimit - connsent
    if batchsize > batchlimit then
      batchsize = batchlimit
    end
    stdnse.debug3("HTTP pipeline: batch=%d, conn=%d/%d, resp=%d/%d", batchsize, connsent, connlimit, #responses, #all_requests)

    -- build and send a batch of requests
    local requests = {}
    for i = 1, batchsize do
      local req = all_requests[#responses + i]
      -- Ask the server to close the connection after the last request we
      -- intend to send on it.
      local connmode = connsent + i < connlimit and "keep-alive" or "close"
      req.options.header = force_header(req.options.header, "Connection", connmode)
      table.insert(requests, build_request(host, port, req.method, req.path, req.options))
    end
    socket:send(table.concat(requests))

    -- receive batch responses
    for i = 1, batchsize do
      local req = all_requests[#responses + 1]
      resp, partial = next_response(socket, req.method, partial, req.options)
      if not resp then
        stdnse.debug3("HTTP pipeline: response[%d]: %s", #responses + 1, partial)
        -- The server delivered fewer responses than requests; assume its real
        -- per-connection limit is the number we actually received.
        connlimit = connsent + i - 1
        if connlimit == 0 then
          stdnse.debug1("HTTP pipeline: First request on a new connection failed; giving up.");
          return responses
        end
        stdnse.debug1("HTTP pipeline: Received only %d of %d batch responses.\nDecreasing connection limit to %d.", i - 1, batchsize, connlimit)
        break
      end
      table.insert(responses, resp)
      -- A truncated response poisons the rest of the connection; force a
      -- reconnect at the top of the outer loop.
      if resp.truncated then break end
    end
    connsent = connsent + batchsize
  end
  socket:close()

  stdnse.debug1("HTTP pipeline: Number of received responses: %d", #responses)
  return responses
end

-- Parsing of specific headers. skip_space and the read_* functions return the
-- byte index following whatever they have just read, or nil on error.

-- Skip whitespace (that has already been folded from LWS). See RFC 2616,
-- section 2.2, definition of LWS.
-- Advance pos past any spaces and tabs in s. Always succeeds (the pattern
-- matches the empty string) and returns the index of the first non-blank byte.
local function skip_space(s, pos)
  local _
  _, pos = string.find(s, "^[ \t]*", pos)
  return pos + 1
end

-- See RFC 2616, section 2.2.
-- Read a single HTTP token (a run of CHARs excluding CTLs and separators).
-- @return the position after the token and the token itself, or nil on error.
local function read_token(s, pos)
  local _, token

  pos = skip_space(s, pos)
  -- 1*tokenchar. CHAR is only byte values 0-127.
  _, pos, token = string.find(s, "^([^\0\001-\031()<>@,;:\\\"/?={} \t%[%]\127-\255]+)", pos)

  if token then
    return pos + 1, token
  else
    return nil
  end
end

-- See RFC 2616, section 2.2. Here we relax the restriction that TEXT may not
-- contain CTLs.
-- Read a quoted-string; pos must point at the opening '"'. A backslash
-- escapes the following byte. Returns the position after the closing quote
-- and the unescaped contents, or nil if the string is malformed/unterminated.
local function read_quoted_string(s, pos)
  local chars = {}

  if string.sub(s, pos, pos) ~= "\"" then
    return nil
  end
  pos = pos + 1
  pos = skip_space(s, pos)
  while pos <= #s and string.sub(s, pos, pos) ~= "\"" do
    local c

    c = string.sub(s, pos, pos)
    if c == "\\" then
      if pos < #s then
        pos = pos + 1
        c = string.sub(s, pos, pos)
      else
        -- Trailing backslash with nothing to escape.
        return nil
      end
    end
    chars[#chars + 1] = c
    pos = pos + 1
  end
  if pos > #s or string.sub(s, pos, pos) ~= "\"" then
    return nil
  end

  return pos + 1, table.concat(chars)
end

-- Dispatch on the first non-blank byte: quoted-string if it is '"', plain
-- token otherwise.
local function read_token_or_quoted_string(s, pos)
  pos = skip_space(s, pos)
  if string.sub(s, pos, pos) == "\"" then
    return read_quoted_string(s, pos)
  else
    return read_token(s, pos)
  end
end

--- Create a pattern to find a tag
--
-- Case-insensitive search for tags
-- @param tag The name of the tag to find
-- @param endtag Boolean true if you are looking for an end tag, otherwise it will look for a start tag
-- @return A pattern to find the tag
function tag_pattern(tag, endtag)
  if endtag then
    -- BUGFIX: this branch previously returned the truncated pattern "].->",
    -- which can never match a closing tag such as </form>, so callers (e.g.
    -- grab_forms) could not locate end tags. Build the closing-tag pattern
    -- symmetrically with the start-tag branch below; the %f frontier keeps
    -- e.g. "form" from matching "formats".
    return "</%s*" .. stringaux.ipattern(tag) .. "%f[%s>].->"
  else
    return "<%s*" .. stringaux.ipattern(tag) .. "%f[%s/>].->"
  end
end

---
-- Finds forms in html code
--
-- returns table of found forms, in plaintext.
-- @param body A response.body
in which to search for forms
-- @return A list of forms.
function grab_forms(body)
  local forms = {}
  if not body then return forms end
  local form_start_expr = tag_pattern("form")
  local form_end_expr = tag_pattern("form", true)

  -- Walk the body, slicing out everything from each opening form tag up to
  -- (but not including) its matching closing tag.
  local form_opening = string.find(body, form_start_expr)

  while form_opening do
    local form_closing = string.find(body, form_end_expr, form_opening+1)
    if form_closing == nil then --html code contains errors
      break
    end
    forms[#forms+1] = string.sub(body, form_opening, form_closing-1)
    if form_closing+1 <= #body then
      form_opening = string.find(body, form_start_expr, form_closing+1)
    else
      break
    end
  end
  return forms
end

-- Extract the value of the attribute "name" from the HTML fragment of a
-- single tag. Handles attr=value, attr="value", attr='value' and bare
-- ("empty") attribute syntax; returns nil when the attribute is absent.
-- Matching is case-insensitive; the returned value keeps its original case.
local function get_attr (html, name)
  local lhtml = html:lower()
  local lname = name:lower()
  -- try the attribute-value syntax first
  local _, pos = lhtml:find('%s' .. lname .. '%s*=%s*[^%s]')
  if not pos then
    -- try the empty attribute syntax and, if found,
    -- return zero-length string as its value; nil otherwise
    return lhtml:match('[^%s=]%s+' .. lname .. '[%s/>]') and "" or nil
  end
  local value
  -- pos indexes the first byte of the value (the [^%s] above consumed it);
  -- prefer a quoted value, falling back to an unquoted run of characters.
  _, value = html:match('^([\'"])(.-)%1', pos)
  if not value then
    value = html:match('^[^%s<>=\'"`]+', pos)
  end
  return slaxml.parser.unescape(value)
end

---
-- Parses a form, that is, finds its action and fields.
-- @param form A plaintext representation of form
-- @return A dictionary with keys: action
,
-- method if one is specified, fields
-- which is a list of fields found in the form each of which has a
-- name attribute and type if specified.
function parse_form(form)
  local parsed = {}
  local fields = {}
  local form_action = get_attr(form, "action")
  if form_action then
    parsed["action"] = form_action
  end

  -- determine if the form is using get or post
  local form_method = get_attr(form, "method")
  if form_method then
    parsed["method"] = string.lower(form_method)
  end

  -- get the id of the form
  local form_id = get_attr(form, "id")
  if form_id then
    parsed["id"] = string.lower(form_id)
  end

  -- now identify the fields
  local input_type
  local input_name
  local input_value

  -- first find regular inputs
  for f in string.gmatch(form, tag_pattern("input")) do
    input_type = get_attr(f, "type")
    input_name = get_attr(f, "name")
    input_value = get_attr(f, "value")
    local next_field_index = #fields+1
    -- Nameless inputs are skipped; they cannot be submitted anyway.
    if input_name then
      fields[next_field_index] = {}
      fields[next_field_index]["name"] = input_name
      if input_type then
        fields[next_field_index]["type"] = string.lower(input_type)
      end
      if input_value then
        fields[next_field_index]["value"] = input_value
      end
    end
  end

  -- now search for textareas
  for f in string.gmatch(form, tag_pattern("textarea")) do
    input_name = get_attr(f, "name")
    local next_field_index = #fields+1
    if input_name then
      fields[next_field_index] = {}
      fields[next_field_index]["name"] = input_name
      -- Textareas carry no type attribute; tag them explicitly.
      fields[next_field_index]["type"] = "textarea"
    end
  end
  parsed["fields"] = fields
  return parsed
end

-- Map of RFC 2616 month abbreviations to month numbers; used by parse_date.
local MONTH_MAP = {
  Jan = 1, Feb = 2, Mar = 3, Apr = 4, May = 5, Jun = 6,
  Jul = 7, Aug = 8, Sep = 9, Oct = 10, Nov = 11, Dec = 12
}

--- Parses an HTTP date string
--
-- Supports any of the following formats from section 3.3.1 of RFC 2616:
-- * Sun, 06 Nov 1994 08:49:37 GMT (RFC 822, updated by RFC 1123)
-- * Sunday, 06-Nov-94 08:49:37 GMT (RFC 850, obsoleted by RFC 1036)
-- * Sun Nov  6 08:49:37 1994 (ANSI C's asctime()
format)
-- @param s the date string.
-- @return a table with keys year, month,
-- day, hour, min, sec, and
-- isdst, relative to GMT, suitable for input to
-- os.time.
function parse_date(s)
  local day, month, year, hour, min, sec, tz, month_name

  -- Handle RFC 1123 and 1036 at once.
  day, month_name, year, hour, min, sec, tz = s:match("^%w+, (%d+)[- ](%w+)[- ](%d+) (%d+):(%d+):(%d+) (%w+)$")
  if not day then
    -- asctime() carries no explicit zone; it is defined to be GMT.
    month_name, day, hour, min, sec, year = s:match("%w+ (%w+) ?(%d+) (%d+):(%d+):(%d+) (%d+)")
    tz = "GMT"
  end
  if not day then
    stdnse.debug1("http.parse_date: can't parse date \"%s\": unknown format.", s)
    return nil
  end
  -- Look up the numeric code for month.
  month = MONTH_MAP[month_name]
  if not month then
    stdnse.debug1("http.parse_date: unknown month name \"%s\".", month_name)
    return nil
  end
  if tz ~= "GMT" then
    stdnse.debug1("http.parse_date: don't know time zone \"%s\", only \"GMT\".", tz)
    return nil
  end
  day = tonumber(day)
  year = tonumber(year)
  hour = tonumber(hour)
  min = tonumber(min)
  sec = tonumber(sec)

  if year < 100 then
    -- Two-digit year. Make a guess.
    if year < 70 then
      year = year + 2000
    else
      year = year + 1900
    end
  end

  return { year = year, month = month, day = day, hour = hour, min = min, sec = sec, isdst = false }
end

-- See RFC 2617, section 1.2. This function returns a table with keys "scheme"
-- and "params".
local function read_auth_challenge(s, pos)
  local scheme, params

  pos, scheme = read_token(s, pos)
  if not scheme then
    return nil
  end

  params = {}
  pos = skip_space(s, pos)
  while pos < #s do
    local name, val
    local tmp_pos

    -- We need to peek ahead at this point. It's possible that we've hit the
    -- end of one challenge and the beginning of another. Section 14.33 says
    -- that the header value can be 1#challenge, in other words several
    -- challenges separated by commas. Because the auth-params are also
    -- separated by commas, the only way we can tell is if we find a token not
    -- followed by an equals sign.
    tmp_pos = pos
    tmp_pos, name = read_token(s, tmp_pos)
    if not name then
      -- Not even a token here; skip one byte and report a parameterless
      -- challenge for this scheme.
      pos = skip_space(s, pos + 1)
      return pos, { scheme = scheme, params = nil }
    end
    tmp_pos = skip_space(s, tmp_pos)
    if string.sub(s, tmp_pos, tmp_pos) ~= "=" then
      -- No equals sign, must be the beginning of another challenge.
      break
    end
    tmp_pos = tmp_pos + 1

    pos = tmp_pos
    pos, val = read_token_or_quoted_string(s, pos)
    if not val then
      return nil
    end
    if params[name] then
      -- Duplicate auth-param names make the header malformed.
      return nil
    end
    params[name] = val
    pos = skip_space(s, pos)
    if string.sub(s, pos, pos) == "," then
      pos = skip_space(s, pos + 1)
      if pos > #s then
        return nil
      end
    end
  end

  return pos, { scheme = scheme, params = params }
end

---Parses the WWW-Authenticate header as described in RFC 2616, section 14.47
-- and RFC 2617, section 1.2.
--
-- The return value is an array of challenges. Each challenge is a table with
-- the keys scheme
and params.
-- @param s The header value text.
-- @return An array of challenges, or nil on error.
function parse_www_authenticate(s)
  local challenges = {}
  local pos

  pos = 1
  -- Challenges appear back to back; read_auth_challenge consumes any comma
  -- separator between them and advances pos.
  while pos <= #s do
    local challenge

    pos, challenge = read_auth_challenge(s, pos)
    if not challenge then
      return nil
    end
    challenges[#challenges + 1] = challenge
  end

  return challenges
end

---Take the data returned from a HTTP request and return the status string.
-- Useful for stdnse.debug
messages and even advanced output.
--
-- @param data The response table from any HTTP request
-- @return The best status string we could find: either the actual status string, the status code, or "".
function get_status_string(data)
  -- No response table at all: nothing better than the empty string.
  if data == nil then
    return ""
  end

  local line = data['status-line']
  if line == nil then
    -- No status line; fall back to the bare status code when present.
    if data['status'] ~= nil then
      return data['status']
    end
    return ""
  end

  -- Everything after the first space is the interesting part, e.g. "200 OK"
  -- out of "HTTP/1.1 200 OK"; strip any trailing newline.
  local sep = line:find(' ', 1, true)
  if not sep then
    return line
  end
  return line:sub(sep + 1):gsub('\r?\n', '')
end

---Determine whether or not the server supports HEAD.
--
-- Tests by requesting / and verifying that it returns 200, and doesn't return
-- data. We implement the check like this because can't always rely on OPTIONS
-- to tell the truth.
--
-- Note: If identify_404
returns a 200 status, HEAD requests
-- should be disabled. Sometimes, servers use a 200 status code with a message
-- explaining that the page wasn't found. In this case, to actually identify
-- a 404 page, we need the full body that a HEAD request doesn't supply.
-- This is determined automatically if the result_404 field is
-- set.
--
-- @param host The host object.
-- @param port The port to use.
-- @param result_404 [optional] The result when an unknown page is requested.
--                   This is returned by identify_404. If the 404
--                   page returns a 200 code, then we disable HEAD requests.
-- @param path The path to request; by default, / is used.
-- @return A boolean value: true if HEAD is usable, false otherwise.
-- @return If HEAD is usable, the result of the HEAD request is returned (so
--         potentially, a script can avoid an extra call to HEAD)
function can_use_head(host, port, result_404, path)
  -- If the 404 result is 200, don't use HEAD.
  if(result_404 == 200) then
    return false
  end

  -- Default path
  if(path == nil) then
    path = '/'
  end

  -- Perform a HEAD request and see what happens.
  local data = head( host, port, path )
  if data then
    -- A redirect on HEAD suggests the server treats HEAD differently from
    -- GET, so it cannot be trusted for scanning.
    if data.status and data.status == 302 and data.header and data.header.location then
      stdnse.debug1("HTTP: Warning: Host returned 302 and not 200 when performing HEAD.")
      return false
    end

    if data.status and data.status == 200 and data.header then
      -- check that a body wasn't returned
      if #data.body > 0 then
        stdnse.debug1("HTTP: Warning: Host returned data when performing HEAD.")
        return false
      end

      stdnse.debug1("HTTP: Host supports HEAD.")
      return true, data
    end

    stdnse.debug1("HTTP: Didn't receive expected response to HEAD request (got %s).", get_status_string(data))
    return false
  end

  stdnse.debug1("HTTP: HEAD request completely failed.")
  return false
end

--- Try to remove anything that might change within a 404.
--
-- For example:
-- * A file path (includes URI)
-- * A time
-- * A date
-- * An execution time (numbers in general, really)
--
-- The intention is that two 404 pages from different URIs and taken hours
-- apart should, whenever possible, look the same.
--
-- During this function, we're likely going to over-trim things. This is fine
-- -- we want enough to match on that it'll a) be unique, and b) have the best
-- chance of not changing. Even if we remove bits and pieces from the file, as
-- long as it isn't a significant amount, it'll remain unique.
--
-- One case this doesn't cover is if the server generates a random haiku for
-- the user.
--
-- @param body The body of the page.
function clean_404(body)
  if ( not(body) ) then
    return
  end
  -- Remove anything that looks like time
  body = string.gsub(body, '%d?%d:%d%d:%d%d', "")
  body = string.gsub(body, '%d%d:%d%d', "")
  body = string.gsub(body, 'AM', "")
  body = string.gsub(body, 'am', "")
  body = string.gsub(body, 'PM', "")
  body = string.gsub(body, 'pm', "")

  -- Remove anything that looks like a date (this includes 6 and 8 digit numbers)
  -- (this is probably unnecessary, but it's getting pretty close to 11:59 right now, so you never know!)
  body = string.gsub(body, '%d%d%d%d%d%d%d%d', "") -- 4-digit year (has to go first, because it overlaps 2-digit year)
  body = string.gsub(body, '%d%d%d%d%-%d%d%-%d%d', "")
  body = string.gsub(body, '%d%d%d%d/%d%d/%d%d', "")
  body = string.gsub(body, '%d%d%-%d%d%-%d%d%d%d', "")
  body = string.gsub(body, '%d%d%/%d%d%/%d%d%d%d', "")
  body = string.gsub(body, '%d%d%d%d%d%d', "") -- 2-digit year
  body = string.gsub(body, '%d%d%-%d%d%-%d%d', "")
  body = string.gsub(body, '%d%d%/%d%d%/%d%d', "")

  -- Remove anything that looks like a path (note: this will get the URI too) (note2: this interferes with the date removal above, so it can't be moved up)
  body = string.gsub(body, "/[^ ]+", "") -- Unix - remove everything from a slash till the next space
  body = string.gsub(body, "[a-zA-Z]:\\[^ ]+", "") -- Windows - remove everything from a "x:\" pattern till the next space

  -- If we have SSL available, save us a lot of memory by hashing the page (if SSL isn't available, this will work fine, but
  -- take up more memory). If we're debugging, don't hash (it makes things far harder to debug).
  if(have_ssl and nmap.debugging() == 0) then
    return openssl.md5(body)
  end

  return body
end

-- Store an identify_404 verdict in the host registry (keyed by port number)
-- so later calls can skip the network probes, then unpack it for returning.
local function cache_404_response(host, port, response)
  if type(host) == "table" and host.registry then
    host.registry.http_404 = host.registry.http_404 or {}
    local portnum = port
    if type(port) == "table" then
      portnum = port.number
    end
    host.registry.http_404[portnum] = response
  end
  return table.unpack(response)
end

-- Status codes that, when returned for the probe URLs, are treated like 404s.
local bad_responses = { 301, 302, 400, 401, 403, 499, 501, 503 }
local identify_404_get_opts = {redirect_ok=false}
-- Pre-built verdict tuples for cache_404_response.
local identify_404_cache_404 = {true, 404}
local identify_404_cache_unknown = {false,
  "Two known 404 pages returned valid and different pages; unable to identify valid response."
}
local identify_404_cache_unknown_folder = {false,
  "Two known 404 pages returned valid and different pages; unable to identify valid response (happened when checking a folder)."
}
local identify_404_cache_200 = {true, 200}

---Try requesting a non-existent file to determine how the server responds to
-- unknown pages ("404 pages")
--
-- This tells us
-- * what to expect when a non-existent page is requested, and
-- * if the server will be impossible to scan.
--
-- If the server responds with a 404 status code, as it is supposed to, then
-- this function simply returns 404. If it contains one of a series of common
-- status codes, including unauthorized, moved, and others, it is returned like
-- a 404.
--
-- I (Ron Bowes) have observed one host that responds differently for three
-- scenarios:
-- * A non-existent page, all lowercase (a login page)
-- * A non-existent page, with uppercase (a weird error page that says,
--   "Filesystem is corrupt.")
-- * A page in a non-existent directory (a login page with different font
--   colours)
--
-- As a result, I've devised three different 404 tests, one to check each of
-- these conditions. They all have to match, the tests can proceed; if any of
-- them are different, we can't check 404s properly.
--
-- @param host The host object.
-- @param port The port to which we are establishing the connection.
-- @return status Did we succeed?
-- @return result If status is false, result is an error message. Otherwise,
--                it's the code to expect (typically, but not necessarily,
--                '404').
-- @return body Body is a hash of the cleaned-up body that can be used when
--              detecting a 404 page that doesn't return a 404 error code.
function identify_404(host, port)
  -- Return a previously cached verdict for this host/port, if any.
  if type(host) == "table" and host.registry and host.registry.http_404 then
    local portnum = port
    if type(port) == "table" then
      portnum = port.number
    end
    local result = host.registry.http_404[portnum]
    if result then
      return table.unpack(result)
    end
  end
  local data

  -- The URLs used to check 404s; the timestamp suffix makes them effectively
  -- guaranteed not to exist on the server.
  local URL_404_1 = '/nmaplowercheck' .. os.time(os.date('*t'))
  local URL_404_2 = '/NmapUpperCheck' .. os.time(os.date('*t'))
  local URL_404_3 = '/Nmap/folder/check' .. os.time(os.date('*t'))

  data = get(host, port, URL_404_1, identify_404_get_opts)

  if(data == nil) then
    stdnse.debug1("HTTP: Failed while testing for 404 status code")
    -- do not cache; maybe it will work next time?
    return false, "Failed while testing for 404 error message"
  end

  if(data.status and data.status == 404) then
    stdnse.debug1("HTTP: Host returns proper 404 result.")
    return cache_404_response(host, port, identify_404_cache_404)
  end

  if(data.status and data.status == 200) then
    stdnse.debug1("HTTP: Host returns 200 instead of 404.")

    -- Clean up the body (for example, remove the URI). This makes it easier to validate later
    if(data.body) then
      -- Obtain a couple more 404 pages to test different conditions
      local data2 = get(host, port, URL_404_2)
      local data3 = get(host, port, URL_404_3)
      if(data2 == nil or data3 == nil) then
        stdnse.debug1("HTTP: Failed while testing for extra 404 error messages")
        -- do not cache; maybe it will work next time?
        return false, "Failed while testing for extra 404 error messages"
      end

      -- Check if the return code became something other than 200.
      -- Status code: -1 represents unknown.
      -- If the status is nil or the string "unknown" we switch to -1.
      if(data2.status ~= 200) then
        if(type(data2.status) ~= "number") then
          data2.status = -1
        end
        stdnse.debug1("HTTP: HTTP 404 status changed for second request (became %d).", data2.status)
        return cache_404_response(host, port, {false, string.format("HTTP 404 status changed for second request (became %d).", data2.status) })
      end

      -- Check if the return code became something other than 200
      if(data3.status ~= 200) then
        if(type(data3.status) ~= "number") then
          data3.status = -1
        end
        stdnse.debug1("HTTP: HTTP 404 status changed for third request (became %d).", data3.status)
        return cache_404_response(host, port, {false, string.format("HTTP 404 status changed for third request (became %d).", data3.status) })
      end

      -- Check if the returned bodies (once cleaned up) matches the first returned body
      local clean_body = clean_404(data.body)
      local clean_body2 = clean_404(data2.body)
      local clean_body3 = clean_404(data3.body)
      if(clean_body ~= clean_body2) then
        stdnse.debug1("HTTP: Two known 404 pages returned valid and different pages; unable to identify valid response.")
        stdnse.debug1("HTTP: If you investigate the server and it's possible to clean up the pages, please post to nmap-dev mailing list.")
        return cache_404_response(host, port, identify_404_cache_unknown)
      end

      if(clean_body ~= clean_body3) then
        stdnse.debug1("HTTP: Two known 404 pages returned valid and different pages; unable to identify valid response (happened when checking a folder).")
        stdnse.debug1("HTTP: If you investigate the server and it's possible to clean up the pages, please post to nmap-dev mailing list.")
        return cache_404_response(host, port, identify_404_cache_unknown_folder)
      end

      cache_404_response(host, port, {true, 200, clean_body})
      return true, 200, clean_body
    end

    stdnse.debug1("HTTP: The 200 response didn't contain a body.")
    return cache_404_response(host, port, identify_404_cache_200)
  end

  -- Loop through any expected error codes
  for _,code in pairs(bad_responses) do
    if(data.status and data.status == code) then
      stdnse.debug1("HTTP: Host returns %s instead of 404 File Not Found.", get_status_string(data))
      return cache_404_response(host, port, {true, code})
    end
  end

  stdnse.debug1("Unexpected response returned for 404 check: %s", get_status_string(data))

  return cache_404_response(host, port, {true, data.status})
end

--- Determine whether or not the page that was returned is a 404 page.
--
-- This is actually a pretty simple function, but it's best to keep this logic
-- close to identify_404
, since they will generally be used
-- together.
--
-- @param data The data returned by the HTTP request
-- @param result_404 The status code to expect for non-existent pages. This is
--                   returned by identify_404.
-- @param known_404 The 404 page itself, if result_404 is 200. If
--                  result_404 is something else, this parameter
--                  is ignored and can be set to nil. This is
--                  returned by identify_404.
-- @param page The page being requested (used in error messages).
-- @param displayall [optional] If set to true, don't exclude non-404 errors
--                   (such as 500).
-- @return A boolean value: true if the page appears to exist, and false if it
--         does not.
function page_exists(data, result_404, known_404, page, displayall)
  if(data and data.status) then
    -- Handle the most complicated case first: the "200 Ok" response
    if(data.status == 200) then
      if(result_404 == 200) then
        -- If the 404 response is also "200", deal with it (check if the body matches)
        if(#data.body == 0) then
          -- I observed one server that returned a blank string instead of an error, on some occasions
          stdnse.debug1("HTTP: Page returned a totally empty body; page likely doesn't exist")
          return false
        elseif(clean_404(data.body) ~= known_404) then
          stdnse.debug1("HTTP: Page returned a body that doesn't match known 404 body, therefore it exists (%s)", page)
          return true
        else
          return false
        end
      else
        -- If 404s return something other than 200, and we got a 200, we're good to go
        stdnse.debug1("HTTP: Page was '%s', it exists! (%s)", get_status_string(data), page)
        return true
      end
    else
      -- If the result isn't a 200, check if it's a 404 or returns the same code as a 404 returned
      if(data.status ~= 404 and data.status ~= result_404) then
        -- If this check succeeded, then the page isn't a standard 404 -- it could be a redirect, authentication request, etc. Unless the user
        -- asks for everything (with a script argument), only display 401 Authentication Required here.
        stdnse.debug1("HTTP: Page didn't match the 404 response (%s) (%s)", get_status_string(data), page)

        if(data.status == 401) then -- "Authentication Required"
          return true
        elseif(displayall) then
          return true
        end

        return false
      else
        -- Page was a 404, or looked like a 404
        return false
      end
    end
  else
    stdnse.debug1("HTTP: HTTP request failed (is the host still up?)")
    return false
  end
end

-- Lowercase a string; nil becomes the empty string.
local lowercase = function (p)
  return (p or ''):lower()
end

-- Identity on strings; nil becomes the empty string.
local safe_string = function (p)
  return p or ''
end

---Check if the response variable contains the given text.
--
-- Response variable could be a return from a http.get, http.post,
-- http.pipeline_go, etc. The text can be:
-- * Part of a header ('content-type', 'text/html', '200 OK', etc)
-- * An entire header ('Content-type: text/html', 'Content-length: 123', etc)
-- * Part of the body
--
-- The search text is treated as a Lua pattern.
--
--@param response The full response table from a HTTP request.
--@param pattern The pattern we're searching for. Don't forget to escape '-',
--               for example, 'Content%-type'. The pattern can also contain
--               captures, like 'abc(.*)def', which will be returned if
--               successful.
--@param case_sensitive [optional] Set to true
for case-sensitive
--                      searches. Default: not case sensitive.
--@return result True if the string matched, false otherwise
--@return matches An array of captures from the match, if any
function response_contains(response, pattern, case_sensitive)
  -- An empty or missing pattern trivially matches.
  if pattern == nil or pattern == '' then
    return true
  end

  -- Normalizer: identity for case-sensitive searches, lowercasing otherwise.
  local normalize = case_sensitive and safe_string or lowercase
  local needle = normalize(pattern)

  -- Returns the capture list when the text matches, nil otherwise.
  local function try_match(text)
    local captures = {string.match(normalize(text), needle)}
    if #captures > 0 then
      return captures
    end
    return nil
  end

  -- Check the status line (eg, 'HTTP/1.1 200 OK')
  local found = try_match(response['status-line'])
  if found then
    return true, found
  end

  -- Check the headers
  for _, header in pairs(response['rawheader']) do
    found = try_match(header)
    if found then
      return true, found
    end
  end

  -- Check the body
  found = try_match(response['body'])
  if found then
    return true, found
  end

  return false
end

---This function should be called whenever a valid path (a path that doesn't
-- contain a known 404 page) is discovered.
--
-- It will add the path to the registry in several ways, allowing other scripts
-- to take advantage of it in interesting ways.
--
--@param host The host the path was discovered on (not necessarily the host
--            being scanned).
--@param port The port the path was discovered on (not necessarily the port
--            being scanned).
--@param path The path discovered. Calling this more than once with the same
--            path is okay; it'll update the data as much as possible instead
--            of adding a duplicate entry
--@param status [optional] The status code (200, 404, 500, etc). This can be
--              left off if it isn't known.
--@param links_to [optional] A table of paths that this page links to.
--@param linked_from [optional] A table of paths that link to this page.
--@param contenttype [optional] The content-type value for the path, if it's known.
function save_path(host, port, path, status, links_to, linked_from, contenttype)
  -- Make sure we have a proper hostname and port
  host = ascii_hostname(host)
  if(type(port) == 'table') then
    port = port['number']
  end

  -- Parse the path
  local parsed = url.parse(path)

  -- raw_querystring contains both query and fragment
  parsed['raw_querystring'] = parsed['query']

  if parsed['fragment'] then
    parsed['raw_querystring'] = ( parsed['raw_querystring'] or "" ) .. '#' .. parsed['fragment']
  end

  if parsed['raw_querystring'] then
    parsed['path_query'] = parsed['path'] .. '?' .. parsed['raw_querystring']
  else
    parsed['path_query'] = parsed['path']
  end

  -- Split up the query, if necessary
  if(parsed['raw_querystring']) then
    parsed['querystring'] = {}
    local values = stringaux.strsplit('&', parsed['raw_querystring'])
    for _, v in ipairs(values) do
      local name, value = table.unpack(stringaux.strsplit('=', v))
      parsed['querystring'][name] = value
    end
  end

  -- Prefer host/port information embedded in the URL itself, if present.
  if parsed.host then
    host = parsed.ascii_host or parsed.host
  end

  if parsed.port then
    port = parsed.port
  end

  -- Add to the 'all_pages' key
  stdnse.registry_add_array({host, 'www', port, 'all_pages'}, parsed['path'])

  -- Add the URL with querystring to all_pages_full_query
  stdnse.registry_add_array({host, 'www', port, 'all_pages_full_query'}, parsed['path_query'])

  -- Add the URL to a key matching the response code
  if(status) then
    stdnse.registry_add_array({host, 'www', port, 'status_codes', status}, parsed['path'])
  end

  -- If it's a directory, add it to the directories list; otherwise, add it to the files list
  if(parsed['is_folder']) then
    stdnse.registry_add_array({host, 'www', port, 'directories'}, parsed['path'])
  else
    stdnse.registry_add_array({host, 'www', port, 'files'}, parsed['path'])
  end

  -- If we have an extension, add it to the extensions key
  if(parsed['extension']) then
    stdnse.registry_add_array({host, 'www', port, 'extensions', parsed['extension']}, parsed['path'])
  end

  -- Add an entry for the page and its arguments
  if(parsed['querystring']) then
    -- Add all scripts with a querystring to the 'cgi' and 'cgi_full_query' keys
    stdnse.registry_add_array({host, 'www', port, 'cgi'}, parsed['path'])
    stdnse.registry_add_array({host, 'www', port, 'cgi_full_query'}, parsed['path_query'])

    -- Add the query string alone to the registry (probably not necessary)
    stdnse.registry_add_array({host, 'www', port, 'cgi_querystring', parsed['path'] }, parsed['raw_querystring'])

    -- Add the arguments table for the page. (The previous code looped over
    -- pairs(parsed['querystring']) and appended the SAME table once per
    -- argument, ignoring the loop variables -- creating one duplicate entry
    -- per query parameter. A single append stores identical data exactly once.)
    stdnse.registry_add_array({host, 'www', port, 'cgi_args', parsed['path']}, parsed['querystring'])
  end

  -- Save the pages it links to
  if(links_to) then
    if(type(links_to) == 'string') then
      links_to = {links_to}
    end

    for _, v in ipairs(links_to) do
      stdnse.registry_add_array({host, 'www', port, 'links_to', parsed['path_query']}, v)
    end
  end

  -- Save the pages it's linked from (we save these in the 'links_to' key, reversed)
  if(linked_from) then
    if(type(linked_from) == 'string') then
      linked_from = {linked_from}
    end

    for _, v in ipairs(linked_from) do
      stdnse.registry_add_array({host, 'www', port, 'links_to', v}, parsed['path_query'])
    end
  end

  -- Save it as a content-type, if we have one
  if(contenttype) then
    stdnse.registry_add_array({host, 'www', port, 'content-type', contenttype}, parsed['path_query'])
  end
end

local unittest = require "unittest"
if not unittest.testing() then
  return _ENV
end

test_suite = unittest.TestSuite:new()

do
  local cookie_tests = {
    {
      name = "#1198: conflicting attribute name",
      cookie = "JSESSIONID=aaa; name=bbb; value=ccc; attr=ddd",
      parsed = {
        name = "JSESSIONID",
        value = "aaa",
        attr = "ddd",
      }
    },
    {
      name = "#1171: empty attribute value",
      cookie = "JSESSIONID=aaa; attr1; attr2=; attr3=",
      parsed = {
        name = "JSESSIONID",
        value = "aaa",
        attr1 = "",
        attr2 = "",
        attr3 = "",
      }
    },
    {
      name = "#1170: quotes present",
      cookie = "aaa=\"b\\\"bb\"; pATH = \"ddd eee\" fff",
      parsed = {
        name = "aaa",
        value = "\"b\\\"bb\"",
        path = "\"ddd eee\" fff"
      }
    },
    {
name = "#1169: empty attributes",
      cookie = "JSESSIONID=aaa; ; Path=/;;Secure;",
      parsed = {
        name = "JSESSIONID",
        value = "aaa",
        path = "/",
        secure = ""
      }
    },
    {
      name = "#844: space in a cookie value",
      -- NOTE(review): restored "&copyright=2014" from encoding-garbled
      -- "(c)right=2014" (the "&copy" HTML entity had been rendered).
      cookie = " SESSIONID = IgAAABjN8b3xxxNsLRIiSpHLPn1lE=&IgAAAxxxMT6Bw==&Huawei USG6320&langfrombrows=en-US&copyright=2014 ;secure",
      parsed = {
        name = "SESSIONID",
        value = "IgAAABjN8b3xxxNsLRIiSpHLPn1lE=&IgAAAxxxMT6Bw==&Huawei USG6320&langfrombrows=en-US&copyright=2014",
        secure = ""
      }
    },
    {
      name = "#866: unexpected attribute",
      cookie = " SID=c98fefa3ad659caa20b89582419bb14f; Max-Age=1200; Version=1",
      parsed = {
        name = "SID",
        value = "c98fefa3ad659caa20b89582419bb14f",
        ["max-age"] = "1200",
        version = "1"
      }
    },
    {
      name = "#731: trailing semicolon",
      cookie = "session_id=76ca8bc8c19;",
      parsed = {
        name = "session_id",
        value = "76ca8bc8c19"
      }
    },
    {
      name = "#229: comma is not a delimiter",
      cookie = "c1=aaa; path=/bbb/ccc,ddd/eee",
      parsed = {
        name = "c1",
        value = "aaa",
        path = "/bbb/ccc,ddd/eee"
      }
    },
  }

  -- Each cookie must parse, and the parsed table must match key-for-key.
  for _, test in ipairs(cookie_tests) do
    local parsed = parse_set_cookie(test.cookie)
    test_suite:add_test(unittest.not_nil(parsed), test.name)
    if parsed then
      test_suite:add_test(unittest.keys_equal(parsed, test.parsed), test.name)
    end
  end

  local status_line_tests = {
    {
      name = "valid status line",
      line = "HTTP/1.0 200 OK\r\n",
      result = true,
      parsed = {
        version = "1.0",
        status = 200,
      }
    },
    {
      name = "malformed version in status line",
      line = "HTTP/1. 200 OK\r\n",
      result = false,
      parsed = {
        version = nil,
        status = nil,
      }
    },
    {
      name = "non-integer status code in status line",
      line = "HTTP/1.0 20A OK\r\n",
      result = false,
      parsed = {
        version = "1.0",
        status = nil,
      }
    },
    {
      name = "missing reason phrase in status line",
      line = "HTTP/1.0 200\r\n",
      result = true,
      parsed = {
        version = "1.0",
        status = 200,
      }
    },
  }

  for _, test in ipairs(status_line_tests) do
    local response = {}
    -- "err" rather than "error": the original local shadowed the built-in.
    local result, err = parse_status_line(test.line, response)
    if test.result then
      test_suite:add_test(unittest.not_nil(result), test.name)
    else
      test_suite:add_test(unittest.is_nil(result), test.name)
      test_suite:add_test(unittest.not_nil(err), test.name)
    end
    -- The raw status line is recorded even on parse failure.
    test_suite:add_test(unittest.equal(response["status-line"], test.line), test.name)
    if result then
      test_suite:add_test(unittest.equal(response.status, test.parsed.status), test.name)
      test_suite:add_test(unittest.equal(response.version, test.parsed.version), test.name)
    end
  end

  local content_encoding_tests = {}
  table.insert(content_encoding_tests, {
    name = "nil encoding list",
    encoding = nil,
    source = "SomePlaintextBody",
    target = "SomePlaintextBody",
    decoded = nil,
    undecoded = nil
  })
  table.insert(content_encoding_tests, {
    name = "no encoding",
    encoding = {},
    source = "SomePlaintextBody",
    target = "SomePlaintextBody",
    decoded = {},
    undecoded = {}
  })
  table.insert(content_encoding_tests, {
    name = "identity encoding",
    encoding = "identity",
    source = "SomePlaintextBody",
    target = "SomePlaintextBody",
    decoded = {"identity"},
    undecoded = {}
  })
  if have_zlib then
    table.insert(content_encoding_tests, {
      name = "gzip encoding",
      source = stdnse.fromhex("1f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
      encoding = "gzip",
      target = "Hello, World!",
      decoded = {"gzip"},
      undecoded = {}
    })
    table.insert(content_encoding_tests, {
      name = "corrupted gzip encoding",
      encoding = "gzip",
      source = stdnse.fromhex("2f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
      target = nil,
      decoded = {},
      undecoded = {"gzip"},
      err = "Corrupted Content-Encoding: gzip",
      fragment = stdnse.fromhex("2f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000")
    })
    table.insert(content_encoding_tests, {
      name = "gzip encoding with maxlen",
      encoding = "gzip",
      source = stdnse.fromhex("1f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
      target = "Hello, World!",
      decoded = {"gzip"},
      undecoded = {},
      maxlen = 999
    })
    table.insert(content_encoding_tests, {
      name = "gzip encoding with small maxlen",
      encoding = "gzip",
      source = stdnse.fromhex("1f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
      target = nil,
      decoded = {"gzip"},
      undecoded = {},
      maxlen = 4,
      err = ERR_OVERSIZED_BODY,
      fragment = "Hell"
    })
    table.insert(content_encoding_tests, {
      name = "gzip encoding with exact maxlen",
      encoding = "gzip",
      source = stdnse.fromhex("1f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
      target = "Hello, World!",
      decoded = {"gzip"},
      undecoded = {},
      maxlen = 13
    })
    table.insert(content_encoding_tests, {
      name = "gzip-encoded empty body",
      encoding = "gzip",
      source = "",
      target = "",
      decoded = {"gzip"},
      undecoded = {},
      maxlen = 999
    })
  end
  table.insert(content_encoding_tests, {
    name = "unknown encoding",
    encoding = "identity, mystery, miracle",
    source = stdnse.fromhex("1f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
    target = stdnse.fromhex("1f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
    decoded = {"identity"},
    undecoded = {"mystery", "miracle"}
  })
  if have_zlib then
    table.insert(content_encoding_tests, {
      name = "stacked encoding",
      encoding = "identity, gzip, identity",
      source = stdnse.fromhex("1f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
      target = "Hello, World!",
      decoded = {"identity", "gzip", "identity"},
      undecoded = {}
    })
  else
    -- Without zlib, decoding stops at the first gzip layer.
    table.insert(content_encoding_tests, {
      name = "stacked encoding",
      encoding = "identity, gzip, identity",
      source = stdnse.fromhex("1f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
      target = stdnse.fromhex("1f8b0800000000000000f348cdc9c9d75108cf2fca49510400d0c34aec0d000000"),
      decoded = {"identity"},
      undecoded = {"gzip", "identity"}
    })
  end

  for _, test in ipairs(content_encoding_tests) do
    local body, dcd, undcd, err, fragment = decode_body(test.source, test.encoding, test.maxlen)
    test_suite:add_test(unittest.equal(body, test.target), test.name .. " (body)")
    test_suite:add_test(unittest.identical(dcd, test.decoded), test.name .. " (decoded)")
    test_suite:add_test(unittest.identical(undcd, test.undecoded), test.name .. " (undecoded)")
    test_suite:add_test(unittest.equal(err, test.err), test.name .. " (err)")
    test_suite:add_test(unittest.equal(fragment, test.fragment), test.name .. " (fragment)")
  end

  local parse_redirect_tests = {}
  table.insert(parse_redirect_tests, {
    name = "redirect plain URL",
    host = "example.com",
    port = 80,
    path = "initial_path",
    redirect_scheme = "http",
    redirect_host = "example.com",
    redirect_port = 80,
    redirect_path = "/redirect_path",
    response = {
      status = 301,
      header = { location = "http://example.com:80/redirect_path" }
    }
  })
  table.insert(parse_redirect_tests, {
    name = "redirect URL with query",
    host = "example.com",
    port = 80,
    path = "initial_path",
    redirect_scheme = "http",
    redirect_host = "example.com",
    redirect_port = 80,
    redirect_path = "/redirect_path?query=1234",
    response = {
      status = 301,
      header = { location = "http://example.com:80/redirect_path?query=1234" }
    }
  })
  table.insert(parse_redirect_tests, {
    name = "redirect URL with semicolon params",
    host = "example.com",
    port = 80,
    path = "initial_path",
    redirect_scheme = "http",
    redirect_host = "example.com",
    redirect_port = 80,
    redirect_path = "/redirect_path;param=1234",
    response = {
      status = 301,
      header = { location = "http://example.com:80/redirect_path;param=1234" }
    }
  })
  table.insert(parse_redirect_tests, {
    name = "redirect URL with multiple semicolon params",
    host = "example.com",
    port = 80,
    path = "initial_path",
    redirect_scheme = "http",
    redirect_host = "example.com",
    redirect_port = 80,
    redirect_path = "/redirect_path;param1=1234;param2",
    response = {
      status = 301,
      header = { location = "http://example.com:80/redirect_path;param1=1234;param2" }
    }
  })
  table.insert(parse_redirect_tests, {
    name = "redirect URL with semicolon params and query",
    host = "example.com",
    port = 80,
    path = "initial_path",
    redirect_scheme = "http",
    redirect_host = "example.com",
    redirect_port = 80,
    redirect_path = "/redirect_path;param1=1234;param2&query=abc",
    response = {
      status = 301,
      header = { location = "http://example.com:80/redirect_path;param1=1234;param2&query=abc" }
    }
  })

  for _, test in ipairs(parse_redirect_tests) do
    local redirect_url = parse_redirect(test.host, test.port, test.path, test.response)
    -- test.name added so failures identify the offending case.
    test_suite:add_test(unittest.equal(redirect_url.scheme, test.redirect_scheme), test.name)
    test_suite:add_test(unittest.equal(redirect_url.host, test.redirect_host), test.name)
    test_suite:add_test(unittest.equal(redirect_url.port, test.redirect_port), test.name)
    test_suite:add_test(unittest.equal(redirect_url.path, test.redirect_path), test.name)
  end
end

return _ENV;