cpython: 293180d199f2 Lib/http/server.py (original) (raw)

Fix refleak: PyObject_GetItem returns a new reference, not a borrowed one like PyDict_GetItem.

line wrap: on

line source

"""HTTP server classes. Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, and CGIHTTPRequestHandler for CGI scripts. It does, however, optionally implement HTTP/1.1 persistent connections, as of version 0.3. Notes on CGIHTTPRequestHandler ------------------------------ This class implements GET and POST requests to cgi-bin scripts. If the os.fork() function is not present (e.g. on Windows), subprocess.Popen() is used as a fallback, with slightly altered semantics. In all cases, the implementation is intentionally naive -- all requests are executed synchronously. SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL -- it may execute arbitrary Python code or external programs. Note that status code 200 is sent prior to execution of a CGI script, so scripts cannot send other status codes such as 302 (redirect). XXX To do:

# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html

# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              not including the HTTP/1.0 header, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)

# Module version string; the request handler classes append it to their
# ``server_version`` attribute ("BaseHTTP/" + __version__, etc.).
__version__ = "0.6"

# Names exported by a star-import of this module.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]

import html
import email.message
import email.parser
import http.client
import io
import mimetypes
import os
import posixpath
import select
import shutil
import socket # For gethostbyaddr()
import socketserver
import sys
import time
import urllib.parse
import copy

# Default error message template

DEFAULT_ERROR_MESSAGE = """[](#l105)

[](#l108) [](#l109) [](#l110) Error response[](#l111) [](#l112) [](#l113)

Error response

[](#l114)

Error code: %(code)d

[](#l115)

Message: %(message)s.

[](#l116)

Error code explanation: %(code)s - %(explain)s.

[](#l117) [](#l118) [](#l119) """[](#l120) [](#l121) DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"[](#l122) [](#l123) def _quote_html(html):[](#l124) return html.replace("&", "&").replace("<", "<").replace(">", ">")[](#l125) [](#l126) class HTTPServer(socketserver.TCPServer):[](#l127) [](#l128) allow_reuse_address = 1 # Seems to make sense in testing environment[](#l129) [](#l130) def server_bind(self):[](#l131) """Override server_bind to store the server name."""[](#l132) socketserver.TCPServer.server_bind(self)[](#l133) host, port = self.socket.getsockname()[:2][](#l134) self.server_name = socket.getfqdn(host)[](#l135) self.server_port = port[](#l136) [](#l137) [](#l138) class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):[](#l139) [](#l140) """HTTP request handler base class.[](#l141) [](#l142) The following explanation of HTTP serves to guide you through the[](#l143) code as well as to expose any misunderstandings I may have about[](#l144) HTTP (so you don't need to read the code to figure out I'm wrong[](#l145) :-).[](#l146) [](#l147) HTTP (HyperText Transfer Protocol) is an extensible protocol on[](#l148) top of a reliable stream transport (e.g. TCP/IP). The protocol[](#l149) recognizes three parts to a request:[](#l150) [](#l151) 1. One line identifying the request type and path[](#l152) 2. An optional set of RFC-822-style headers[](#l153) 3. 
An optional data part[](#l154) [](#l155) The headers and data are separated by a blank line.[](#l156) [](#l157) The first line of the request has the form[](#l158) [](#l159) [](#l160) [](#l161) where is a (case-sensitive) keyword such as GET or POST,[](#l162) is a string containing path information for the request,[](#l163) and should be the string "HTTP/1.0" or "HTTP/1.1".[](#l164) is encoded using the URL encoding scheme (using %xx to signify[](#l165) the ASCII character with hex code xx).[](#l166) [](#l167) The specification specifies that lines are separated by CRLF but[](#l168) for compatibility with the widest range of clients recommends[](#l169) servers also handle LF. Similarly, whitespace in the request line[](#l170) is treated sensibly (allowing multiple spaces between components[](#l171) and allowing trailing whitespace).[](#l172) [](#l173) Similarly, for output, lines ought to be separated by CRLF pairs[](#l174) but most clients grok LF characters just fine.[](#l175) [](#l176) If the first line of the request has the form[](#l177) [](#l178) [](#l179) [](#l180) (i.e. is left out) then this is assumed to be an HTTP[](#l181) 0.9 request; this form has no optional headers and data part and[](#l182) the reply consists of just the data.[](#l183) [](#l184) The reply form of the HTTP 1.x protocol again has three parts:[](#l185) [](#l186) 1. One line giving the response code[](#l187) 2. An optional set of RFC-822-style headers[](#l188) 3. 
The data[](#l189) [](#l190) Again, the headers and data are separated by a blank line.[](#l191) [](#l192) The response code line has the form[](#l193) [](#l194) [](#l195) [](#l196) where is the protocol version ("HTTP/1.0" or "HTTP/1.1"),[](#l197) is a 3-digit response code indicating success or[](#l198) failure of the request, and is an optional[](#l199) human-readable string explaining what the response code means.[](#l200) [](#l201) This server parses the request and the headers, and then calls a[](#l202) function specific to the request type (). Specifically,[](#l203) a request SPAM will be handled by a method do_SPAM(). If no[](#l204) such method exists the server sends an error response to the[](#l205) client. If it exists, it is called with no arguments:[](#l206) [](#l207) do_SPAM()[](#l208) [](#l209) Note that the request name is case sensitive (i.e. SPAM and spam[](#l210) are different requests).[](#l211) [](#l212) The various request details are stored in instance variables:[](#l213) [](#l214) - client_address is the client IP address in the form (host,[](#l215) port);[](#l216) [](#l217) - command, path and version are the broken-down request line;[](#l218) [](#l219) - headers is an instance of email.message.Message (or a derived[](#l220) class) containing the header information;[](#l221) [](#l222) - rfile is a file object open for reading positioned at the[](#l223) start of the optional input data part;[](#l224) [](#l225) - wfile is a file object open for writing.[](#l226) [](#l227) IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING![](#l228) [](#l229) The first thing to be written must be the response line. Then[](#l230) follow 0 or more header lines, then a blank line, and then the[](#l231) actual data (if any). 
The meaning of the header lines depends on[](#l232) the command executed by the server; in most cases, when data is[](#l233) returned, there should be at least one header line of the form[](#l234) [](#l235) Content-type: /[](#l236) [](#l237) where and should be registered MIME types,[](#l238) e.g. "text/html" or "text/plain".[](#l239) [](#l240) """[](#l241) [](#l242) # The Python system version, truncated to its first component.[](#l243) sys_version = "Python/" + sys.version.split()[0][](#l244) [](#l245) # The server software version. You may want to override this.[](#l246) # The format is multiple whitespace-separated strings,[](#l247) # where each string is of the form name[/version].[](#l248) server_version = "BaseHTTP/" + __version__[](#l249) [](#l250) error_message_format = DEFAULT_ERROR_MESSAGE[](#l251) error_content_type = DEFAULT_ERROR_CONTENT_TYPE[](#l252) [](#l253) # The default request version. This only affects responses up until[](#l254) # the point where the request line is parsed, so it mainly decides what[](#l255) # the client gets back when sending a malformed request line.[](#l256) # Most web servers default to HTTP 0.9, i.e. 
don't send a status line.[](#l257) default_request_version = "HTTP/0.9"[](#l258) [](#l259) def parse_request(self):[](#l260) """Parse a request (internal).[](#l261) [](#l262) The request should be stored in self.raw_requestline; the results[](#l263) are in self.command, self.path, self.request_version and[](#l264) self.headers.[](#l265) [](#l266) Return True for success, False for failure; on failure, an[](#l267) error is sent back.[](#l268) [](#l269) """[](#l270) self.command = None # set in case of error on the first line[](#l271) self.request_version = version = self.default_request_version[](#l272) self.close_connection = 1[](#l273) requestline = str(self.raw_requestline, 'iso-8859-1')[](#l274) requestline = requestline.rstrip('\r\n')[](#l275) self.requestline = requestline[](#l276) words = requestline.split()[](#l277) if len(words) == 3:[](#l278) command, path, version = words[](#l279) if version[:5] != 'HTTP/':[](#l280) self.send_error(400, "Bad request version (%r)" % version)[](#l281) return False[](#l282) try:[](#l283) base_version_number = version.split('/', 1)[1][](#l284) version_number = base_version_number.split(".")[](#l285) # RFC 2145 section 3.1 says there can be only one "." 
and[](#l286) # - major and minor numbers MUST be treated as[](#l287) # separate integers;[](#l288) # - HTTP/2.4 is a lower version than HTTP/2.13, which in[](#l289) # turn is lower than HTTP/12.3;[](#l290) # - Leading zeros MUST be ignored by recipients.[](#l291) if len(version_number) != 2:[](#l292) raise ValueError[](#l293) version_number = int(version_number[0]), int(version_number[1])[](#l294) except (ValueError, IndexError):[](#l295) self.send_error(400, "Bad request version (%r)" % version)[](#l296) return False[](#l297) if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":[](#l298) self.close_connection = 0[](#l299) if version_number >= (2, 0):[](#l300) self.send_error(505,[](#l301) "Invalid HTTP Version (%s)" % base_version_number)[](#l302) return False[](#l303) elif len(words) == 2:[](#l304) command, path = words[](#l305) self.close_connection = 1[](#l306) if command != 'GET':[](#l307) self.send_error(400,[](#l308) "Bad HTTP/0.9 request type (%r)" % command)[](#l309) return False[](#l310) elif not words:[](#l311) return False[](#l312) else:[](#l313) self.send_error(400, "Bad request syntax (%r)" % requestline)[](#l314) return False[](#l315) self.command, self.path, self.request_version = command, path, version[](#l316) [](#l317) # Examine the headers and look for a Connection directive.[](#l318) try:[](#l319) self.headers = http.client.parse_headers(self.rfile,[](#l320) _class=self.MessageClass)[](#l321) except http.client.LineTooLong:[](#l322) self.send_error(400, "Line too long")[](#l323) return False[](#l324) [](#l325) conntype = self.headers.get('Connection', "")[](#l326) if conntype.lower() == 'close':[](#l327) self.close_connection = 1[](#l328) elif (conntype.lower() == 'keep-alive' and[](#l329) self.protocol_version >= "HTTP/1.1"):[](#l330) self.close_connection = 0[](#l331) # Examine the headers and look for an Expect directive[](#l332) expect = self.headers.get('Expect', "")[](#l333) if (expect.lower() == "100-continue" and[](#l334) 
self.protocol_version >= "HTTP/1.1" and[](#l335) self.request_version >= "HTTP/1.1"):[](#l336) if not self.handle_expect_100():[](#l337) return False[](#l338) return True[](#l339) [](#l340) def handle_expect_100(self):[](#l341) """Decide what to do with an "Expect: 100-continue" header.[](#l342) [](#l343) If the client is expecting a 100 Continue response, we must[](#l344) respond with either a 100 Continue or a final response before[](#l345) waiting for the request body. The default is to always respond[](#l346) with a 100 Continue. You can behave differently (for example,[](#l347) reject unauthorized requests) by overriding this method.[](#l348) [](#l349) This method should either return True (possibly after sending[](#l350) a 100 Continue response) or send an error response and return[](#l351) False.[](#l352) [](#l353) """[](#l354) self.send_response_only(100)[](#l355) self.flush_headers()[](#l356) return True[](#l357) [](#l358) def handle_one_request(self):[](#l359) """Handle a single HTTP request.[](#l360) [](#l361) You normally don't need to override this method; see the class[](#l362) __doc__ string for information on how to handle specific HTTP[](#l363) commands such as GET and POST.[](#l364) [](#l365) """[](#l366) try:[](#l367) self.raw_requestline = self.rfile.readline(65537)[](#l368) if len(self.raw_requestline) > 65536:[](#l369) self.requestline = ''[](#l370) self.request_version = ''[](#l371) self.command = ''[](#l372) self.send_error(414)[](#l373) return[](#l374) if not self.raw_requestline:[](#l375) self.close_connection = 1[](#l376) return[](#l377) if not self.parse_request():[](#l378) # An error code has been sent, just exit[](#l379) return[](#l380) mname = 'do_' + self.command[](#l381) if not hasattr(self, mname):[](#l382) self.send_error(501, "Unsupported method (%r)" % self.command)[](#l383) return[](#l384) method = getattr(self, mname)[](#l385) method()[](#l386) self.wfile.flush() #actually send the response if not already done.[](#l387) except 
socket.timeout as e:[](#l388) #a read or a write timed out. Discard this connection[](#l389) self.log_error("Request timed out: %r", e)[](#l390) self.close_connection = 1[](#l391) return[](#l392) [](#l393) def handle(self):[](#l394) """Handle multiple requests if necessary."""[](#l395) self.close_connection = 1[](#l396) [](#l397) self.handle_one_request()[](#l398) while not self.close_connection:[](#l399) self.handle_one_request()[](#l400) [](#l401) def send_error(self, code, message=None):[](#l402) """Send and log an error reply.[](#l403) [](#l404) Arguments are the error code, and a detailed message.[](#l405) The detailed message defaults to the short entry matching the[](#l406) response code.[](#l407) [](#l408) This sends an error response (so it must be called before any[](#l409) output has been generated), logs the error, and finally sends[](#l410) a piece of HTML explaining the error to the user.[](#l411) [](#l412) """[](#l413) [](#l414) try:[](#l415) shortmsg, longmsg = self.responses[code][](#l416) except KeyError:[](#l417) shortmsg, longmsg = '???', '???'[](#l418) if message is None:[](#l419) message = shortmsg[](#l420) explain = longmsg[](#l421) self.log_error("code %d, message %s", code, message)[](#l422) # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)[](#l423) content = (self.error_message_format %[](#l424) {'code': code, 'message': _quote_html(message), 'explain': explain})[](#l425) self.send_response(code, message)[](#l426) self.send_header("Content-Type", self.error_content_type)[](#l427) self.send_header('Connection', 'close')[](#l428) self.end_headers()[](#l429) if self.command != 'HEAD' and code >= 200 and code not in (204, 304):[](#l430) self.wfile.write(content.encode('UTF-8', 'replace'))[](#l431) [](#l432) def send_response(self, code, message=None):[](#l433) """Add the response header to the headers buffer and log the[](#l434) response code.[](#l435) [](#l436) Also send two standard headers with the server 
software[](#l437) version and the current date.[](#l438) [](#l439) """[](#l440) self.log_request(code)[](#l441) self.send_response_only(code, message)[](#l442) self.send_header('Server', self.version_string())[](#l443) self.send_header('Date', self.date_time_string())[](#l444) [](#l445) def send_response_only(self, code, message=None):[](#l446) """Send the response header only."""[](#l447) if message is None:[](#l448) if code in self.responses:[](#l449) message = self.responses[code][0][](#l450) else:[](#l451) message = ''[](#l452) if self.request_version != 'HTTP/0.9':[](#l453) if not hasattr(self, '_headers_buffer'):[](#l454) self._headers_buffer = [][](#l455) self._headers_buffer.append(("%s %d %s\r\n" %[](#l456) (self.protocol_version, code, message)).encode([](#l457) 'latin-1', 'strict'))[](#l458) [](#l459) def send_header(self, keyword, value):[](#l460) """Send a MIME header to the headers buffer."""[](#l461) if self.request_version != 'HTTP/0.9':[](#l462) if not hasattr(self, '_headers_buffer'):[](#l463) self._headers_buffer = [][](#l464) self._headers_buffer.append([](#l465) ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))[](#l466) [](#l467) if keyword.lower() == 'connection':[](#l468) if value.lower() == 'close':[](#l469) self.close_connection = 1[](#l470) elif value.lower() == 'keep-alive':[](#l471) self.close_connection = 0[](#l472) [](#l473) def end_headers(self):[](#l474) """Send the blank line ending the MIME headers."""[](#l475) if self.request_version != 'HTTP/0.9':[](#l476) self._headers_buffer.append(b"\r\n")[](#l477) self.flush_headers()[](#l478) [](#l479) def flush_headers(self):[](#l480) if hasattr(self, '_headers_buffer'):[](#l481) self.wfile.write(b"".join(self._headers_buffer))[](#l482) self._headers_buffer = [][](#l483) [](#l484) def log_request(self, code='-', size='-'):[](#l485) """Log an accepted request.[](#l486) [](#l487) This is called by send_response().[](#l488) [](#l489) """[](#l490) [](#l491) self.log_message('"%s" 
%s %s',[](#l492) self.requestline, str(code), str(size))[](#l493) [](#l494) def log_error(self, format, *args):[](#l495) """Log an error.[](#l496) [](#l497) This is called when a request cannot be fulfilled. By[](#l498) default it passes the message on to log_message().[](#l499) [](#l500) Arguments are the same as for log_message().[](#l501) [](#l502) XXX This should go to the separate error log.[](#l503) [](#l504) """[](#l505) [](#l506) self.log_message(format, *args)[](#l507) [](#l508) def log_message(self, format, *args):[](#l509) """Log an arbitrary message.[](#l510) [](#l511) This is used by all other logging functions. Override[](#l512) it if you have specific logging wishes.[](#l513) [](#l514) The first argument, FORMAT, is a format string for the[](#l515) message to be logged. If the format string contains[](#l516) any % escapes requiring parameters, they should be[](#l517) specified as subsequent arguments (it's just like[](#l518) printf!).[](#l519) [](#l520) The client host and current date/time are prefixed to[](#l521) every message.[](#l522) [](#l523) """[](#l524) [](#l525) sys.stderr.write("%s - - [%s] %s\n" %[](#l526) (self.address_string(),[](#l527) self.log_date_time_string(),[](#l528) format%args))[](#l529) [](#l530) def version_string(self):[](#l531) """Return the server software version string."""[](#l532) return self.server_version + ' ' + self.sys_version[](#l533) [](#l534) def date_time_string(self, timestamp=None):[](#l535) """Return the current date and time formatted for a message header."""[](#l536) if timestamp is None:[](#l537) timestamp = time.time()[](#l538) year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)[](#l539) s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ([](#l540) self.weekdayname[wd],[](#l541) day, self.monthname[month], year,[](#l542) hh, mm, ss)[](#l543) return s[](#l544) [](#l545) def log_date_time_string(self):[](#l546) """Return the current time formatted for logging."""[](#l547) now = time.time()[](#l548) 
year, month, day, hh, mm, ss, x, y, z = time.localtime(now)[](#l549) s = "%02d/%3s/%04d %02d:%02d:%02d" % ([](#l550) day, self.monthname[month], year, hh, mm, ss)[](#l551) return s[](#l552) [](#l553) weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][](#l554) [](#l555) monthname = [None,[](#l556) 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',[](#l557) 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][](#l558) [](#l559) def address_string(self):[](#l560) """Return the client address formatted for logging.[](#l561) [](#l562) This version looks up the full hostname using gethostbyaddr(),[](#l563) and tries to find a name that contains at least one dot.[](#l564) [](#l565) """[](#l566) [](#l567) host, port = self.client_address[:2][](#l568) return socket.getfqdn(host)[](#l569) [](#l570) # Essentially static class variables[](#l571) [](#l572) # The version of the HTTP protocol we support.[](#l573) # Set this to HTTP/1.1 to enable automatic keepalive[](#l574) protocol_version = "HTTP/1.0"[](#l575) [](#l576) # MessageClass used to parse headers[](#l577) MessageClass = http.client.HTTPMessage[](#l578) [](#l579) # Table mapping response codes to messages; entries have the[](#l580) # form {code: (shortmessage, longmessage)}.[](#l581) # See RFC 2616.[](#l582) responses = {[](#l583) 100: ('Continue', 'Request received, please continue'),[](#l584) 101: ('Switching Protocols',[](#l585) 'Switching to new protocol; obey Upgrade header'),[](#l586) [](#l587) 200: ('OK', 'Request fulfilled, document follows'),[](#l588) 201: ('Created', 'Document created, URL follows'),[](#l589) 202: ('Accepted',[](#l590) 'Request accepted, processing continues off-line'),[](#l591) 203: ('Non-Authoritative Information', 'Request fulfilled from cache'),[](#l592) 204: ('No Content', 'Request fulfilled, nothing follows'),[](#l593) 205: ('Reset Content', 'Clear input form for further input.'),[](#l594) 206: ('Partial Content', 'Partial content follows.'),[](#l595) [](#l596) 300: ('Multiple 
Choices',[](#l597) 'Object has several resources -- see URI list'),[](#l598) 301: ('Moved Permanently', 'Object moved permanently -- see URI list'),[](#l599) 302: ('Found', 'Object moved temporarily -- see URI list'),[](#l600) 303: ('See Other', 'Object moved -- see Method and URL list'),[](#l601) 304: ('Not Modified',[](#l602) 'Document has not changed since given time'),[](#l603) 305: ('Use Proxy',[](#l604) 'You must use proxy specified in Location to access this '[](#l605) 'resource.'),[](#l606) 307: ('Temporary Redirect',[](#l607) 'Object moved temporarily -- see URI list'),[](#l608) [](#l609) 400: ('Bad Request',[](#l610) 'Bad request syntax or unsupported method'),[](#l611) 401: ('Unauthorized',[](#l612) 'No permission -- see authorization schemes'),[](#l613) 402: ('Payment Required',[](#l614) 'No payment -- see charging schemes'),[](#l615) 403: ('Forbidden',[](#l616) 'Request forbidden -- authorization will not help'),[](#l617) 404: ('Not Found', 'Nothing matches the given URI'),[](#l618) 405: ('Method Not Allowed',[](#l619) 'Specified method is invalid for this resource.'),[](#l620) 406: ('Not Acceptable', 'URI not available in preferred format.'),[](#l621) 407: ('Proxy Authentication Required', 'You must authenticate with '[](#l622) 'this proxy before proceeding.'),[](#l623) 408: ('Request Timeout', 'Request timed out; try again later.'),[](#l624) 409: ('Conflict', 'Request conflict.'),[](#l625) 410: ('Gone',[](#l626) 'URI no longer exists and has been permanently removed.'),[](#l627) 411: ('Length Required', 'Client must specify Content-Length.'),[](#l628) 412: ('Precondition Failed', 'Precondition in headers is false.'),[](#l629) 413: ('Request Entity Too Large', 'Entity is too large.'),[](#l630) 414: ('Request-URI Too Long', 'URI is too long.'),[](#l631) 415: ('Unsupported Media Type', 'Entity body in unsupported format.'),[](#l632) 416: ('Requested Range Not Satisfiable',[](#l633) 'Cannot satisfy request range.'),[](#l634) 417: ('Expectation 
Failed',[](#l635) 'Expect condition could not be satisfied.'),[](#l636) [](#l637) 500: ('Internal Server Error', 'Server got itself in trouble'),[](#l638) 501: ('Not Implemented',[](#l639) 'Server does not support this operation'),[](#l640) 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),[](#l641) 503: ('Service Unavailable',[](#l642) 'The server cannot process the request due to a high load'),[](#l643) 504: ('Gateway Timeout',[](#l644) 'The gateway server did not receive a timely response'),[](#l645) 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),[](#l646) }[](#l647) [](#l648) [](#l649) class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):[](#l650) [](#l651) """Simple HTTP request handler with GET and HEAD commands.[](#l652) [](#l653) This serves files from the current directory and any of its[](#l654) subdirectories. The MIME type for files is determined by[](#l655) calling the .guess_type() method.[](#l656) [](#l657) The GET and HEAD requests are identical except that the HEAD[](#l658) request omits the actual contents of the file.[](#l659) [](#l660) """[](#l661) [](#l662) server_version = "SimpleHTTP/" + __version__[](#l663) [](#l664) def do_GET(self):[](#l665) """Serve a GET request."""[](#l666) f = self.send_head()[](#l667) if f:[](#l668) self.copyfile(f, self.wfile)[](#l669) f.close()[](#l670) [](#l671) def do_HEAD(self):[](#l672) """Serve a HEAD request."""[](#l673) f = self.send_head()[](#l674) if f:[](#l675) f.close()[](#l676) [](#l677) def send_head(self):[](#l678) """Common code for GET and HEAD commands.[](#l679) [](#l680) This sends the response code and MIME headers.[](#l681) [](#l682) Return value is either a file object (which has to be copied[](#l683) to the outputfile by the caller unless the command was HEAD,[](#l684) and must be closed by the caller under all circumstances), or[](#l685) None, in which case the caller has nothing further to do.[](#l686) [](#l687) """[](#l688) path = 
self.translate_path(self.path)[](#l689) f = None[](#l690) if os.path.isdir(path):[](#l691) if not self.path.endswith('/'):[](#l692) # redirect browser - doing basically what apache does[](#l693) self.send_response(301)[](#l694) self.send_header("Location", self.path + "/")[](#l695) self.end_headers()[](#l696) return None[](#l697) for index in "index.html", "index.htm":[](#l698) index = os.path.join(path, index)[](#l699) if os.path.exists(index):[](#l700) path = index[](#l701) break[](#l702) else:[](#l703) return self.list_directory(path)[](#l704) ctype = self.guess_type(path)[](#l705) try:[](#l706) f = open(path, 'rb')[](#l707) except IOError:[](#l708) self.send_error(404, "File not found")[](#l709) return None[](#l710) self.send_response(200)[](#l711) self.send_header("Content-type", ctype)[](#l712) fs = os.fstat(f.fileno())[](#l713) self.send_header("Content-Length", str(fs[6]))[](#l714) self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))[](#l715) self.end_headers()[](#l716) return f[](#l717) [](#l718) def list_directory(self, path):[](#l719) """Helper to produce a directory listing (absent index.html).[](#l720) [](#l721) Return value is either a file object, or None (indicating an[](#l722) error). In either case, the headers are sent, making the[](#l723) interface the same as for send_head().[](#l724) [](#l725) """[](#l726) try:[](#l727) list = os.listdir(path)[](#l728) except os.error:[](#l729) self.send_error(404, "No permission to list directory")[](#l730) return None[](#l731) list.sort(key=lambda a: a.lower())[](#l732) r = [][](#l733) displaypath = html.escape(urllib.parse.unquote(self.path))[](#l734) enc = sys.getfilesystemencoding()[](#l735) title = 'Directory listing for %s' % displaypath[](#l736) r.append('')[](#l738) r.append('\n')[](#l739) r.append('' % enc)[](#l741) r.append('%s\n' % title)[](#l742) r.append('\n

%s

' % title)[](#l743) r.append('
\n
    ')[](#l744) for name in list:[](#l745) fullname = os.path.join(path, name)[](#l746) displayname = linkname = name[](#l747) # Append / for directories or @ for symbolic links[](#l748) if os.path.isdir(fullname):[](#l749) displayname = name + "/"[](#l750) linkname = name + "/"[](#l751) if os.path.islink(fullname):[](#l752) displayname = name + "@"[](#l753) # Note: a link to a directory displays with @ and links with /[](#l754) r.append('
  • %s
  • '[](#l755) % (urllib.parse.quote(linkname), html.escape(displayname)))[](#l756) r.append('
\n
\n\n\n')[](#l757) encoded = '\n'.join(r).encode(enc)[](#l758) f = io.BytesIO()[](#l759) f.write(encoded)[](#l760) f.seek(0)[](#l761) self.send_response(200)[](#l762) self.send_header("Content-type", "text/html; charset=%s" % enc)[](#l763) self.send_header("Content-Length", str(len(encoded)))[](#l764) self.end_headers()[](#l765) return f[](#l766) [](#l767) def translate_path(self, path):[](#l768) """Translate a /-separated PATH to the local filename syntax.[](#l769) [](#l770) Components that mean special things to the local file system[](#l771) (e.g. drive or directory names) are ignored. (XXX They should[](#l772) probably be diagnosed.)[](#l773) [](#l774) """[](#l775) # abandon query parameters[](#l776) path = path.split('?',1)[0][](#l777) path = path.split('#',1)[0][](#l778) path = posixpath.normpath(urllib.parse.unquote(path))[](#l779) words = path.split('/')[](#l780) words = filter(None, words)[](#l781) path = os.getcwd()[](#l782) for word in words:[](#l783) drive, word = os.path.splitdrive(word)[](#l784) head, word = os.path.split(word)[](#l785) if word in (os.curdir, os.pardir): continue[](#l786) path = os.path.join(path, word)[](#l787) return path[](#l788) [](#l789) def copyfile(self, source, outputfile):[](#l790) """Copy all data between two file objects.[](#l791) [](#l792) The SOURCE argument is a file object open for reading[](#l793) (or anything with a read() method) and the DESTINATION[](#l794) argument is a file object open for writing (or[](#l795) anything with a write() method).[](#l796) [](#l797) The only reason for overriding this would be to change[](#l798) the block size or perhaps to replace newlines by CRLF[](#l799) -- note however that this the default server uses this[](#l800) to copy binary data as well.[](#l801) [](#l802) """[](#l803) shutil.copyfileobj(source, outputfile)[](#l804) [](#l805) def guess_type(self, path):[](#l806) """Guess the type of a file.[](#l807) [](#l808) Argument is a PATH (a filename).[](#l809) [](#l810) Return value is 
a string of the form type/subtype,[](#l811) usable for a MIME Content-type header.[](#l812) [](#l813) The default implementation looks the file's extension[](#l814) up in the table self.extensions_map, using application/octet-stream[](#l815) as a default; however it would be permissible (if[](#l816) slow) to look inside the data to make a better guess.[](#l817) [](#l818) """[](#l819) [](#l820) base, ext = posixpath.splitext(path)[](#l821) if ext in self.extensions_map:[](#l822) return self.extensions_map[ext][](#l823) ext = ext.lower()[](#l824) if ext in self.extensions_map:[](#l825) return self.extensions_map[ext][](#l826) else:[](#l827) return self.extensions_map[''][](#l828) [](#l829) if not mimetypes.inited:[](#l830) mimetypes.init() # try to read system mime.types[](#l831) extensions_map = mimetypes.types_map.copy()[](#l832) extensions_map.update({[](#l833) '': 'application/octet-stream', # Default[](#l834) '.py': 'text/plain',[](#l835) '.c': 'text/plain',[](#l836) '.h': 'text/plain',[](#l837) })[](#l838) [](#l839) [](#l840) # Utilities for CGIHTTPRequestHandler[](#l841) [](#l842) def _url_collapse_path(path):[](#l843) """[](#l844) Given a URL path, remove extra '/'s and '.' path elements and collapse[](#l845) any '..' references and returns a colllapsed path.[](#l846) [](#l847) Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.[](#l848) The utility of this function is limited to is_cgi method and helps[](#l849) preventing some security attacks.[](#l850) [](#l851) Returns: A tuple of (head, tail) where tail is everything after the final /[](#l852) and head is everything before it. Head will always start with a '/' and,[](#l853) if it contains anything else, never have a trailing '/'.[](#l854) [](#l855) Raises: IndexError if too many '..' 
occur within the path.[](#l856) [](#l857) """[](#l858) # Similar to os.path.split(os.path.normpath(path)) but specific to URL[](#l859) # path semantics rather than local operating system semantics.[](#l860) path_parts = path.split('/')[](#l861) head_parts = [][](#l862) for part in path_parts[:-1]:[](#l863) if part == '..':[](#l864) head_parts.pop() # IndexError if more '..' than prior parts[](#l865) elif part and part != '.':[](#l866) head_parts.append( part )[](#l867) if path_parts:[](#l868) tail_part = path_parts.pop()[](#l869) if tail_part:[](#l870) if tail_part == '..':[](#l871) head_parts.pop()[](#l872) tail_part = ''[](#l873) elif tail_part == '.':[](#l874) tail_part = ''[](#l875) else:[](#l876) tail_part = ''[](#l877) [](#l878) splitpath = ('/' + '/'.join(head_parts), tail_part)[](#l879) collapsed_path = "/".join(splitpath)[](#l880) [](#l881) return collapsed_path[](#l882) [](#l883) [](#l884) [](#l885) nobody = None[](#l886) [](#l887) def nobody_uid():[](#l888) """Internal routine to get nobody's uid"""[](#l889) global nobody[](#l890) if nobody:[](#l891) return nobody[](#l892) try:[](#l893) import pwd[](#l894) except ImportError:[](#l895) return -1[](#l896) try:[](#l897) nobody = pwd.getpwnam('nobody')[2][](#l898) except KeyError:[](#l899) nobody = 1 + max(x[2] for x in pwd.getpwall())[](#l900) return nobody[](#l901) [](#l902) [](#l903) def executable(path):[](#l904) """Test for executable file."""[](#l905) return os.access(path, os.X_OK)[](#l906) [](#l907) [](#l908) class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):[](#l909) [](#l910) """Complete HTTP server with GET, HEAD and POST commands.[](#l911) [](#l912) GET and HEAD also support running CGI scripts.[](#l913) [](#l914) The POST command is *only* implemented for CGI scripts.[](#l915) [](#l916) """[](#l917) [](#l918) # Determine platform specifics[](#l919) have_fork = hasattr(os, 'fork')[](#l920) [](#l921) # Make rfile unbuffered -- we need to read one line and then pass[](#l922) # the rest to a 
subprocess, so we can't use buffered input.[](#l923) rbufsize = 0[](#l924) [](#l925) def do_POST(self):[](#l926) """Serve a POST request.[](#l927) [](#l928) This is only implemented for CGI scripts.[](#l929) [](#l930) """[](#l931) [](#l932) if self.is_cgi():[](#l933) self.run_cgi()[](#l934) else:[](#l935) self.send_error(501, "Can only POST to CGI scripts")[](#l936) [](#l937) def send_head(self):[](#l938) """Version of send_head that support CGI scripts"""[](#l939) if self.is_cgi():[](#l940) return self.run_cgi()[](#l941) else:[](#l942) return SimpleHTTPRequestHandler.send_head(self)[](#l943) [](#l944) def is_cgi(self):[](#l945) """Test whether self.path corresponds to a CGI script.[](#l946) [](#l947) Returns True and updates the cgi_info attribute to the tuple[](#l948) (dir, rest) if self.path requires running a CGI script.[](#l949) Returns False otherwise.[](#l950) [](#l951) If any exception is raised, the caller should assume that[](#l952) self.path was rejected as invalid and act accordingly.[](#l953) [](#l954) The default implementation tests whether the normalized url[](#l955) path begins with one of the strings in self.cgi_directories[](#l956) (and the next character is a '/' or the end of the string).[](#l957) [](#l958) """[](#l959) collapsed_path = _url_collapse_path(self.path)[](#l960) dir_sep = collapsed_path.find('/', 1)[](#l961) head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:][](#l962) if head in self.cgi_directories:[](#l963) self.cgi_info = head, tail[](#l964) return True[](#l965) return False[](#l966) [](#l967) [](#l968) cgi_directories = ['/cgi-bin', '/htbin'][](#l969) [](#l970) def is_executable(self, path):[](#l971) """Test whether argument path is an executable file."""[](#l972) return executable(path)[](#l973) [](#l974) def is_python(self, path):[](#l975) """Test whether argument path is a Python script."""[](#l976) head, tail = os.path.splitext(path)[](#l977) return tail.lower() in (".py", ".pyw")[](#l978) [](#l979) def 
run_cgi(self):[](#l980) """Execute a CGI script."""[](#l981) path = self.path[](#l982) dir, rest = self.cgi_info[](#l983) [](#l984) i = path.find('/', len(dir) + 1)[](#l985) while i >= 0:[](#l986) nextdir = path[:i][](#l987) nextrest = path[i+1:][](#l988) [](#l989) scriptdir = self.translate_path(nextdir)[](#l990) if os.path.isdir(scriptdir):[](#l991) dir, rest = nextdir, nextrest[](#l992) i = path.find('/', len(dir) + 1)[](#l993) else:[](#l994) break[](#l995) [](#l996) # find an explicit query string, if present.[](#l997) i = rest.rfind('?')[](#l998) if i >= 0:[](#l999) rest, query = rest[:i], rest[i+1:][](#l1000) else:[](#l1001) query = ''[](#l1002) [](#l1003) # dissect the part after the directory name into a script name &[](#l1004) # a possible additional path, to be stored in PATH_INFO.[](#l1005) i = rest.find('/')[](#l1006) if i >= 0:[](#l1007) script, rest = rest[:i], rest[i:][](#l1008) else:[](#l1009) script, rest = rest, ''[](#l1010) [](#l1011) scriptname = dir + '/' + script[](#l1012) scriptfile = self.translate_path(scriptname)[](#l1013) if not os.path.exists(scriptfile):[](#l1014) self.send_error(404, "No such CGI script (%r)" % scriptname)[](#l1015) return[](#l1016) if not os.path.isfile(scriptfile):[](#l1017) self.send_error(403, "CGI script is not a plain file (%r)" %[](#l1018) scriptname)[](#l1019) return[](#l1020) ispy = self.is_python(scriptname)[](#l1021) if self.have_fork or not ispy:[](#l1022) if not self.is_executable(scriptfile):[](#l1023) self.send_error(403, "CGI script is not executable (%r)" %[](#l1024) scriptname)[](#l1025) return[](#l1026) [](#l1027) # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html[](#l1028) # XXX Much of the following could be prepared ahead of time![](#l1029) env = copy.deepcopy(os.environ)[](#l1030) env['SERVER_SOFTWARE'] = self.version_string()[](#l1031) env['SERVER_NAME'] = self.server.server_name[](#l1032) env['GATEWAY_INTERFACE'] = 'CGI/1.1'[](#l1033) env['SERVER_PROTOCOL'] = self.protocol_version[](#l1034) 
env['SERVER_PORT'] = str(self.server.server_port)[](#l1035) env['REQUEST_METHOD'] = self.command[](#l1036) uqrest = urllib.parse.unquote(rest)[](#l1037) env['PATH_INFO'] = uqrest[](#l1038) env['PATH_TRANSLATED'] = self.translate_path(uqrest)[](#l1039) env['SCRIPT_NAME'] = scriptname[](#l1040) if query:[](#l1041) env['QUERY_STRING'] = query[](#l1042) host = self.address_string()[](#l1043) if host != self.client_address[0]:[](#l1044) env['REMOTE_HOST'] = host[](#l1045) env['REMOTE_ADDR'] = self.client_address[0][](#l1046) authorization = self.headers.get("authorization")[](#l1047) if authorization:[](#l1048) authorization = authorization.split()[](#l1049) if len(authorization) == 2:[](#l1050) import base64, binascii[](#l1051) env['AUTH_TYPE'] = authorization[0][](#l1052) if authorization[0].lower() == "basic":[](#l1053) try:[](#l1054) authorization = authorization[1].encode('ascii')[](#l1055) authorization = base64.decodebytes(authorization).\[](#l1056) decode('ascii')[](#l1057) except (binascii.Error, UnicodeError):[](#l1058) pass[](#l1059) else:[](#l1060) authorization = authorization.split(':')[](#l1061) if len(authorization) == 2:[](#l1062) env['REMOTE_USER'] = authorization[0][](#l1063) # XXX REMOTE_IDENT[](#l1064) if self.headers.get('content-type') is None:[](#l1065) env['CONTENT_TYPE'] = self.headers.get_content_type()[](#l1066) else:[](#l1067) env['CONTENT_TYPE'] = self.headers['content-type'][](#l1068) length = self.headers.get('content-length')[](#l1069) if length:[](#l1070) env['CONTENT_LENGTH'] = length[](#l1071) referer = self.headers.get('referer')[](#l1072) if referer:[](#l1073) env['HTTP_REFERER'] = referer[](#l1074) accept = [][](#l1075) for line in self.headers.getallmatchingheaders('accept'):[](#l1076) if line[:1] in "\t\n\r ":[](#l1077) accept.append(line.strip())[](#l1078) else:[](#l1079) accept = accept + line[7:].split(',')[](#l1080) env['HTTP_ACCEPT'] = ','.join(accept)[](#l1081) ua = self.headers.get('user-agent')[](#l1082) if ua:[](#l1083) 
env['HTTP_USER_AGENT'] = ua[](#l1084) co = filter(None, self.headers.get_all('cookie', []))[](#l1085) cookie_str = ', '.join(co)[](#l1086) if cookie_str:[](#l1087) env['HTTP_COOKIE'] = cookie_str[](#l1088) # XXX Other HTTP_* headers[](#l1089) # Since we're setting the env in the parent, provide empty[](#l1090) # values to override previously set values[](#l1091) for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',[](#l1092) 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):[](#l1093) env.setdefault(k, "")[](#l1094) [](#l1095) self.send_response(200, "Script output follows")[](#l1096) self.flush_headers()[](#l1097) [](#l1098) decoded_query = query.replace('+', ' ')[](#l1099) [](#l1100) if self.have_fork:[](#l1101) # Unix -- fork as we should[](#l1102) args = [script][](#l1103) if '=' not in decoded_query:[](#l1104) args.append(decoded_query)[](#l1105) nobody = nobody_uid()[](#l1106) self.wfile.flush() # Always flush before forking[](#l1107) pid = os.fork()[](#l1108) if pid != 0:[](#l1109) # Parent[](#l1110) pid, sts = os.waitpid(pid, 0)[](#l1111) # throw away additional data [see bug #427345][](#l1112) while select.select([self.rfile], [], [], 0)[0]:[](#l1113) if not self.rfile.read(1):[](#l1114) break[](#l1115) if sts:[](#l1116) self.log_error("CGI script exit status %#x", sts)[](#l1117) return[](#l1118) # Child[](#l1119) try:[](#l1120) try:[](#l1121) os.setuid(nobody)[](#l1122) except os.error:[](#l1123) pass[](#l1124) os.dup2(self.rfile.fileno(), 0)[](#l1125) os.dup2(self.wfile.fileno(), 1)[](#l1126) os.execve(scriptfile, args, env)[](#l1127) except:[](#l1128) self.server.handle_error(self.request, self.client_address)[](#l1129) os._exit(127)[](#l1130) [](#l1131) else:[](#l1132) # Non-Unix -- use subprocess[](#l1133) import subprocess[](#l1134) cmdline = [scriptfile][](#l1135) if self.is_python(scriptfile):[](#l1136) interp = sys.executable[](#l1137) if interp.lower().endswith("w.exe"):[](#l1138) # On Windows, use python.exe, not pythonw.exe[](#l1139) 
interp = interp[:-5] + interp[-4:][](#l1140) cmdline = [interp, '-u'] + cmdline[](#l1141) if '=' not in query:[](#l1142) cmdline.append(query)[](#l1143) self.log_message("command: %s", subprocess.list2cmdline(cmdline))[](#l1144) try:[](#l1145) nbytes = int(length)[](#l1146) except (TypeError, ValueError):[](#l1147) nbytes = 0[](#l1148) p = subprocess.Popen(cmdline,[](#l1149) stdin=subprocess.PIPE,[](#l1150) stdout=subprocess.PIPE,[](#l1151) stderr=subprocess.PIPE,[](#l1152) env = env[](#l1153) )[](#l1154) if self.command.lower() == "post" and nbytes > 0:[](#l1155) data = self.rfile.read(nbytes)[](#l1156) else:[](#l1157) data = None[](#l1158) # throw away additional data [see bug #427345][](#l1159) while select.select([self.rfile._sock], [], [], 0)[0]:[](#l1160) if not self.rfile._sock.recv(1):[](#l1161) break[](#l1162) stdout, stderr = p.communicate(data)[](#l1163) self.wfile.write(stdout)[](#l1164) if stderr:[](#l1165) self.log_error('%s', stderr)[](#l1166) p.stderr.close()[](#l1167) p.stdout.close()[](#l1168) status = p.returncode[](#l1169) if status:[](#l1170) self.log_error("CGI script exit status %#x", status)[](#l1171) else:[](#l1172) self.log_message("CGI script exited OK")[](#l1173) [](#l1174) [](#l1175) def test(HandlerClass = BaseHTTPRequestHandler,[](#l1176) ServerClass = HTTPServer, protocol="HTTP/1.0"):[](#l1177) """Test the HTTP request handler class.[](#l1178) [](#l1179) This runs an HTTP server on port 8000 (or the first command line[](#l1180) argument).[](#l1181) [](#l1182) """[](#l1183) [](#l1184) if sys.argv[1:]:[](#l1185) port = int(sys.argv[1])[](#l1186) else:[](#l1187) port = 8000[](#l1188) server_address = ('', port)[](#l1189) [](#l1190) HandlerClass.protocol_version = protocol[](#l1191) httpd = ServerClass(server_address, HandlerClass)[](#l1192) [](#l1193) sa = httpd.socket.getsockname()[](#l1194) print("Serving HTTP on", sa[0], "port", sa[1], "...")[](#l1195) try:[](#l1196) httpd.serve_forever()[](#l1197) except KeyboardInterrupt:[](#l1198) 
print("\nKeyboard interrupt received, exiting.")[](#l1199) httpd.server_close()[](#l1200) sys.exit(0)[](#l1201) [](#l1202) if __name__ == '__main__':[](#l1203) test(HandlerClass=SimpleHTTPRequestHandler)[](#l1204)