(original) (raw)
changeset: 75102:a0f43f4481e0 branch: 2.7 parent: 75072:2e8b28dbc395 parent: 75101:19e6e55f09f3 user: Benjamin Peterson benjamin@python.org date: Mon Feb 20 21:44:56 2012 -0500 files: Doc/library/sys.rst Doc/reference/datamodel.rst Doc/using/cmdline.rst Include/object.h Include/pythonrun.h Lib/os.py Lib/test/test_cmd_line.py Lib/test/test_hash.py Lib/test/test_os.py Lib/test/test_set.py Lib/test/test_support.py Lib/test/test_symtable.py Lib/test/test_sys.py Makefile.pre.in Misc/NEWS Misc/python.man Modules/main.c Modules/posixmodule.c Objects/object.c Objects/stringobject.c Objects/unicodeobject.c PCbuild/pythoncore.vcproj Python/pythonrun.c Python/sysmodule.c description: merge 2.6 with hash randomization fix diff -r 2e8b28dbc395 -r a0f43f4481e0 Doc/library/sys.rst --- a/Doc/library/sys.rst Mon Feb 20 15:20:37 2012 -0500 +++ b/Doc/library/sys.rst Mon Feb 20 21:44:56 2012 -0500 @@ -286,6 +286,7 @@ :const:`verbose` :option:`-v` :const:`unicode` :option:`-U` :const:`bytes_warning` :option:`-b` + :const:`hash_randomization` :option:`-R` ============================= =================================== .. versionadded:: 2.6 diff -r 2e8b28dbc395 -r a0f43f4481e0 Doc/reference/datamodel.rst --- a/Doc/reference/datamodel.rst Mon Feb 20 15:20:37 2012 -0500 +++ b/Doc/reference/datamodel.rst Mon Feb 20 21:44:56 2012 -0500 @@ -1282,6 +1282,8 @@ modules are still available at the time when the :meth:`__del__` method is called. + See also the :option:`-R` command-line option. + .. method:: object.__repr__(self) diff -r 2e8b28dbc395 -r a0f43f4481e0 Doc/using/cmdline.rst --- a/Doc/using/cmdline.rst Mon Feb 20 15:20:37 2012 -0500 +++ b/Doc/using/cmdline.rst Mon Feb 20 21:44:56 2012 -0500 @@ -24,7 +24,7 @@ When invoking Python, you may specify any of these options:: - python [-BdEiOQsStuUvVWxX3?] [-c command | -m module-name | script | - ] [args] + python [-BdEiOQsRStuUvVWxX3?] [-c command | -m module-name | script | - ] [args] The most common use case is, of course, a simple invocation of a script:: @@ -253,6 +253,29 @@ :pep:`238` -- Changing the division operator +.. cmdoption:: -R + + Turn on hash randomization, so that the :meth:`__hash__` values of str, + bytes and datetime objects are "salted" with an unpredictable random value. + Although they remain constant within an individual Python process, they are + not predictable between repeated invocations of Python. + + This is intended to provide protection against a denial-of-service caused by + carefully-chosen inputs that exploit the worst case performance of a dict + insertion, O(n^2) complexity. See + http://www.ocert.org/advisories/ocert-2011-003.html for details. + + Changing hash values affects the order in which keys are retrieved from a + dict. Although Python has never made guarantees about this ordering (and it + typically varies between 32-bit and 64-bit builds), enough real-world code + implicitly relies on this non-guaranteed behavior that the randomization is + disabled by default. + + See also :envvar:`PYTHONHASHSEED`. + + .. versionadded:: 2.6.8 + + .. cmdoption:: -s Don't add the :data:`user site-packages directory ` to @@ -522,6 +545,27 @@ .. versionadded:: 2.6 +.. envvar:: PYTHONHASHSEED + + If this variable is set to ``random``, the effect is the same as specifying + the :option:`-R` option: a random value is used to seed the hashes of str, + bytes and datetime objects. + + If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a + fixed seed for generating the hash() of the types covered by the hash + randomization. + + Its purpose is to allow repeatable hashing, such as for selftests for the + interpreter itself, or to allow a cluster of python processes to share hash + values. + + The integer must be a decimal number in the range [0,4294967295]. + Specifying the value 0 will lead to the same hash values as when hash + randomization is disabled. + + .. versionadded:: 2.6.8 + + .. envvar:: PYTHONIOENCODING Overrides the encoding used for stdin/stdout/stderr, in the syntax diff -r 2e8b28dbc395 -r a0f43f4481e0 Include/object.h --- a/Include/object.h Mon Feb 20 15:20:37 2012 -0500 +++ b/Include/object.h Mon Feb 20 21:44:56 2012 -0500 @@ -517,6 +517,12 @@ PyAPI_FUNC(long) _Py_HashDouble(double); PyAPI_FUNC(long) _Py_HashPointer(void*); +typedef struct { + long prefix; + long suffix; +} _Py_HashSecret_t; +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + /* Helper for passing objects to printf and the like */ #define PyObject_REPR(obj) PyString_AS_STRING(PyObject_Repr(obj)) diff -r 2e8b28dbc395 -r a0f43f4481e0 Include/pydebug.h --- a/Include/pydebug.h Mon Feb 20 15:20:37 2012 -0500 +++ b/Include/pydebug.h Mon Feb 20 21:44:56 2012 -0500 @@ -26,6 +26,7 @@ PyAPI_DATA(int) _Py_QnewFlag; /* Warn about 3.x issues */ PyAPI_DATA(int) Py_Py3kWarningFlag; +PyAPI_DATA(int) Py_HashRandomizationFlag; /* this is a wrapper around getenv() that pays attention to Py_IgnoreEnvironmentFlag. It should be used for getting variables like diff -r 2e8b28dbc395 -r a0f43f4481e0 Include/pythonrun.h --- a/Include/pythonrun.h Mon Feb 20 15:20:37 2012 -0500 +++ b/Include/pythonrun.h Mon Feb 20 21:44:56 2012 -0500 @@ -171,6 +171,8 @@ PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); +/* Random */ +PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size); #ifdef __cplusplus } diff -r 2e8b28dbc395 -r a0f43f4481e0 Lib/os.py --- a/Lib/os.py Mon Feb 20 15:20:37 2012 -0500 +++ b/Lib/os.py Mon Feb 20 21:44:56 2012 -0500 @@ -738,22 +738,3 @@ _make_statvfs_result) except NameError: # statvfs_result may not exist pass - -if not _exists("urandom"): - def urandom(n): - """urandom(n) -> str - - Return a string of n random bytes suitable for cryptographic use. - - """ - try: - _urandomfd = open("/dev/urandom", O_RDONLY) - except (OSError, IOError): - raise NotImplementedError("/dev/urandom (or equivalent) not found") - try: - bs = b"" - while n > len(bs): - bs += read(_urandomfd, n - len(bs)) - finally: - close(_urandomfd) - return bs diff -r 2e8b28dbc395 -r a0f43f4481e0 Lib/test/test_cmd_line.py --- a/Lib/test/test_cmd_line.py Mon Feb 20 15:20:37 2012 -0500 +++ b/Lib/test/test_cmd_line.py Mon Feb 20 21:44:56 2012 -0500 @@ -86,6 +86,20 @@ self.exit_code('-c', 'pass'), 0) + def test_hash_randomization(self): + # Verify that -R enables hash randomization: + self.verify_valid_flag('-R') + hashes = [] + for i in range(2): + code = 'print(hash("spam"))' + data = self.start_python('-R', '-c', code) + hashes.append(data) + self.assertNotEqual(hashes[0], hashes[1]) + + # Verify that sys.flags contains hash_randomization + code = 'import sys; print sys.flags' + data = self.start_python('-R', '-c', code) + self.assertTrue('hash_randomization=1' in data) def test_main(): test.test_support.run_unittest(CmdLineTest) diff -r 2e8b28dbc395 -r a0f43f4481e0 Lib/test/test_hash.py --- a/Lib/test/test_hash.py Mon Feb 20 15:20:37 2012 -0500 +++ b/Lib/test/test_hash.py Mon Feb 20 21:44:56 2012 -0500 @@ -3,10 +3,18 @@ # # Also test that hash implementations are inherited as expected +import os +import sys +import struct +import datetime import unittest +import subprocess + from test import test_support from collections import Hashable +IS_64BIT = (struct.calcsize('l') == 8) + class HashEqualityTestCase(unittest.TestCase): @@ -134,10 +142,100 @@ for obj in self.hashes_to_check: self.assertEqual(hash(obj), _default_hash(obj)) +class HashRandomizationTests(unittest.TestCase): + + # Each subclass should define a field "repr_", containing the repr() of + # an object to be tested + + def get_hash_command(self, repr_): + return 'print(hash(%s))' % repr_ + + def get_hash(self, repr_, seed=None): + env = os.environ.copy() + if seed is not None: + env['PYTHONHASHSEED'] = str(seed) + else: + env.pop('PYTHONHASHSEED', None) + cmd_line = [sys.executable, '-c', self.get_hash_command(repr_)] + p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + env=env) + out, err = p.communicate() + out = test_support.strip_python_stderr(out) + return int(out.strip()) + + def test_randomized_hash(self): + # two runs should return different hashes + run1 = self.get_hash(self.repr_, seed='random') + run2 = self.get_hash(self.repr_, seed='random') + self.assertNotEqual(run1, run2) + +class StringlikeHashRandomizationTests(HashRandomizationTests): + def test_null_hash(self): + # PYTHONHASHSEED=0 disables the randomized hash + if IS_64BIT: + known_hash_of_obj = 1453079729188098211 + else: + known_hash_of_obj = -1600925533 + + # Randomization is disabled by default: + self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj) + + # It can also be disabled by setting the seed to 0: + self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj) + + def test_fixed_hash(self): + # test a fixed seed for the randomized hash + # Note that all types share the same values: + if IS_64BIT: + h = -4410911502303878509 + else: + h = -206076799 + self.assertEqual(self.get_hash(self.repr_, seed=42), h) + +class StrHashRandomizationTests(StringlikeHashRandomizationTests): + repr_ = repr('abc') + + def test_empty_string(self): + self.assertEqual(hash(""), 0) + +class UnicodeHashRandomizationTests(StringlikeHashRandomizationTests): + repr_ = repr(u'abc') + + def test_empty_string(self): + self.assertEqual(hash(u""), 0) + +class BufferHashRandomizationTests(StringlikeHashRandomizationTests): + repr_ = 'buffer("abc")' + + def test_empty_string(self): + self.assertEqual(hash(buffer("")), 0) + +class DatetimeTests(HashRandomizationTests): + def get_hash_command(self, repr_): + return 'import datetime; print(hash(%s))' % repr_ + +class DatetimeDateTests(DatetimeTests): + repr_ = repr(datetime.date(1066, 10, 14)) + +class DatetimeDatetimeTests(DatetimeTests): + repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7)) + +class DatetimeTimeTests(DatetimeTests): + repr_ = repr(datetime.time(0)) + + def test_main(): test_support.run_unittest(HashEqualityTestCase, HashInheritanceTestCase, - HashBuiltinsTestCase) + HashBuiltinsTestCase, + StrHashRandomizationTests, + UnicodeHashRandomizationTests, + BufferHashRandomizationTests, + DatetimeDateTests, + DatetimeDatetimeTests, + DatetimeTimeTests) + if __name__ == "__main__": diff -r 2e8b28dbc395 -r a0f43f4481e0 Lib/test/test_os.py --- a/Lib/test/test_os.py Mon Feb 20 15:20:37 2012 -0500 +++ b/Lib/test/test_os.py Mon Feb 20 21:44:56 2012 -0500 @@ -10,6 +10,7 @@ import signal import subprocess import time + from test import test_support import mmap import uuid @@ -536,8 +537,41 @@ self.assertRaises(TypeError, os.urandom, 0.9) self.assertRaises(TypeError, os.urandom, 1.1) self.assertRaises(TypeError, os.urandom, 2.0) - except NotImplementedError: - pass + self.assertEqual(len(os.urandom(0.9)), 0) + self.assertEqual(len(os.urandom(1.1)), 1) + self.assertEqual(len(os.urandom(2.0)), 2) + + def test_urandom_length(self): + self.assertEqual(len(os.urandom(0)), 0) + self.assertEqual(len(os.urandom(1)), 1) + self.assertEqual(len(os.urandom(10)), 10) + self.assertEqual(len(os.urandom(100)), 100) + self.assertEqual(len(os.urandom(1000)), 1000) + + def test_urandom_value(self): + data1 = os.urandom(16) + data2 = os.urandom(16) + self.assertNotEqual(data1, data2) + + def get_urandom_subprocess(self, count): + code = '\n'.join(( + 'import os, sys', + 'data = os.urandom(%s)' % count, + 'sys.stdout.write(data)', + 'sys.stdout.flush()')) + cmd_line = [sys.executable, '-c', code] + p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + out, err = p.communicate() + out = test_support.strip_python_stderr(out) + self.assertEqual(len(out), count) + return out + + def test_urandom_subprocess(self): + data1 = self.get_urandom_subprocess(16) + data2 = self.get_urandom_subprocess(16) + self.assertNotEqual(data1, data2) +>>>>>>> other def test_execvpe_with_bad_arglist(self): self.assertRaises(ValueError, os.execvpe, 'notepad', [], None) diff -r 2e8b28dbc395 -r a0f43f4481e0 Lib/test/test_set.py --- a/Lib/test/test_set.py Mon Feb 20 15:20:37 2012 -0500 +++ b/Lib/test/test_set.py Mon Feb 20 21:44:56 2012 -0500 @@ -687,6 +687,17 @@ if self.repr is not None: self.assertEqual(repr(self.set), self.repr) + def check_repr_against_values(self): + text = repr(self.set) + self.assertTrue(text.startswith('{')) + self.assertTrue(text.endswith('}')) + + result = text[1:-1].split(', ') + result.sort() + sorted_repr_values = [repr(value) for value in self.values] + sorted_repr_values.sort() + self.assertEqual(result, sorted_repr_values) + def test_print(self): fo = open(test_support.TESTFN, "wb") try: @@ -836,6 +847,46 @@ self.length = 3 self.repr = None +#------------------------------------------------------------------------------ + +class TestBasicOpsString(TestBasicOps): + def setUp(self): + self.case = "string set" + self.values = ["a", "b", "c"] + self.set = set(self.values) + self.dup = set(self.values) + self.length = 3 + + def test_repr(self): + self.check_repr_against_values() + +#------------------------------------------------------------------------------ + +class TestBasicOpsUnicode(TestBasicOps): + def setUp(self): + self.case = "unicode set" + self.values = [u"a", u"b", u"c"] + self.set = set(self.values) + self.dup = set(self.values) + self.length = 3 + + def test_repr(self): + self.check_repr_against_values() + +#------------------------------------------------------------------------------ + +class TestBasicOpsMixedStringUnicode(TestBasicOps): + def setUp(self): + self.case = "string and bytes set" + self.values = ["a", "b", u"a", u"b"] + self.set = set(self.values) + self.dup = set(self.values) + self.length = 4 + + def test_repr(self): + with test_support.check_warnings(): + self.check_repr_against_values() + #============================================================================== def baditer(): diff -r 2e8b28dbc395 -r a0f43f4481e0 Lib/test/test_support.py --- a/Lib/test/test_support.py Mon Feb 20 15:20:37 2012 -0500 +++ b/Lib/test/test_support.py Mon Feb 20 21:44:56 2012 -0500 @@ -36,8 +36,8 @@ "BasicTestRunner", "run_unittest", "run_doctest", "threading_setup", "threading_cleanup", "reap_children", "cpython_only", "check_impl_detail", "get_attribute", "py3k_bytes", - "import_fresh_module"] - + "import_fresh_module", "threading_cleanup", "reap_children", + "strip_python_stderr"] class Error(Exception): """Base class for regression test exceptions.""" diff -r 2e8b28dbc395 -r a0f43f4481e0 Lib/test/test_sys.py --- a/Lib/test/test_sys.py Mon Feb 20 15:20:37 2012 -0500 +++ b/Lib/test/test_sys.py Mon Feb 20 21:44:56 2012 -0500 @@ -438,7 +438,7 @@ attrs = ("debug", "py3k_warning", "division_warning", "division_new", "inspect", "interactive", "optimize", "dont_write_bytecode", "no_site", "ignore_environment", "tabcheck", "verbose", - "unicode", "bytes_warning") + "unicode", "bytes_warning", "hash_randomization") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) self.assertEqual(type(getattr(sys.flags, attr)), int, attr) diff -r 2e8b28dbc395 -r a0f43f4481e0 Makefile.pre.in --- a/Makefile.pre.in Mon Feb 20 15:20:37 2012 -0500 +++ b/Makefile.pre.in Mon Feb 20 21:44:56 2012 -0500 @@ -290,6 +290,7 @@ Python/pymath.o \ Python/pystate.o \ Python/pythonrun.o \ + Python/random.o \ Python/structmember.o \ Python/symtable.o \ Python/sysmodule.o \ @@ -739,7 +740,7 @@ -@if which pybuildbot.identify >/dev/null 2>&1; then \ pybuildbot.identify "CC='$(CC)'" "CXX='$(CXX)'"; \ fi - (TESTPYTHON)(TESTPYTHON) (TESTPYTHON)(TESTPROG) -uall -rwW (TESTOPTS)+(TESTOPTS) + (TESTOPTS)+(TESTPYTHON) -R (TESTPROG)−uall−rwW(TESTPROG) -uall -rwW (TESTPROG)−uall−rwW(TESTOPTS) QUICKTESTOPTS= $(TESTOPTS) -x test_subprocess test_io test_lib2to3 \ test_multibytecodec test_urllib2_localnet test_itertools \ diff -r 2e8b28dbc395 -r a0f43f4481e0 Misc/NEWS --- a/Misc/NEWS Mon Feb 20 15:20:37 2012 -0500 +++ b/Misc/NEWS Mon Feb 20 21:44:56 2012 -0500 @@ -12,6 +12,11 @@ - Issue #13020: Fix a reference leak when allocating a structsequence object fails. Patch by Suman Saha. +- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED + environment variable, to provide an opt-in way to protect against denial of + service attacks due to hash collisions within the dict and set types. Patch + by David Malcolm, based on work by Victor Stinner. + - Issue #11235: Fix OverflowError when trying to import a source file whose modification time doesn't fit in a 32-bit timestamp. diff -r 2e8b28dbc395 -r a0f43f4481e0 Misc/python.man --- a/Misc/python.man Mon Feb 20 15:20:37 2012 -0500 +++ b/Misc/python.man Mon Feb 20 21:44:56 2012 -0500 @@ -34,6 +34,9 @@ .B \-OO ] [ +.B \-R +] +[ .B -Q .I argument ] @@ -151,6 +154,18 @@ .B \-OO Discard docstrings in addition to the \fB-O\fP optimizations. .TP +.B \-R +Turn on "hash randomization", so that the hash() values of str, bytes and +datetime objects are "salted" with an unpredictable pseudo-random value. +Although they remain constant within an individual Python process, they are +not predictable between repeated invocations of Python. +.IP +This is intended to provide protection against a denial of service +caused by carefully-chosen inputs that exploit the worst case performance +of a dict insertion, O(n^2) complexity. See +http://www.ocert.org/advisories/ocert-2011-003.html +for details. +.TP .BI "\-Q " argument Division control; see PEP 238. The argument must be one of "old" (the default, int/int and long/long return an int or long), "new" (new @@ -423,6 +438,20 @@ .IP PYTHONWARNINGS If this is set to a comma-separated string it is equivalent to specifying the \fB\-W\fP option for each separate value. +.IP PYTHONHASHSEED +If this variable is set to "random", the effect is the same as specifying +the \fB-R\fP option: a random value is used to seed the hashes of str, +bytes and datetime objects. + +If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for +generating the hash() of the types covered by the hash randomization. Its +purpose is to allow repeatable hashing, such as for selftests for the +interpreter itself, or to allow a cluster of python processes to share hash +values. + +The integer must be a decimal number in the range [0,4294967295]. Specifying +the value 0 will lead to the same hash values as when hash randomization is +disabled. .SH AUTHOR The Python Software Foundation: http://www.python.org/psf .SH INTERNET RESOURCES diff -r 2e8b28dbc395 -r a0f43f4481e0 Modules/main.c --- a/Modules/main.c Mon Feb 20 15:20:37 2012 -0500 +++ b/Modules/main.c Mon Feb 20 21:44:56 2012 -0500 @@ -40,7 +40,7 @@ static int orig_argc; /* command line options */ -#define BASE_OPTS "3bBc:dEhiJm:OQ:sStuUvVW:xX?" +#define BASE_OPTS "3bBc:dEhiJm:OQ:RsStuUvVW:xX?" #ifndef RISCOS #define PROGRAM_OPTS BASE_OPTS @@ -71,6 +71,9 @@ -m mod : run library module as a script (terminates option list)\n\ -O : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n\ -OO : remove doc-strings in addition to the -O optimizations\n\ +-R : use a pseudo-random salt to make hash() values of various types be\n\ + unpredictable between separate invocations of the interpreter, as\n\ + a defense against denial-of-service attacks\n\ -Q arg : division options: -Qold (default), -Qwarn, -Qwarnall, -Qnew\n\ -s : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n\ -S : don't imply 'import site' on initialization\n\ @@ -102,6 +105,12 @@ PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\ PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\ "; +static char *usage_6 = "\ +PYTHONHASHSEED: if this variable is set to ``random``, the effect is the same \n\ + as specifying the :option:`-R` option: a random value is used to seed the\n\ + hashes of str, bytes and datetime objects. It can also be set to an integer\n\ + in the range [0,4294967295] to get hash values with a predictable seed.\n\ +"; static int @@ -118,6 +127,7 @@ fputs(usage_3, f); fprintf(f, usage_4, DELIM); fprintf(f, usage_5, DELIM, PYTHONHOMEHELP); + fputs(usage_6, f); } #if defined(__VMS) if (exitcode == 0) { @@ -389,6 +399,10 @@ PySys_AddWarnOption(_PyOS_optarg); break; + case 'R': + Py_HashRandomizationFlag++; + break; + /* This space reserved for other options */ default: diff -r 2e8b28dbc395 -r a0f43f4481e0 Modules/posixmodule.c --- a/Modules/posixmodule.c Mon Feb 20 15:20:37 2012 -0500 +++ b/Modules/posixmodule.c Mon Feb 20 21:44:56 2012 -0500 @@ -8538,117 +8538,35 @@ } #endif -#ifdef MS_WINDOWS - -PyDoc_STRVAR(win32_urandom__doc__, +PyDoc_STRVAR(posix_urandom__doc__, "urandom(n) -> str\n\n\ -Return a string of n random bytes suitable for cryptographic use."); - -typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ - LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ - DWORD dwFlags ); -typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ - BYTE *pbBuffer ); - -static CRYPTGENRANDOM pCryptGenRandom = NULL; -/* This handle is never explicitly released. Instead, the operating - system will release it when the process terminates. */ -static HCRYPTPROV hCryptProv = 0; - -static PyObject* -win32_urandom(PyObject *self, PyObject *args) -{ - int howMany; - PyObject* result; - - /* Read arguments */ - if (! PyArg_ParseTuple(args, "i:urandom", &howMany)) - return NULL; - if (howMany < 0) +Return n random bytes suitable for cryptographic use."); + +static PyObject * +posix_urandom(PyObject *self, PyObject *args) +{ + Py_ssize_t size; + PyObject *result; + int ret; + + /* Read arguments */ + if (!PyArg_ParseTuple(args, "n:urandom", &size)) + return NULL; + if (size < 0) return PyErr_Format(PyExc_ValueError, "negative argument not allowed"); - - if (hCryptProv == 0) { - HINSTANCE hAdvAPI32 = NULL; - CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; - - /* Obtain handle to the DLL containing CryptoAPI - This should not fail */ - hAdvAPI32 = GetModuleHandle("advapi32.dll"); - if(hAdvAPI32 == NULL) - return win32_error("GetModuleHandle", NULL); - - /* Obtain pointers to the CryptoAPI functions - This will fail on some early versions of Win95 */ - pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( - hAdvAPI32, - "CryptAcquireContextA"); - if (pCryptAcquireContext == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptAcquireContextA not found"); - - pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress( - hAdvAPI32, "CryptGenRandom"); - if (pCryptGenRandom == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptGenRandom not found"); - - /* Acquire context */ - if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, - PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) - return win32_error("CryptAcquireContext", NULL); - } - - /* Allocate bytes */ - result = PyString_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - memset(PyString_AS_STRING(result), 0, howMany); /* zero seed */ - if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*) - PyString_AS_STRING(result))) { - Py_DECREF(result); - return win32_error("CryptGenRandom", NULL); - } + result = PyBytes_FromStringAndSize(NULL, size); + if (result == NULL) + return NULL; + + ret = _PyOS_URandom(PyBytes_AS_STRING(result), + PyBytes_GET_SIZE(result)); + if (ret == -1) { + Py_DECREF(result); + return NULL; } return result; } -#endif - -#ifdef __VMS -/* Use openssl random routine */ -#include -PyDoc_STRVAR(vms_urandom__doc__, -"urandom(n) -> str\n\n\ -Return a string of n random bytes suitable for cryptographic use."); - -static PyObject* -vms_urandom(PyObject *self, PyObject *args) -{ - int howMany; - PyObject* result; - - /* Read arguments */ - if (! PyArg_ParseTuple(args, "i:urandom", &howMany)) - return NULL; - if (howMany < 0) - return PyErr_Format(PyExc_ValueError, - "negative argument not allowed"); - - /* Allocate bytes */ - result = PyString_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - if (RAND_pseudo_bytes((unsigned char*) - PyString_AS_STRING(result), - howMany) < 0) { - Py_DECREF(result); - return PyErr_Format(PyExc_ValueError, - "RAND_pseudo_bytes"); - } - } - return result; -} -#endif #ifdef HAVE_SETRESUID PyDoc_STRVAR(posix_setresuid__doc__, @@ -9035,12 +8953,6 @@ #ifdef HAVE_GETLOADAVG {"getloadavg", posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__}, #endif - #ifdef MS_WINDOWS - {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__}, - #endif - #ifdef __VMS - {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__}, - #endif #ifdef HAVE_SETRESUID {"setresuid", posix_setresuid, METH_VARARGS, posix_setresuid__doc__}, #endif @@ -9053,7 +8965,7 @@ #ifdef HAVE_GETRESGID {"getresgid", posix_getresgid, METH_NOARGS, posix_getresgid__doc__}, #endif - + {"urandom", posix_urandom, METH_VARARGS, posix_urandom__doc__}, {NULL, NULL} /* Sentinel */ }; diff -r 2e8b28dbc395 -r a0f43f4481e0 Objects/bufferobject.c --- a/Objects/bufferobject.c Mon Feb 20 15:20:37 2012 -0500 +++ b/Objects/bufferobject.c Mon Feb 20 21:44:56 2012 -0500 @@ -334,10 +334,20 @@ return -1; p = (unsigned char *) ptr; len = size; - x = *p << 7; + /* + We make the hash of the empty buffer be 0, rather than using + (prefix ^ suffix), since this slightly obfuscates the hash secret + */ + if (len == 0) { + self->b_hash = 0; + return 0; + } + x = _Py_HashSecret.prefix; + x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= size; + x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; self->b_hash = x; diff -r 2e8b28dbc395 -r a0f43f4481e0 Objects/object.c --- a/Objects/object.c Mon Feb 20 15:20:37 2012 -0500 +++ b/Objects/object.c Mon Feb 20 21:44:56 2012 -0500 @@ -1094,6 +1094,8 @@ return -1; } +_Py_HashSecret_t _Py_HashSecret; + long PyObject_Hash(PyObject *v) { diff -r 2e8b28dbc395 -r a0f43f4481e0 Objects/stringobject.c --- a/Objects/stringobject.c Mon Feb 20 15:20:37 2012 -0500 +++ b/Objects/stringobject.c Mon Feb 20 21:44:56 2012 -0500 @@ -1265,11 +1265,21 @@ if (a->ob_shash != -1) return a->ob_shash; len = Py_SIZE(a); + /* + We make the hash of the empty string be 0, rather than using + (prefix ^ suffix), since this slightly obfuscates the hash secret + */ + if (len == 0) { + a->ob_shash = 0; + return 0; + } p = (unsigned char *) a->ob_sval; - x = *p << 7; + x = _Py_HashSecret.prefix; + x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= Py_SIZE(a); + x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; a->ob_shash = x; diff -r 2e8b28dbc395 -r a0f43f4481e0 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Mon Feb 20 15:20:37 2012 -0500 +++ b/Objects/unicodeobject.c Mon Feb 20 21:44:56 2012 -0500 @@ -6541,11 +6541,21 @@ if (self->hash != -1) return self->hash; len = PyUnicode_GET_SIZE(self); + /* + We make the hash of the empty string be 0, rather than using + (prefix ^ suffix), since this slightly obfuscates the hash secret + */ + if (len == 0) { + self->hash = 0; + return 0; + } p = PyUnicode_AS_UNICODE(self); - x = *p << 7; + x = _Py_HashSecret.prefix; + x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= PyUnicode_GET_SIZE(self); + x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; self->hash = x; diff -r 2e8b28dbc395 -r a0f43f4481e0 PCbuild/pythoncore.vcproj --- a/PCbuild/pythoncore.vcproj Mon Feb 20 15:20:37 2012 -0500 +++ b/PCbuild/pythoncore.vcproj Mon Feb 20 21:44:56 2012 -0500 @@ -1,3 +1,4 @@ +<<<<<<< local +======= +++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++>>>>>>> other diff -r 2e8b28dbc395 -r a0f43f4481e0 Python/pythonrun.c --- a/Python/pythonrun.c Mon Feb 20 15:20:37 2012 -0500 +++ b/Python/pythonrun.c Mon Feb 20 21:44:56 2012 -0500 @@ -67,6 +67,7 @@ static void call_ll_exitfuncs(void); extern void _PyUnicode_Init(void); extern void _PyUnicode_Fini(void); +extern void _PyRandom_Init(void); #ifdef WITH_THREAD extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); @@ -89,6 +90,7 @@ true divisions (which they will be in 2.3). */ int _Py_QnewFlag = 0; int Py_NoUserSiteDirectory = 0; /* for -s and site.py */ +int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */ /* PyModule_GetWarningsModule is no longer necessary as of 2.6 since _warnings is builtin. This API should not be used. */ @@ -166,6 +168,12 @@ Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p); if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0') Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p); + /* The variable is only tested for existence here; _PyRandom_Init will + check its value further. */ + if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0') + Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p); + + _PyRandom_Init(); interp = PyInterpreterState_New(); if (interp == NULL) diff -r 2e8b28dbc395 -r a0f43f4481e0 Python/random.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Python/random.c Mon Feb 20 21:44:56 2012 -0500 @@ -0,0 +1,302 @@ +#include "Python.h" +#ifdef MS_WINDOWS +#include +#else +#include +#endif + +static int random_initialized = 0; + +#ifdef MS_WINDOWS +typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ + LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ + DWORD dwFlags ); +typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ + BYTE *pbBuffer ); + +static CRYPTGENRANDOM pCryptGenRandom = NULL; +/* This handle is never explicitly released. Instead, the operating + system will release it when the process terminates. */ +static HCRYPTPROV hCryptProv = 0; + +static int +win32_urandom_init(int raise) +{ + HINSTANCE hAdvAPI32 = NULL; + CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; + + /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */ + hAdvAPI32 = GetModuleHandle("advapi32.dll"); + if(hAdvAPI32 == NULL) + goto error; + + /* Obtain pointers to the CryptoAPI functions. This will fail on some early + versions of Win95. */ + pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( + hAdvAPI32, "CryptAcquireContextA"); + if (pCryptAcquireContext == NULL) + goto error; + + pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, + "CryptGenRandom"); + if (pCryptGenRandom == NULL) + goto error; + + /* Acquire context */ + if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, + PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) + goto error; + + return 0; + +error: + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Failed to initialize Windows random API (CryptoGen)"); + return -1; +} + +/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen + API. Return 0 on success, or -1 on error. */ +static int +win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + Py_ssize_t chunk; + + if (hCryptProv == 0) + { + if (win32_urandom_init(raise) == -1) + return -1; + } + + while (size > 0) + { + chunk = size > INT_MAX ? INT_MAX : size; + if (!pCryptGenRandom(hCryptProv, chunk, buffer)) + { + /* CryptGenRandom() failed */ + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Failed to initialized the randomized hash " + "secret using CryptoGen)"); + return -1; + } + buffer += chunk; + size -= chunk; + } + return 0; +} +#endif /* MS_WINDOWS */ + + +#ifdef __VMS +/* Use openssl random routine */ +#include +static int +vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + if (RAND_pseudo_bytes(buffer, size) < 0) { + if (raise) { + PyErr_Format(PyExc_ValueError, + "RAND_pseudo_bytes"); + } else { + Py_FatalError("Failed to initialize the randomized hash " + "secret using RAND_pseudo_bytes"); + } + return -1; + } + return 0; +} +#endif /* __VMS */ + + +#if !defined(MS_WINDOWS) && !defined(__VMS) + +/* Read size bytes from /dev/urandom into buffer. + Call Py_FatalError() on error. */ +static void +dev_urandom_noraise(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + assert (0 < size); + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + Py_FatalError("Failed to open /dev/urandom"); + + while (0 < size) + { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + Py_FatalError("Failed to read bytes from /dev/urandom"); + break; + } + buffer += n; + size -= (Py_ssize_t)n; + } + close(fd); +} + +/* Read size bytes from /dev/urandom into buffer. + Return 0 on success, raise an exception and return -1 on error. */ +static int +dev_urandom_python(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + if (size <= 0) + return 0; + + Py_BEGIN_ALLOW_THREADS + fd = open("/dev/urandom", O_RDONLY); + Py_END_ALLOW_THREADS + if (fd < 0) + { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom"); + return -1; + } + + Py_BEGIN_ALLOW_THREADS + do { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + break; + buffer += n; + size -= (Py_ssize_t)n; + } while (0 < size); + Py_END_ALLOW_THREADS + + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + if (n < 0) + PyErr_SetFromErrno(PyExc_OSError); + else + PyErr_Format(PyExc_RuntimeError, + "Failed to read %zi bytes from /dev/urandom", + size); + close(fd); + return -1; + } + close(fd); + return 0; +} +#endif /* !defined(MS_WINDOWS) && !defined(__VMS) */ + +/* Fill buffer with pseudo-random bytes generated by a linear congruent + generator (LCG): + + x(n+1) = (x(n) * 214013 + 2531011) % 2^32 + + Use bits 23..16 of x(n) to generate a byte. */ +static void +lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size) +{ + size_t index; + unsigned int x; + + x = x0; + for (index=0; index < size; index++) { + x *= 214013; + x += 2531011; + /* modulo 2 ^ (8 * sizeof(int)) */ + buffer[index] = (x >> 16) & 0xff; + } +} + +/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic + use, from the operating random number generator (RNG). + + Return 0 on success, raise an exception and return -1 on error. */ +int +_PyOS_URandom(void *buffer, Py_ssize_t size) +{ + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "negative argument not allowed"); + return -1; + } + if (size == 0) + return 0; + +#ifdef MS_WINDOWS + return win32_urandom((unsigned char *)buffer, size, 1); +#else +# ifdef __VMS + return vms_urandom((unsigned char *)buffer, size, 1); +# else + return dev_urandom_python((char*)buffer, size); +# endif +#endif +} + +void +_PyRandom_Init(void) +{ + char *env; + void *secret = &_Py_HashSecret; + Py_ssize_t secret_size = sizeof(_Py_HashSecret); + + if (random_initialized) + return; + random_initialized = 1; + + /* + By default, hash randomization is disabled, and only + enabled if PYTHONHASHSEED is set to non-empty or if + "-R" is provided at the command line: + */ + if (!Py_HashRandomizationFlag) { + /* Disable the randomized hash: */ + memset(secret, 0, secret_size); + return; + } + + /* + Hash randomization is enabled. Generate a per-process secret, + using PYTHONHASHSEED if provided. + */ + + env = Py_GETENV("PYTHONHASHSEED"); + if (env && *env != '\0' && strcmp(env, "random") != 0) { + char *endptr = env; + unsigned long seed; + seed = strtoul(env, &endptr, 10); + if (*endptr != '\0' + || seed > 4294967295UL + || (errno == ERANGE && seed == ULONG_MAX)) + { + Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer " + "in range [0; 4294967295]"); + } + if (seed == 0) { + /* disable the randomized hash */ + memset(secret, 0, secret_size); + } + else { + lcg_urandom(seed, (unsigned char*)secret, secret_size); + } + } + else { +#ifdef MS_WINDOWS + (void)win32_urandom((unsigned char *)secret, secret_size, 0); +#else /* #ifdef MS_WINDOWS */ +# ifdef __VMS + vms_urandom((unsigned char *)secret, secret_size, 0); +# else + dev_urandom_noraise((char*)secret, secret_size); +# endif +#endif + } +} diff -r 2e8b28dbc395 -r a0f43f4481e0 Python/sysmodule.c --- a/Python/sysmodule.c Mon Feb 20 15:20:37 2012 -0500 +++ b/Python/sysmodule.c Mon Feb 20 21:44:56 2012 -0500 @@ -1209,6 +1209,7 @@ {"unicode", "-U"}, /* {"skip_first", "-x"}, */ {"bytes_warning", "-b"}, + {"hash_randomization", "-R"}, {0} }; @@ -1217,9 +1218,9 @@ flags__doc__, /* doc */ flags_fields, /* fields */ #ifdef RISCOS - 16 + 17 #else - 15 + 16 #endif }; @@ -1256,6 +1257,7 @@ SetFlag(Py_UnicodeFlag); /* SetFlag(skipfirstline); */ SetFlag(Py_BytesWarningFlag); + SetFlag(Py_HashRandomizationFlag); #undef SetFlag if (PyErr_Occurred()) {/benjamin@python.org