cpython: 6b7704fe1be1 (original) (raw)
Mercurial > cpython
changeset 75100:6b7704fe1be1 2.6
- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED environment variable, to provide an opt-in way to protect against denial of service attacks due to hash collisions within the dict and set types. Patch by David Malcolm, based on work by Victor Stinner. [#13703]
line wrap: on
line diff
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -289,6 +289,11 @@ always available.
    +------------------------------+------------------------------------------+
    | :const:`bytes_warning`       | -b                                       |
    +------------------------------+------------------------------------------+
+   +------------------------------+------------------------------------------+
+   | :const:`hash_randomization`  | -R                                       |
+   |                              |                                          |
+   |                              | .. versionadded:: 2.6.8                  |
+   +------------------------------+------------------------------------------+
 
    .. versionadded:: 2.6
--- a/Doc/reference/datamodel.rst
+++ b/Doc/reference/datamodel.rst
@@ -1273,6 +1273,8 @@ Basic customization
modules are still available at the time when the :meth:`__del__` method is
called.
--- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -21,7 +21,7 @@ Command line When invoking Python, you may specify any of these options::
The most common use case is, of course, a simple invocation of a script::
@@ -239,6 +239,29 @@ Miscellaneous options
:pep:`238` -- Changing the division operator
+.. cmdoption:: -R
+
+   Turn on hash randomization, so that the :meth:`__hash__` values of str,
+   bytes and datetime objects are "salted" with an unpredictable random value.
+   Although they remain constant within an individual Python process, they are
+   not predictable between repeated invocations of Python.
+
+   This is intended to provide protection against a denial-of-service caused by
+   carefully-chosen inputs that exploit the worst case performance of a dict
+   insertion, O(n^2) complexity.  See
+   http://www.ocert.org/advisories/ocert-2011-003.html for details.
+
+   Changing hash values affects the order in which keys are retrieved from a
+   dict.  Although Python has never made guarantees about this ordering (and it
+   typically varies between 32-bit and 64-bit builds), enough real-world code
+   implicitly relies on this non-guaranteed behavior that the randomization is
+   disabled by default.
+
+   See also :envvar:`PYTHONHASHSEED`.
+
+   .. versionadded:: 2.6.8
+
+ .. cmdoption:: -s Don't add user site directory to sys.path @@ -501,6 +524,27 @@ These environment variables influence Py .. versionadded:: 2.6 +.. envvar:: PYTHONHASHSEED +
+   If this variable is set to ``random``, the effect is the same as specifying
+   the :option:`-R` option: a random value is used to seed the hashes of str,
+   bytes and datetime objects.
+
+   If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a
+   fixed seed for generating the hash() of the types covered by the hash
+   randomization.
+
+   Its purpose is to allow repeatable hashing, such as for selftests for the
+   interpreter itself, or to allow a cluster of python processes to share hash
+   values.
+
+   The integer must be a decimal number in the range [0,4294967295].
+   Specifying the value 0 will lead to the same hash values as when hash
+   randomization is disabled.
+
+   .. versionadded:: 2.6.8
+
+ .. envvar:: PYTHONIOENCODING Overrides the encoding used for stdin/stdout/stderr, in the syntax
--- a/Include/object.h +++ b/Include/object.h @@ -506,6 +506,12 @@ PyAPI_FUNC(void) Py_ReprLeave(PyObject PyAPI_FUNC(long) _Py_HashDouble(double); PyAPI_FUNC(long) _Py_HashPointer(void); +typedef struct {
+} _Py_HashSecret_t; +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + /* Helper for passing objects to printf and the like */ #define PyObject_REPR(obj) PyString_AS_STRING(PyObject_Repr(obj))
--- a/Include/pydebug.h +++ b/Include/pydebug.h @@ -26,6 +26,7 @@ PyAPI_DATA(int) Py_NoUserSiteDirectory; PyAPI_DATA(int) _Py_QnewFlag; /* Warn about 3.x issues / PyAPI_DATA(int) Py_Py3kWarningFlag; +PyAPI_DATA(int) Py_HashRandomizationFlag; / this is a wrapper around getenv() that pays attention to Py_IgnoreEnvironmentFlag. It should be used for getting variables like
--- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -168,6 +168,8 @@ typedef void (PyOS_sighandler_t)(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); +/ Random */ +PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size); #ifdef __cplusplus }
--- a/Lib/os.py +++ b/Lib/os.py @@ -742,22 +742,3 @@ try: _make_statvfs_result) except NameError: # statvfs_result may not exist pass - -if not _exists("urandom"):
Return a string of n random bytes suitable for cryptographic use.[](#l7.12)
"""[](#l7.14)
try:[](#l7.15)
_urandomfd = open("/dev/urandom", O_RDONLY)[](#l7.16)
except (OSError, IOError):[](#l7.17)
raise NotImplementedError("/dev/urandom (or equivalent) not found")[](#l7.18)
try:[](#l7.19)
bs = b""[](#l7.20)
while n - len(bs) >= 1:[](#l7.21)
bs += read(_urandomfd, n - len(bs))[](#l7.22)
finally:[](#l7.23)
close(_urandomfd)[](#l7.24)
return bs[](#l7.25)
--- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -103,6 +103,20 @@ class CmdLineTest(unittest.TestCase): self.exit_code('-c', 'pass'), 0)
- def test_hash_randomization(self):
# Verify that -R enables hash randomization:[](#l8.8)
self.verify_valid_flag('-R')[](#l8.9)
hashes = [][](#l8.10)
for i in range(2):[](#l8.11)
code = 'print(hash("spam"))'[](#l8.12)
data = self.start_python('-R', '-c', code)[](#l8.13)
hashes.append(data)[](#l8.14)
self.assertNotEqual(hashes[0], hashes[1])[](#l8.15)
# Verify that sys.flags contains hash_randomization[](#l8.17)
code = 'import sys; print sys.flags'[](#l8.18)
data = self.start_python('-R', '-c', code)[](#l8.19)
self.assertTrue('hash_randomization=1' in data)[](#l8.20)
def test_main(): test.test_support.run_unittest(CmdLineTest)
--- a/Lib/test/test_hash.py +++ b/Lib/test/test_hash.py @@ -3,10 +3,18 @@ #
Also test that hash implementations are inherited as expected
+import os +import sys +import struct +import datetime import unittest +import subprocess + from test import test_support from collections import Hashable +IS_64BIT = (struct.calcsize('l') == 8) + class HashEqualityTestCase(unittest.TestCase): @@ -133,10 +141,100 @@ class HashBuiltinsTestCase(unittest.Test for obj in self.hashes_to_check: self.assertEqual(hash(obj), _default_hash(obj)) +class HashRandomizationTests(unittest.TestCase): +
- def get_hash(self, repr_, seed=None):
env = os.environ.copy()[](#l9.35)
if seed is not None:[](#l9.36)
env['PYTHONHASHSEED'] = str(seed)[](#l9.37)
else:[](#l9.38)
env.pop('PYTHONHASHSEED', None)[](#l9.39)
cmd_line = [sys.executable, '-c', self.get_hash_command(repr_)][](#l9.40)
p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,[](#l9.41)
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,[](#l9.42)
env=env)[](#l9.43)
out, err = p.communicate()[](#l9.44)
out = test_support.strip_python_stderr(out)[](#l9.45)
return int(out.strip())[](#l9.46)
- def test_randomized_hash(self):
# two runs should return different hashes[](#l9.49)
run1 = self.get_hash(self.repr_, seed='random')[](#l9.50)
run2 = self.get_hash(self.repr_, seed='random')[](#l9.51)
self.assertNotEqual(run1, run2)[](#l9.52)
+ +class StringlikeHashRandomizationTests(HashRandomizationTests):
- def test_null_hash(self):
# PYTHONHASHSEED=0 disables the randomized hash[](#l9.56)
if IS_64BIT:[](#l9.57)
known_hash_of_obj = 1453079729188098211[](#l9.58)
else:[](#l9.59)
known_hash_of_obj = -1600925533[](#l9.60)
# Randomization is disabled by default:[](#l9.62)
self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)[](#l9.63)
# It can also be disabled by setting the seed to 0:[](#l9.65)
self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)[](#l9.66)
- def test_fixed_hash(self):
# test a fixed seed for the randomized hash[](#l9.69)
# Note that all types share the same values:[](#l9.70)
if IS_64BIT:[](#l9.71)
h = -4410911502303878509[](#l9.72)
else:[](#l9.73)
h = -206076799[](#l9.74)
self.assertEqual(self.get_hash(self.repr_, seed=42), h)[](#l9.75)
+ +class StrHashRandomizationTests(StringlikeHashRandomizationTests):
+ +class UnicodeHashRandomizationTests(StringlikeHashRandomizationTests):
+ +class BufferHashRandomizationTests(StringlikeHashRandomizationTests):
+ +class DatetimeTests(HashRandomizationTests):
+ +class DatetimeDateTests(DatetimeTests):
+ +class DatetimeDatetimeTests(DatetimeTests):
+ +class DatetimeTimeTests(DatetimeTests):
+ + def test_main(): test_support.run_unittest(HashEqualityTestCase, HashInheritanceTestCase,
HashBuiltinsTestCase)[](#l9.112)
HashBuiltinsTestCase,[](#l9.113)
StrHashRandomizationTests,[](#l9.114)
UnicodeHashRandomizationTests,[](#l9.115)
BufferHashRandomizationTests,[](#l9.116)
DatetimeDateTests,[](#l9.117)
DatetimeDatetimeTests,[](#l9.118)
DatetimeTimeTests)[](#l9.119)
[](#l9.120)
--- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -6,6 +6,8 @@ import os import unittest import warnings import sys +import subprocess + from test import test_support warnings.filterwarnings("ignore", "tempnam", RuntimeWarning, name) @@ -499,18 +501,46 @@ class DevNullTests (unittest.TestCase): class URandomTests (unittest.TestCase): def test_urandom(self):
try:[](#l10.16)
with test_support.check_warnings():[](#l10.17)
self.assertEqual(len(os.urandom(1)), 1)[](#l10.18)
self.assertEqual(len(os.urandom(10)), 10)[](#l10.19)
self.assertEqual(len(os.urandom(100)), 100)[](#l10.20)
self.assertEqual(len(os.urandom(1000)), 1000)[](#l10.21)
# see http://bugs.python.org/issue3708[](#l10.22)
self.assertEqual(len(os.urandom(0.9)), 0)[](#l10.23)
self.assertEqual(len(os.urandom(1.1)), 1)[](#l10.24)
self.assertEqual(len(os.urandom(2.0)), 2)[](#l10.25)
except NotImplementedError:[](#l10.26)
pass[](#l10.27)
with test_support.check_warnings():[](#l10.28)
self.assertEqual(len(os.urandom(1)), 1)[](#l10.29)
self.assertEqual(len(os.urandom(10)), 10)[](#l10.30)
self.assertEqual(len(os.urandom(100)), 100)[](#l10.31)
self.assertEqual(len(os.urandom(1000)), 1000)[](#l10.32)
# see http://bugs.python.org/issue3708[](#l10.33)
self.assertEqual(len(os.urandom(0.9)), 0)[](#l10.34)
self.assertEqual(len(os.urandom(1.1)), 1)[](#l10.35)
self.assertEqual(len(os.urandom(2.0)), 2)[](#l10.36)
- def test_urandom_length(self):
self.assertEqual(len(os.urandom(0)), 0)[](#l10.39)
self.assertEqual(len(os.urandom(1)), 1)[](#l10.40)
self.assertEqual(len(os.urandom(10)), 10)[](#l10.41)
self.assertEqual(len(os.urandom(100)), 100)[](#l10.42)
self.assertEqual(len(os.urandom(1000)), 1000)[](#l10.43)
- def test_urandom_value(self):
data1 = os.urandom(16)[](#l10.46)
data2 = os.urandom(16)[](#l10.47)
self.assertNotEqual(data1, data2)[](#l10.48)
- def get_urandom_subprocess(self, count):
code = '\n'.join(([](#l10.51)
'import os, sys',[](#l10.52)
'data = os.urandom(%s)' % count,[](#l10.53)
'sys.stdout.write(data)',[](#l10.54)
'sys.stdout.flush()'))[](#l10.55)
cmd_line = [sys.executable, '-c', code][](#l10.56)
p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,[](#l10.57)
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)[](#l10.58)
out, err = p.communicate()[](#l10.59)
out = test_support.strip_python_stderr(out)[](#l10.60)
self.assertEqual(len(out), count)[](#l10.61)
return out[](#l10.62)
- def test_urandom_subprocess(self):
data1 = self.get_urandom_subprocess(16)[](#l10.65)
data2 = self.get_urandom_subprocess(16)[](#l10.66)
self.assertNotEqual(data1, data2)[](#l10.67)
class Win32ErrorTests(unittest.TestCase): def test_rename(self):
--- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -6,7 +6,6 @@ import weakref import operator import copy import pickle -import os from random import randrange, shuffle import sys import collections @@ -688,6 +687,17 @@ class TestBasicOps(unittest.TestCase): if self.repr is not None: self.assertEqual(repr(self.set), self.repr)
- def check_repr_against_values(self):
text = repr(self.set)[](#l11.16)
self.assertTrue(text.startswith('{'))[](#l11.17)
self.assertTrue(text.endswith('}'))[](#l11.18)
result = text[1:-1].split(', ')[](#l11.20)
result.sort()[](#l11.21)
sorted_repr_values = [repr(value) for value in self.values][](#l11.22)
sorted_repr_values.sort()[](#l11.23)
self.assertEqual(result, sorted_repr_values)[](#l11.24)
+ def test_print(self): fo = open(test_support.TESTFN, "wb") try: @@ -837,6 +847,46 @@ class TestBasicOpsTriple(TestBasicOps): self.length = 3 self.repr = None +#------------------------------------------------------------------------------ + +class TestBasicOpsString(TestBasicOps):
- def setUp(self):
self.case = "string set"[](#l11.37)
self.values = ["a", "b", "c"][](#l11.38)
self.set = set(self.values)[](#l11.39)
self.dup = set(self.values)[](#l11.40)
self.length = 3[](#l11.41)
+ +#------------------------------------------------------------------------------ + +class TestBasicOpsUnicode(TestBasicOps):
- def setUp(self):
self.case = "unicode set"[](#l11.50)
self.values = [u"a", u"b", u"c"][](#l11.51)
self.set = set(self.values)[](#l11.52)
self.dup = set(self.values)[](#l11.53)
self.length = 3[](#l11.54)
+ +#------------------------------------------------------------------------------ + +class TestBasicOpsMixedStringUnicode(TestBasicOps):
- def setUp(self):
self.case = "string and bytes set"[](#l11.63)
self.values = ["a", "b", u"a", u"b"][](#l11.64)
self.set = set(self.values)[](#l11.65)
self.dup = set(self.values)[](#l11.66)
self.length = 4[](#l11.67)
- def test_repr(self):
with test_support.check_warnings():[](#l11.70)
self.check_repr_against_values()[](#l11.71)
+ #============================================================================== def baditer():
--- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -24,7 +24,7 @@ import re "captured_stdout", "TransientResource", "transient_internet", "run_with_locale", "set_memlimit", "bigmemtest", "bigaddrspacetest", "BasicTestRunner", "run_unittest", "run_doctest", "threading_setup",
"threading_cleanup", "reap_children"][](#l12.7)
"threading_cleanup", "reap_children", "strip_python_stderr"][](#l12.8)
class Error(Exception): """Base class for regression test exceptions.""" @@ -893,3 +893,13 @@ def reap_children(): break except: break + +def strip_python_stderr(stderr):
- This will typically be run on the result of the communicate() method
- of a subprocess.Popen object.
- """
- stderr = re.sub(br"[\d+ refs]\r?\n?$", b"", stderr).strip()
- return stderr
--- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -105,10 +105,11 @@ class SymtableTest(unittest.TestCase): def test_function_info(self): func = self.spam
self.assertEqual(func.get_parameters(), ("a", "b", "kw", "var"))[](#l13.7)
self.assertEqual(func.get_locals(),[](#l13.8)
self.assertEqual([](#l13.9)
tuple(sorted(func.get_parameters())), ("a", "b", "kw", "var"))[](#l13.10)
self.assertEqual(tuple(sorted(func.get_locals())),[](#l13.11) ("a", "b", "bar", "internal", "kw", "var", "x"))[](#l13.12)
self.assertEqual(func.get_globals(), ("bar", "glob"))[](#l13.13)
self.assertEqual(tuple(sorted(func.get_globals())), ("bar", "glob"))[](#l13.14) self.assertEqual(self.internal.get_frees(), ("x",))[](#l13.15)
--- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -384,7 +384,7 @@ class SysModuleTest(unittest.TestCase): attrs = ("debug", "py3k_warning", "division_warning", "division_new", "inspect", "interactive", "optimize", "dont_write_bytecode", "no_site", "ignore_environment", "tabcheck", "verbose",
"unicode", "bytes_warning")[](#l14.7)
"unicode", "bytes_warning", "hash_randomization")[](#l14.8) for attr in attrs:[](#l14.9) self.assert_(hasattr(sys.flags, attr), attr)[](#l14.10) self.assertEqual(type(getattr(sys.flags, attr)), int, attr)[](#l14.11)
--- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -280,6 +280,7 @@ PYTHON_OBJS= [](#l15.3) Python/pymath.o [](#l15.4) Python/pystate.o [](#l15.5) Python/pythonrun.o [](#l15.6)
Python/random.o \[](#l15.7) Python/structmember.o \[](#l15.8) Python/symtable.o \[](#l15.9) Python/sysmodule.o \[](#l15.10)
@@ -708,7 +709,7 @@ buildbottest: all platform -@if which pybuildbot.identify >/dev/null 2>&1; then [](#l15.12) pybuildbot.identify "CC='$(CC)'" "CXX='$(CXX)'"; [](#l15.13) fi - (TESTPYTHON)(TESTPYTHON) (TESTPYTHON)(TESTPROG) -uall -rw $(TESTOPTS) + (TESTPYTHON)−R(TESTPYTHON) -R (TESTPYTHON)−R(TESTPROG) -uall -rw $(TESTOPTS) QUICKTESTOPTS= $(TESTOPTS) -x test_thread test_signal test_strftime [](#l15.18) test_unicodedata test_re test_sre test_select test_poll [](#l15.19)
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,11 @@ What's New in Python 2.6.8 rc 1? Core and Builtins ----------------- +- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED
- environment variable, to provide an opt-in way to protect against denial of
- service attacks due to hash collisions within the dict and set types. Patch
- by David Malcolm, based on work by Victor Stinner. + Library -------
--- a/Misc/python.man +++ b/Misc/python.man @@ -34,6 +34,9 @@ python - an interpreted, interactive, o .B -O0 ] [ +.B -R +] +[ .B -Q .I argument ] @@ -151,6 +154,18 @@ to \fI.pyo\fP. Given twice, causes docs .B -O0 Discard docstrings in addition to the \fB-O\fP optimizations. .TP +.B -R +Turn on "hash randomization", so that the hash() values of str, bytes and +datetime objects are "salted" with an unpredictable pseudo-random value. +Although they remain constant within an individual Python process, they are +not predictable between repeated invocations of Python. +.IP +This is intended to provide protection against a denial of service +caused by carefully-chosen inputs that exploit the worst case performance +of a dict insertion, O(n^2) complexity. See +http://www.ocert.org/advisories/ocert-2011-003.html[](#l17.26) +for details. +.TP .BI "-Q " argument Division control; see PEP 238. The argument must be one of "old" (the default, int/int and long/long return an int or long), "new" (new @@ -411,6 +426,20 @@ the \fB-u\fP option. If this is set to a non-empty string it is equivalent to specifying the \fB-v\fP option. If set to an integer, it is equivalent to specifying \fB-v\fP multiple times. +.IP PYTHONHASHSEED +If this variable is set to "random", the effect is the same as specifying +the \fB-R\fP option: a random value is used to seed the hashes of str, +bytes and datetime objects. + +If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for +generating the hash() of the types covered by the hash randomization. Its +purpose is to allow repeatable hashing, such as for selftests for the +interpreter itself, or to allow a cluster of python processes to share hash +values. + +The integer must be a decimal number in the range [0,4294967295]. Specifying +the value 0 will lead to the same hash values as when hash randomization is +disabled. .SH AUTHOR The Python Software Foundation: http://www.python.org/psf[](#l17.51) .SH INTERNET RESOURCES
--- a/Modules/main.c +++ b/Modules/main.c @@ -40,7 +40,7 @@ static char *orig_argv; static int orig_argc; / command line options */ -#define BASE_OPTS "3bBc:dEhiJm:OQ:sStuUvVW:xX?" +#define BASE_OPTS "3bBc:dEhiJm:OQ:RsStuUvVW:xX?" #ifndef RISCOS #define PROGRAM_OPTS BASE_OPTS @@ -71,6 +71,9 @@ static char *usage_2 = "[](#l18.12) -m mod : run library module as a script (terminates option list)\n[](#l18.13) -O : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n[](#l18.14) -OO : remove doc-strings in addition to the -O optimizations\n[](#l18.15) +-R : use a pseudo-random salt to make hash() values of various types be\n[](#l18.16)
unpredictable between separate invocations of the interpreter, as\n\[](#l18.17)
a defense against denial-of-service attacks\n\[](#l18.18)
-Q arg : division options: -Qold (default), -Qwarn, -Qwarnall, -Qnew\n[](#l18.19)
-s : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n[](#l18.20)
-S : don't imply 'import site' on initialization\n[](#l18.21)
@@ -101,6 +104,12 @@ PYTHONHOME : alternate direct
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n[](#l18.23)
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n[](#l18.24)
";
+static char *usage_6 = "[](#l18.26)
+PYTHONHASHSEED: if this variable is set to random
, the effect is the same \n[](#l18.27)
- as specifying the :option:
-R
option: a random value is used to seed the\n[](#l18.28) - hashes of str, bytes and datetime objects. It can also be set to an integer\n[](#l18.29)
- in the range [0,4294967295] to get hash values with a predictable seed.\n[](#l18.30) +";
static int @@ -117,6 +126,7 @@ usage(int exitcode, char* program) fputs(usage_3, f); fprintf(f, usage_4, DELIM); fprintf(f, usage_5, DELIM, PYTHONHOMEHELP);
#if defined(__VMS) if (exitcode == 0) { @@ -388,6 +398,10 @@ Py_Main(int argc, char **argv) PySys_AddWarnOption(_PyOS_optarg); break;
case 'R':[](#l18.47)
Py_HashRandomizationFlag++;[](#l18.48)
break;[](#l18.49)
+ /* This space reserved for other options */ default:
--- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -8371,117 +8371,35 @@ posix_getloadavg(PyObject *self, PyObjec } #endif -#ifdef MS_WINDOWS - -PyDoc_STRVAR(win32_urandom__doc__, +PyDoc_STRVAR(posix_urandom__doc__, "urandom(n) -> str\n\n[](#l19.11) -Return a string of n random bytes suitable for cryptographic use."); - -typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,[](#l19.14)
LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\[](#l19.15)
DWORD dwFlags );[](#l19.16)
-typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,[](#l19.17)
BYTE *pbBuffer );[](#l19.18)
- -static CRYPTGENRANDOM pCryptGenRandom = NULL; -/* This handle is never explicitly released. Instead, the operating
-static PyObject* -win32_urandom(PyObject *self, PyObject *args) -{
- /* Read arguments */
- if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
return NULL;[](#l19.33)
- if (howMany < 0)
+Return n random bytes suitable for cryptographic use."); + +static PyObject * +posix_urandom(PyObject *self, PyObject *args) +{
/* Read arguments */[](#l19.44)
- if (!PyArg_ParseTuple(args, "n:urandom", &size))
return NULL;[](#l19.46)
- if (size < 0) return PyErr_Format(PyExc_ValueError, "negative argument not allowed");
- if (hCryptProv == 0) {
HINSTANCE hAdvAPI32 = NULL;[](#l19.52)
CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;[](#l19.53)
/* Obtain handle to the DLL containing CryptoAPI[](#l19.55)
This should not fail */[](#l19.56)
hAdvAPI32 = GetModuleHandle("advapi32.dll");[](#l19.57)
if(hAdvAPI32 == NULL)[](#l19.58)
return win32_error("GetModuleHandle", NULL);[](#l19.59)
/* Obtain pointers to the CryptoAPI functions[](#l19.61)
This will fail on some early versions of Win95 */[](#l19.62)
pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress([](#l19.63)
hAdvAPI32,[](#l19.64)
"CryptAcquireContextA");[](#l19.65)
if (pCryptAcquireContext == NULL)[](#l19.66)
return PyErr_Format(PyExc_NotImplementedError,[](#l19.67)
"CryptAcquireContextA not found");[](#l19.68)
pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress([](#l19.70)
hAdvAPI32, "CryptGenRandom");[](#l19.71)
if (pCryptGenRandom == NULL)[](#l19.72)
return PyErr_Format(PyExc_NotImplementedError,[](#l19.73)
"CryptGenRandom not found");[](#l19.74)
/* Acquire context */[](#l19.76)
if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,[](#l19.77)
PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))[](#l19.78)
return win32_error("CryptAcquireContext", NULL);[](#l19.79)
- }
- /* Allocate bytes */
- result = PyString_FromStringAndSize(NULL, howMany);
- if (result != NULL) {
/* Get random data */[](#l19.85)
memset(PyString_AS_STRING(result), 0, howMany); /* zero seed */[](#l19.86)
if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*)[](#l19.87)
PyString_AS_STRING(result))) {[](#l19.88)
Py_DECREF(result);[](#l19.89)
return win32_error("CryptGenRandom", NULL);[](#l19.90)
}[](#l19.91)
- ret = _PyOS_URandom(PyBytes_AS_STRING(result),
PyBytes_GET_SIZE(result));[](#l19.97)
- if (ret == -1) {
Py_DECREF(result);[](#l19.99)
} return result; } -#endif - -#ifdef VMS -/* Use openssl random routine */ -#include <openssl/rand.h> -PyDoc_STRVAR(vms_urandom__doc, -"urandom(n) -> str\n\n[](#l19.110) -Return a string of n random bytes suitable for cryptographic use."); - -static PyObject* -vms_urandom(PyObject *self, PyObject *args) -{return NULL;[](#l19.100)
- /* Read arguments */
- if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
return NULL;[](#l19.121)
- if (howMany < 0)
return PyErr_Format(PyExc_ValueError,[](#l19.123)
"negative argument not allowed");[](#l19.124)
- /* Allocate bytes */
- result = PyString_FromStringAndSize(NULL, howMany);
- if (result != NULL) {
/* Get random data */[](#l19.129)
if (RAND_pseudo_bytes((unsigned char*)[](#l19.130)
PyString_AS_STRING(result),[](#l19.131)
howMany) < 0) {[](#l19.132)
Py_DECREF(result);[](#l19.133)
return PyErr_Format(PyExc_ValueError,[](#l19.134)
"RAND_pseudo_bytes");[](#l19.135)
}[](#l19.136)
- }
- return result;
-} -#endif static PyMethodDef posix_methods[] = { {"access", posix_access, METH_VARARGS, posix_access__doc__}, @@ -8787,12 +8705,7 @@ static PyMethodDef posix_methods[] = { #ifdef HAVE_GETLOADAVG {"getloadavg", posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__}, #endif
- #ifdef MS_WINDOWS
- {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__},
- #endif
- #ifdef __VMS
- {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__},
- #endif
--- a/Objects/bufferobject.c +++ b/Objects/bufferobject.c @@ -334,10 +334,20 @@ buffer_hash(PyBufferObject *self) return -1; p = (unsigned char *) ptr; len = size;
- /*
We make the hash of the empty buffer be 0, rather than using[](#l20.9)
(prefix ^ suffix), since this slightly obfuscates the hash secret[](#l20.10)
- */
- if (len == 0) {
self->b_hash = 0;[](#l20.13)
return 0;[](#l20.14)
- }
- x = _Py_HashSecret.prefix;
- x ^= p << 7; while (--len >= 0) x = (1000003x) ^ *p++; x ^= size;
- x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; self->b_hash = x;
--- a/Objects/object.c +++ b/Objects/object.c @@ -1101,6 +1101,8 @@ PyObject_HashNotImplemented(PyObject *se return -1; } +_Py_HashSecret_t _Py_HashSecret; + long PyObject_Hash(PyObject *v) {
--- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1212,11 +1212,21 @@ string_hash(PyStringObject *a) if (a->ob_shash != -1) return a->ob_shash; len = Py_SIZE(a);
- /*
We make the hash of the empty string be 0, rather than using[](#l22.8)
(prefix ^ suffix), since this slightly obfuscates the hash secret[](#l22.9)
- */
- if (len == 0) {
a->ob_shash = 0;[](#l22.12)
return 0;[](#l22.13)
- } p = (unsigned char *) a->ob_sval;
- x = _Py_HashSecret.prefix;
- x ^= p << 7; while (--len >= 0) x = (1000003x) ^ *p++; x ^= Py_SIZE(a);
- x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; a->ob_shash = x;
--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6695,11 +6695,21 @@ unicode_hash(PyUnicodeObject *self) if (self->hash != -1) return self->hash; len = PyUnicode_GET_SIZE(self);
- /*
We make the hash of the empty string be 0, rather than using[](#l23.8)
(prefix ^ suffix), since this slightly obfuscates the hash secret[](#l23.9)
- */
- if (len == 0) {
self->hash = 0;[](#l23.12)
return 0;[](#l23.13)
- } p = PyUnicode_AS_UNICODE(self);
- x = _Py_HashSecret.prefix;
- x ^= p << 7; while (--len >= 0) x = (1000003x) ^ *p++; x ^= PyUnicode_GET_SIZE(self);
- x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; self->hash = x;
--- a/PCbuild/pythoncore.vcproj +++ b/PCbuild/pythoncore.vcproj @@ -1779,6 +1779,10 @@ > <File + RelativePath="..\Python\random.c" + > + + <File RelativePath="..\Python\structmember.c" >
--- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -67,6 +67,7 @@ static void call_sys_exitfunc(void); static void call_ll_exitfuncs(void); extern void _PyUnicode_Init(void); extern void _PyUnicode_Fini(void); +extern void _PyRandom_Init(void); #ifdef WITH_THREAD extern void _PyGILState_Init(PyInterpreterState *, PyThreadState ); @@ -89,6 +90,7 @@ int Py_IgnoreEnvironmentFlag; / e.g. PY true divisions (which they will be in 2.3). / int _Py_QnewFlag = 0; int Py_NoUserSiteDirectory = 0; / for -s and site.py / +int Py_HashRandomizationFlag = 0; / for -R and PYTHONHASHSEED / / PyModule_GetWarningsModule is no longer necessary as of 2.6 since _warnings is builtin. This API should not be used. */ @@ -166,6 +168,12 @@ Py_InitializeEx(int install_sigs) Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p); if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0') Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p);
- /* The variable is only tested for existence here; _PyRandom_Init will
check its value further. */[](#l25.24)
- if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0')
Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p);[](#l25.26)
interp = PyInterpreterState_New(); if (interp == NULL)
new file mode 100644 --- /dev/null +++ b/Python/random.c @@ -0,0 +1,302 @@ +#include "Python.h" +#ifdef MS_WINDOWS +#include <windows.h> +#else +#include <fcntl.h> +#endif + +static int random_initialized = 0; + +#ifdef MS_WINDOWS +typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,[](#l26.15)
LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\[](#l26.16)
DWORD dwFlags );[](#l26.17)
+typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,[](#l26.18)
BYTE *pbBuffer );[](#l26.19)
+ +static CRYPTGENRANDOM pCryptGenRandom = NULL; +/* This handle is never explicitly released. Instead, the operating
+static int +win32_urandom_init(int raise) +{
- /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */
- hAdvAPI32 = GetModuleHandle("advapi32.dll");
- if(hAdvAPI32 == NULL)
goto error;[](#l26.35)
- /* Obtain pointers to the CryptoAPI functions. This will fail on some early
versions of Win95. */[](#l26.38)
- pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
hAdvAPI32, "CryptAcquireContextA");[](#l26.40)
- if (pCryptAcquireContext == NULL)
goto error;[](#l26.42)
- pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32,
"CryptGenRandom");[](#l26.45)
- if (pCryptGenRandom == NULL)
goto error;[](#l26.47)
- /* Acquire context */
- if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))[](#l26.51)
goto error;[](#l26.52)
- if (raise)
PyErr_SetFromWindowsErr(0);[](#l26.58)
- else
Py_FatalError("Failed to initialize Windows random API (CryptoGen)");[](#l26.60)
- return -1;
+} + +/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
- API. Return 0 on success, or -1 on error. */ +static int +win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{
- Py_ssize_t chunk;
- while (size > 0)
- {
chunk = size > INT_MAX ? INT_MAX : size;[](#l26.79)
if (!pCryptGenRandom(hCryptProv, chunk, buffer))[](#l26.80)
{[](#l26.81)
/* CryptGenRandom() failed */[](#l26.82)
if (raise)[](#l26.83)
PyErr_SetFromWindowsErr(0);[](#l26.84)
else[](#l26.85)
Py_FatalError("Failed to initialized the randomized hash "[](#l26.86)
"secret using CryptoGen)");[](#l26.87)
return -1;[](#l26.88)
}[](#l26.89)
buffer += chunk;[](#l26.90)
size -= chunk;[](#l26.91)
- }
- return 0;
+} +#endif /* MS_WINDOWS / + + +#ifdef __VMS +/ Use openssl random routine */ +#include <openssl/rand.h> +static int +vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{
- if (RAND_pseudo_bytes(buffer, size) < 0) {
if (raise) {[](#l26.105)
PyErr_Format(PyExc_ValueError,[](#l26.106)
"RAND_pseudo_bytes");[](#l26.107)
} else {[](#l26.108)
Py_FatalError("Failed to initialize the randomized hash "[](#l26.109)
"secret using RAND_pseudo_bytes");[](#l26.110)
}[](#l26.111)
return -1;[](#l26.112)
- }
- return 0;
+} +#endif /* __VMS / + + +#if !defined(MS_WINDOWS) && !defined(__VMS) + +/ Read size bytes from /dev/urandom into buffer.
- Call Py_FatalError() on error. */ +static void +dev_urandom_noraise(char *buffer, Py_ssize_t size) +{
- int fd;
- Py_ssize_t n;
- fd = open("/dev/urandom", O_RDONLY);
- if (fd < 0)
Py_FatalError("Failed to open /dev/urandom");[](#l26.133)
- while (0 < size)
- {
do {[](#l26.137)
n = read(fd, buffer, (size_t)size);[](#l26.138)
} while (n < 0 && errno == EINTR);[](#l26.139)
if (n <= 0)[](#l26.140)
{[](#l26.141)
/* stop on error or if read(size) returned 0 */[](#l26.142)
Py_FatalError("Failed to read bytes from /dev/urandom");[](#l26.143)
break;[](#l26.144)
}[](#l26.145)
buffer += n;[](#l26.146)
size -= (Py_ssize_t)n;[](#l26.147)
- }
- close(fd);
+} + +/* Read size bytes from /dev/urandom into buffer.
- Return 0 on success, raise an exception and return -1 on error. */ +static int +dev_urandom_python(char *buffer, Py_ssize_t size) +{
- int fd;
- Py_ssize_t n;
- Py_BEGIN_ALLOW_THREADS
- fd = open("/dev/urandom", O_RDONLY);
- Py_END_ALLOW_THREADS
- if (fd < 0)
- {
PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom");[](#l26.168)
return -1;[](#l26.169)
- }
- Py_BEGIN_ALLOW_THREADS
- do {
do {[](#l26.174)
n = read(fd, buffer, (size_t)size);[](#l26.175)
} while (n < 0 && errno == EINTR);[](#l26.176)
if (n <= 0)[](#l26.177)
break;[](#l26.178)
buffer += n;[](#l26.179)
size -= (Py_ssize_t)n;[](#l26.180)
- } while (0 < size);
- Py_END_ALLOW_THREADS
- if (n <= 0)
- {
/* stop on error or if read(size) returned 0 */[](#l26.186)
if (n < 0)[](#l26.187)
PyErr_SetFromErrno(PyExc_OSError);[](#l26.188)
else[](#l26.189)
PyErr_Format(PyExc_RuntimeError,[](#l26.190)
"Failed to read %zi bytes from /dev/urandom",[](#l26.191)
size);[](#l26.192)
close(fd);[](#l26.193)
return -1;[](#l26.194)
- }
- close(fd);
- return 0;
+} +#endif /* !defined(MS_WINDOWS) && !defined(__VMS) / + +/ Fill buffer with pseudo-random bytes generated by a linear congruent
- Use bits 23..16 of x(n) to generate a byte. */ +static void +lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size) +{
- size_t index;
- unsigned int x;
- x = x0;
- for (index=0; index < size; index++) {
x *= 214013;[](#l26.215)
x += 2531011;[](#l26.216)
/* modulo 2 ^ (8 * sizeof(int)) */[](#l26.217)
buffer[index] = (x >> 16) & 0xff;[](#l26.218)
- }
+} + +/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic
- use, from the operating random number generator (RNG). +
- Return 0 on success, raise an exception and return -1 on error. */ +int +_PyOS_URandom(void *buffer, Py_ssize_t size) +{
- if (size < 0) {
PyErr_Format(PyExc_ValueError,[](#l26.230)
"negative argument not allowed");[](#l26.231)
return -1;[](#l26.232)
- }
- if (size == 0)
return 0;[](#l26.235)
+# endif +#endif +} + +void +_PyRandom_Init(void) +{
- /*
By default, hash randomization is disabled, and only[](#l26.260)
enabled if PYTHONHASHSEED is set to non-empty or if[](#l26.261)
"-R" is provided at the command line:[](#l26.262)
- */
- if (!Py_HashRandomizationFlag) {
/* Disable the randomized hash: */[](#l26.265)
memset(secret, 0, secret_size);[](#l26.266)
return;[](#l26.267)
- }
- /*
Hash randomization is enabled. Generate a per-process secret,[](#l26.271)
using PYTHONHASHSEED if provided.[](#l26.272)
- */
- env = Py_GETENV("PYTHONHASHSEED");
- if (env && *env != '\0' && strcmp(env, "random") != 0) {
char *endptr = env;[](#l26.277)
unsigned long seed;[](#l26.278)
seed = strtoul(env, &endptr, 10);[](#l26.279)
if (*endptr != '\0'[](#l26.280)
|| seed > 4294967295UL[](#l26.281)
|| (errno == ERANGE && seed == ULONG_MAX))[](#l26.282)
{[](#l26.283)
Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer "[](#l26.284)
"in range [0; 4294967295]");[](#l26.285)
}[](#l26.286)
if (seed == 0) {[](#l26.287)
/* disable the randomized hash */[](#l26.288)
memset(secret, 0, secret_size);[](#l26.289)
}[](#l26.290)
else {[](#l26.291)
lcg_urandom(seed, (unsigned char*)secret, secret_size);[](#l26.292)
}[](#l26.293)
- }
- else {
(void)win32_urandom((unsigned char *)secret, secret_size, 0);[](#l26.297)
+#else /* #ifdef MS_WINDOWS */ +# ifdef __VMS
vms_urandom((unsigned char *)secret, secret_size, 0);[](#l26.300)
dev_urandom_noraise((char*)secret, secret_size);[](#l26.302)
--- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1224,6 +1224,7 @@ static PyStructSequence_Field flags_fiel {"unicode", "-U"}, /* {"skip_first", "-x"}, */ {"bytes_warning", "-b"},
- {"hash_randomization", "-R"}, {0} }; @@ -1232,9 +1233,9 @@ static PyStructSequence_Desc flags_desc flags__doc__, /* doc / flags_fields, / fields */
#endif }; @@ -1271,6 +1272,7 @@ make_flags(void) SetFlag(Py_UnicodeFlag); /* SetFlag(skipfirstline); */ SetFlag(Py_BytesWarningFlag);