(original) (raw)

changeset: 77013:edb9ce3a6c2e user: Antoine Pitrou solipsis@pitrou.net date: Thu May 17 18:55:59 2012 +0200 files: Doc/c-api/import.rst Doc/library/imp.rst Lib/importlib/_bootstrap.py Lib/importlib/test/test_locks.py Lib/pydoc.py Lib/test/lock_tests.py Lib/test/test_pkg.py Lib/test/test_threaded_import.py Lib/token.py Misc/NEWS Python/import.c Python/importlib.h description: Issue #9260: A finer-grained import lock. Most of the import sequence now uses per-module locks rather than the global import lock, eliminating well-known issues with threads and imports. diff -r 3430d7329a3b -r edb9ce3a6c2e Doc/c-api/import.rst --- a/Doc/c-api/import.rst Thu May 17 17:37:02 2012 +0200 +++ b/Doc/c-api/import.rst Thu May 17 18:55:59 2012 +0200 @@ -30,13 +30,13 @@ .. c:function:: PyObject* PyImport_ImportModuleNoBlock(const char *name) - This version of :c:func:`PyImport_ImportModule` does not block. It's intended - to be used in C functions that import other modules to execute a function. - The import may block if another thread holds the import lock. The function - :c:func:`PyImport_ImportModuleNoBlock` never blocks. It first tries to fetch - the module from sys.modules and falls back to :c:func:`PyImport_ImportModule` - unless the lock is held, in which case the function will raise an - :exc:`ImportError`. + This function is a deprecated alias of :c:func:`PyImport_ImportModule`. + + .. versionchanged:: 3.3 + This function used to fail immediately when the import lock was held + by another thread. In Python 3.3 though, the locking scheme switched + to per-module locks for most purposes, so this function's special + behaviour isn't needed anymore. .. c:function:: PyObject* PyImport_ImportModuleEx(char *name, PyObject *globals, PyObject *locals, PyObject *fromlist) diff -r 3430d7329a3b -r edb9ce3a6c2e Doc/library/imp.rst --- a/Doc/library/imp.rst Thu May 17 17:37:02 2012 +0200 +++ b/Doc/library/imp.rst Thu May 17 18:55:59 2012 +0200 @@ -112,18 +112,29 @@ Return ``True`` if the import lock is currently held, else ``False``. On platforms without threads, always return ``False``. - On platforms with threads, a thread executing an import holds an internal lock - until the import is complete. This lock blocks other threads from doing an - import until the original import completes, which in turn prevents other threads - from seeing incomplete module objects constructed by the original thread while - in the process of completing its import (and the imports, if any, triggered by - that). + On platforms with threads, a thread executing an import first holds a + global import lock, then sets up a per-module lock for the rest of the + import. This blocks other threads from importing the same module until + the original import completes, preventing other threads from seeing + incomplete module objects constructed by the original thread. An + exception is made for circular imports, which by construction have to + expose an incomplete module object at some point. + + .. note:: + Locking semantics of imports are an implementation detail which may + vary from release to release. However, Python ensures that circular + imports work without any deadlocks. + + .. versionchanged:: 3.3 + In Python 3.3, the locking scheme has changed to per-module locks for + the most part. .. function:: acquire_lock() - Acquire the interpreter's import lock for the current thread. This lock should - be used by import hooks to ensure thread-safety when importing modules. + Acquire the interpreter's global import lock for the current thread. + This lock should be used by import hooks to ensure thread-safety when + importing modules. Once a thread has acquired the import lock, the same thread may acquire it again without blocking; the thread must release it once for each time it has @@ -134,8 +145,8 @@ .. function:: release_lock() - Release the interpreter's import lock. On platforms without threads, this - function does nothing. + Release the interpreter's global import lock. On platforms without + threads, this function does nothing. .. function:: reload(module) diff -r 3430d7329a3b -r edb9ce3a6c2e Lib/importlib/_bootstrap.py --- a/Lib/importlib/_bootstrap.py Thu May 17 17:37:02 2012 +0200 +++ b/Lib/importlib/_bootstrap.py Thu May 17 18:55:59 2012 +0200 @@ -159,6 +159,145 @@ return type(_io)(name) +# Module-level locking ######################################################## + +# A dict mapping module names to weakrefs of _ModuleLock instances +_module_locks = {} +# A dict mapping thread ids to _ModuleLock instances +_blocking_on = {} + + +class _DeadlockError(RuntimeError): + pass + + +class _ModuleLock: + """A recursive lock implementation which is able to detect deadlocks + (e.g. thread 1 trying to take locks A then B, and thread 2 trying to + take locks B then A). + """ + + def __init__(self, name): + self.lock = _thread.allocate_lock() + self.wakeup = _thread.allocate_lock() + self.name = name + self.owner = None + self.count = 0 + self.waiters = 0 + + def has_deadlock(self): + # Deadlock avoidance for concurrent circular imports. + me = _thread.get_ident() + tid = self.owner + while True: + lock = _blocking_on.get(tid) + if lock is None: + return False + tid = lock.owner + if tid == me: + return True + + def acquire(self): + """ + Acquire the module lock. If a potential deadlock is detected, + a _DeadlockError is raised. + Otherwise, the lock is always acquired and True is returned. + """ + tid = _thread.get_ident() + _blocking_on[tid] = self + try: + while True: + with self.lock: + if self.count == 0 or self.owner == tid: + self.owner = tid + self.count += 1 + return True + if self.has_deadlock(): + raise _DeadlockError("deadlock detected by %r" % self) + if self.wakeup.acquire(False): + self.waiters += 1 + # Wait for a release() call + self.wakeup.acquire() + self.wakeup.release() + finally: + del _blocking_on[tid] + + def release(self): + tid = _thread.get_ident() + with self.lock: + if self.owner != tid: + raise RuntimeError("cannot release un-acquired lock") + assert self.count > 0 + self.count -= 1 + if self.count == 0: + self.owner = None + if self.waiters: + self.waiters -= 1 + self.wakeup.release() + + def __repr__(self): + return "_ModuleLock(%r) at %d" % (self.name, id(self)) + + +class _DummyModuleLock: + """A simple _ModuleLock equivalent for Python builds without + multi-threading support.""" + + def __init__(self, name): + self.name = name + self.count = 0 + + def acquire(self): + self.count += 1 + return True + + def release(self): + if self.count == 0: + raise RuntimeError("cannot release un-acquired lock") + self.count -= 1 + + def __repr__(self): + return "_DummyModuleLock(%r) at %d" % (self.name, id(self)) + + +# The following two functions are for consumption by Python/import.c. + +def _get_module_lock(name): + """Get or create the module lock for a given module name. + + Should only be called with the import lock taken.""" + lock = None + if name in _module_locks: + lock = _module_locks[name]() + if lock is None: + if _thread is None: + lock = _DummyModuleLock(name) + else: + lock = _ModuleLock(name) + def cb(_): + del _module_locks[name] + _module_locks[name] = _weakref.ref(lock, cb) + return lock + +def _lock_unlock_module(name): + """Release the global import lock, and acquires then release the + module lock for a given module name. + This is used to ensure a module is completely initialized, in the + event it is being imported by another thread. + + Should only be called with the import lock taken.""" + lock = _get_module_lock(name) + _imp.release_lock() + try: + lock.acquire() + except _DeadlockError: + # Concurrent circular import, we'll accept a partially initialized + # module object. + pass + else: + lock.release() + + # Finder/loader utility code ################################################## _PYCACHE = '__pycache__' @@ -264,12 +403,15 @@ else: module.__package__ = fullname.rpartition('.')[0] try: + module.__initializing__ = True # If __package__ was not set above, __import__() will do it later. return fxn(self, module, *args, **kwargs) except: if not is_reload: del sys.modules[fullname] raise + finally: + module.__initializing__ = False _wrap(module_for_loader_wrapper, fxn) return module_for_loader_wrapper @@ -932,7 +1074,8 @@ if not sys.meta_path: _warnings.warn('sys.meta_path is empty', ImportWarning) for finder in sys.meta_path: - loader = finder.find_module(name, path) + with _ImportLockContext(): + loader = finder.find_module(name, path) if loader is not None: # The parent import may have already imported this module. if name not in sys.modules: @@ -962,8 +1105,7 @@ _ERR_MSG = 'No module named {!r}' -def _find_and_load(name, import_): - """Find and load the module.""" +def _find_and_load_unlocked(name, import_): path = None parent = name.rpartition('.')[0] if parent: @@ -1009,6 +1151,19 @@ return module +def _find_and_load(name, import_): + """Find and load the module, and release the import lock.""" + try: + lock = _get_module_lock(name) + finally: + _imp.release_lock() + lock.acquire() + try: + return _find_and_load_unlocked(name, import_) + finally: + lock.release() + + def _gcd_import(name, package=None, level=0): """Import and return the module based on its name, the package the call is being made from, and the level adjustment. @@ -1021,17 +1176,17 @@ _sanity_check(name, package, level) if level > 0: name = _resolve_name(name, package, level) - with _ImportLockContext(): - try: - module = sys.modules[name] - if module is None: - message = ("import of {} halted; " - "None in sys.modules".format(name)) - raise ImportError(message, name=name) - return module - except KeyError: - pass # Don't want to chain the exception + _imp.acquire_lock() + if name not in sys.modules: return _find_and_load(name, _gcd_import) + module = sys.modules[name] + if module is None: + _imp.release_lock() + message = ("import of {} halted; " + "None in sys.modules".format(name)) + raise ImportError(message, name=name) + _lock_unlock_module(name) + return module def _handle_fromlist(module, fromlist, import_): @@ -1149,7 +1304,17 @@ continue else: raise ImportError('importlib requires posix or nt') + + try: + thread_module = BuiltinImporter.load_module('_thread') + except ImportError: + # Python was built without threads + thread_module = None + weakref_module = BuiltinImporter.load_module('_weakref') + setattr(self_module, '_os', os_module) + setattr(self_module, '_thread', thread_module) + setattr(self_module, '_weakref', weakref_module) setattr(self_module, 'path_sep', path_sep) setattr(self_module, 'path_separators', set(path_separators)) # Constants diff -r 3430d7329a3b -r edb9ce3a6c2e Lib/importlib/test/test_locks.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/importlib/test/test_locks.py Thu May 17 18:55:59 2012 +0200 @@ -0,0 +1,115 @@ +from importlib import _bootstrap +import time +import unittest +import weakref + +from test import support + +try: + import threading +except ImportError: + threading = None +else: + from test import lock_tests + + +LockType = _bootstrap._ModuleLock +DeadlockError = _bootstrap._DeadlockError + + +if threading is not None: + class ModuleLockAsRLockTests(lock_tests.RLockTests): + locktype = staticmethod(lambda: LockType("some_lock")) + + # _is_owned() unsupported + test__is_owned = None + # acquire(blocking=False) unsupported + test_try_acquire = None + test_try_acquire_contended = None + # `with` unsupported + test_with = None + # acquire(timeout=...) unsupported + test_timeout = None + # _release_save() unsupported + test_release_save_unacquired = None + +else: + class ModuleLockAsRLockTests(unittest.TestCase): + pass + + +@unittest.skipUnless(threading, "threads needed for this test") +class DeadlockAvoidanceTests(unittest.TestCase): + + def run_deadlock_avoidance_test(self, create_deadlock): + NLOCKS = 10 + locks = [LockType(str(i)) for i in range(NLOCKS)] + pairs = [(locks[i], locks[(i+1)%NLOCKS]) for i in range(NLOCKS)] + if create_deadlock: + NTHREADS = NLOCKS + else: + NTHREADS = NLOCKS - 1 + barrier = threading.Barrier(NTHREADS) + results = [] + def _acquire(lock): + """Try to acquire the lock. Return True on success, False on deadlock.""" + try: + lock.acquire() + except DeadlockError: + return False + else: + return True + def f(): + a, b = pairs.pop() + ra = _acquire(a) + barrier.wait() + rb = _acquire(b) + results.append((ra, rb)) + if rb: + b.release() + if ra: + a.release() + lock_tests.Bunch(f, NTHREADS).wait_for_finished() + self.assertEqual(len(results), NTHREADS) + return results + + def test_deadlock(self): + results = self.run_deadlock_avoidance_test(True) + # One of the threads detected a potential deadlock on its second + # acquire() call. + self.assertEqual(results.count((True, False)), 1) + self.assertEqual(results.count((True, True)), len(results) - 1) + + def test_no_deadlock(self): + results = self.run_deadlock_avoidance_test(False) + self.assertEqual(results.count((True, False)), 0) + self.assertEqual(results.count((True, True)), len(results)) + + +class LifetimeTests(unittest.TestCase): + + def test_lock_lifetime(self): + name = "xyzzy" + self.assertNotIn(name, _bootstrap._module_locks) + lock = _bootstrap._get_module_lock(name) + self.assertIn(name, _bootstrap._module_locks) + wr = weakref.ref(lock) + del lock + support.gc_collect() + self.assertNotIn(name, _bootstrap._module_locks) + self.assertIs(wr(), None) + + def test_all_locks(self): + support.gc_collect() + self.assertEqual(0, len(_bootstrap._module_locks)) + + +@support.reap_threads +def test_main(): + support.run_unittest(ModuleLockAsRLockTests, + DeadlockAvoidanceTests, + LifetimeTests) + + +if __name__ == '__main__': + test_main() diff -r 3430d7329a3b -r edb9ce3a6c2e Lib/pydoc.py --- a/Lib/pydoc.py Thu May 17 17:37:02 2012 +0200 +++ b/Lib/pydoc.py Thu May 17 18:55:59 2012 +0200 @@ -167,7 +167,7 @@ if name in {'__builtins__', '__doc__', '__file__', '__path__', '__module__', '__name__', '__slots__', '__package__', '__cached__', '__author__', '__credits__', '__date__', - '__version__', '__qualname__'}: + '__version__', '__qualname__', '__initializing__'}: return 0 # Private names are hidden, but special names are displayed. if name.startswith('__') and name.endswith('__'): return 1 diff -r 3430d7329a3b -r edb9ce3a6c2e Lib/test/lock_tests.py --- a/Lib/test/lock_tests.py Thu May 17 17:37:02 2012 +0200 +++ b/Lib/test/lock_tests.py Thu May 17 18:55:59 2012 +0200 @@ -247,6 +247,17 @@ # Cannot release an unacquired lock lock = self.locktype() self.assertRaises(RuntimeError, lock.release) + lock.acquire() + lock.acquire() + lock.release() + lock.acquire() + lock.release() + lock.release() + self.assertRaises(RuntimeError, lock.release) + + def test_release_save_unacquired(self): + # Cannot _release_save an unacquired lock + lock = self.locktype() self.assertRaises(RuntimeError, lock._release_save) lock.acquire() lock.acquire() @@ -254,7 +265,6 @@ lock.acquire() lock.release() lock.release() - self.assertRaises(RuntimeError, lock.release) self.assertRaises(RuntimeError, lock._release_save) def test_different_thread(self): diff -r 3430d7329a3b -r edb9ce3a6c2e Lib/test/test_pkg.py --- a/Lib/test/test_pkg.py Thu May 17 17:37:02 2012 +0200 +++ b/Lib/test/test_pkg.py Thu May 17 18:55:59 2012 +0200 @@ -23,6 +23,8 @@ def fixdir(lst): if "__builtins__" in lst: lst.remove("__builtins__") + if "__initializing__" in lst: + lst.remove("__initializing__") return lst diff -r 3430d7329a3b -r edb9ce3a6c2e Lib/test/test_threaded_import.py --- a/Lib/test/test_threaded_import.py Thu May 17 17:37:02 2012 +0200 +++ b/Lib/test/test_threaded_import.py Thu May 17 18:55:59 2012 +0200 @@ -12,7 +12,7 @@ import shutil import unittest from test.support import ( - verbose, import_module, run_unittest, TESTFN, reap_threads) + verbose, import_module, run_unittest, TESTFN, reap_threads, forget) threading = import_module('threading') def task(N, done, done_tasks, errors): @@ -187,7 +187,7 @@ contents = contents % {'delay': delay} with open(os.path.join(TESTFN, name + ".py"), "wb") as f: f.write(contents.encode('utf-8')) - self.addCleanup(sys.modules.pop, name, None) + self.addCleanup(forget, name) results = [] def import_ab(): @@ -204,6 +204,21 @@ t2.join() self.assertEqual(set(results), {'a', 'b'}) + def test_side_effect_import(self): + code = """if 1: + import threading + def target(): + import random + t = threading.Thread(target=target) + t.start() + t.join()""" + sys.path.insert(0, os.curdir) + self.addCleanup(sys.path.remove, os.curdir) + with open(TESTFN + ".py", "wb") as f: + f.write(code.encode('utf-8')) + self.addCleanup(forget, TESTFN) + __import__(TESTFN) + @reap_threads def test_main(): diff -r 3430d7329a3b -r edb9ce3a6c2e Lib/token.py --- a/Lib/token.py Thu May 17 17:37:02 2012 +0200 +++ b/Lib/token.py Thu May 17 18:55:59 2012 +0200 @@ -70,7 +70,7 @@ tok_name = {value: name for name, value in globals().items() - if isinstance(value, int)} + if isinstance(value, int) and not name.startswith('_')} __all__.extend(tok_name.values()) def ISTERMINAL(x): diff -r 3430d7329a3b -r edb9ce3a6c2e Misc/NEWS --- a/Misc/NEWS Thu May 17 17:37:02 2012 +0200 +++ b/Misc/NEWS Thu May 17 18:55:59 2012 +0200 @@ -10,6 +10,10 @@ Core and Builtins ----------------- +- Issue #9260: A finer-grained import lock. Most of the import sequence + now uses per-module locks rather than the global import lock, eliminating + well-known issues with threads and imports. + - Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs. Patch by Serhiy Storchaka. diff -r 3430d7329a3b -r edb9ce3a6c2e Python/import.c --- a/Python/import.c Thu May 17 17:37:02 2012 +0200 +++ b/Python/import.c Thu May 17 18:55:59 2012 +0200 @@ -1370,47 +1370,7 @@ PyObject * PyImport_ImportModuleNoBlock(const char *name) { - PyObject *nameobj, *modules, *result; -#ifdef WITH_THREAD - long me; -#endif - - /* Try to get the module from sys.modules[name] */ - modules = PyImport_GetModuleDict(); - if (modules == NULL) - return NULL; - - nameobj = PyUnicode_FromString(name); - if (nameobj == NULL) - return NULL; - result = PyDict_GetItem(modules, nameobj); - if (result != NULL) { - Py_DECREF(nameobj); - Py_INCREF(result); - return result; - } - PyErr_Clear(); -#ifdef WITH_THREAD - /* check the import lock - * me might be -1 but I ignore the error here, the lock function - * takes care of the problem */ - me = PyThread_get_thread_ident(); - if (import_lock_thread == -1 || import_lock_thread == me) { - /* no thread or me is holding the lock */ - result = PyImport_Import(nameobj); - } - else { - PyErr_Format(PyExc_ImportError, - "Failed to import %R because the import lock" - "is held by another thread.", - nameobj); - result = NULL; - } -#else - result = PyImport_Import(nameobj); -#endif - Py_DECREF(nameobj); - return result; + return PyImport_ImportModule(name); } @@ -1420,11 +1380,13 @@ int level) { _Py_IDENTIFIER(__import__); + _Py_IDENTIFIER(__initializing__); _Py_IDENTIFIER(__package__); _Py_IDENTIFIER(__path__); _Py_IDENTIFIER(__name__); _Py_IDENTIFIER(_find_and_load); _Py_IDENTIFIER(_handle_fromlist); + _Py_IDENTIFIER(_lock_unlock_module); _Py_static_string(single_dot, "."); PyObject *abs_name = NULL; PyObject *builtins_import = NULL; @@ -1607,16 +1569,48 @@ goto error_with_unlock; } else if (mod != NULL) { + PyObject *value; + int initializing = 0; + Py_INCREF(mod); + /* Only call _bootstrap._lock_unlock_module() if __initializing__ is true. */ + value = _PyObject_GetAttrId(mod, &PyId___initializing__); + if (value == NULL) + PyErr_Clear(); + else { + initializing = PyObject_IsTrue(value); + Py_DECREF(value); + if (initializing == -1) + PyErr_Clear(); + } + if (initializing > 0) { + /* _bootstrap._lock_unlock_module() releases the import lock */ + value = _PyObject_CallMethodObjIdArgs(interp->importlib, + &PyId__lock_unlock_module, abs_name, + NULL); + if (value == NULL) + goto error; + Py_DECREF(value); + } + else { +#ifdef WITH_THREAD + if (_PyImport_ReleaseLock() < 0) { + PyErr_SetString(PyExc_RuntimeError, "not holding the import lock"); + goto error; + } +#endif + } } else { + /* _bootstrap._find_and_load() releases the import lock */ mod = _PyObject_CallMethodObjIdArgs(interp->importlib, &PyId__find_and_load, abs_name, builtins_import, NULL); if (mod == NULL) { - goto error_with_unlock; + goto error; } } + /* From now on we don't hold the import lock anymore. */ if (PyObject_Not(fromlist)) { if (level == 0 || PyUnicode_GET_LENGTH(name) > 0) { @@ -1625,12 +1619,12 @@ PyObject *borrowed_dot = _PyUnicode_FromId(&single_dot); if (borrowed_dot == NULL) { - goto error_with_unlock; + goto error; } partition = PyUnicode_Partition(name, borrowed_dot); if (partition == NULL) { - goto error_with_unlock; + goto error; } if (PyUnicode_GET_LENGTH(PyTuple_GET_ITEM(partition, 1)) == 0) { @@ -1638,7 +1632,7 @@ Py_DECREF(partition); final_mod = mod; Py_INCREF(mod); - goto exit_with_unlock; + goto error; } front = PyTuple_GET_ITEM(partition, 0); @@ -1657,7 +1651,7 @@ abs_name_len - cut_off); Py_DECREF(front); if (to_return == NULL) { - goto error_with_unlock; + goto error; } final_mod = PyDict_GetItem(interp->modules, to_return); @@ -1683,8 +1677,8 @@ fromlist, builtins_import, NULL); } + goto error; - exit_with_unlock: error_with_unlock: #ifdef WITH_THREAD if (_PyImport_ReleaseLock() < 0) { diff -r 3430d7329a3b -r edb9ce3a6c2e Python/importlib.h Binary file Python/importlib.h has changed /solipsis@pitrou.net