# cython: language_level=2
#
# Copyright (C) 2019  Nexedi SA and Contributors.
#                     Kirill Smelkov <kirr@nexedi.com>
#
# Program pylock_bug demonstrates the PyThread_release_lock bug on systems
# where POSIX semaphores are not available and a Python-level lock is
# implemented with a pthread mutex + condition variable. One such particular
# system is macOS.
#
# Usage:
#
#       $ cythonize -i pylock_bug.pyx
#       $ python -c 'import pylock_bug as _; _.test_sema_wakeup()'
#       ...
#       STUCK on iteration #<i>
#
#
# Bug description
#
# On Darwin, even though it is a POSIX system, Python uses a mutex + condition
# variable to implement its lock, and, as of 20190828, the Py2.7
# implementation contains a synchronization bug, even though a similar issue
# was fixed for Py3 in 2012: the condition is signalled after the mutex is
# unlocked, while the correct protocol is to signal the condition from under
# the mutex:
#
#       https://github.com/python/cpython/blob/v2.7.16-127-g0229b56d8c0/Python/thread_pthread.h#L486-L506
#       https://github.com/python/cpython/commit/187aa545165d (py3 fix)
#
# PyPy has the same bug for both pypy2 and pypy3:
#
#       https://bitbucket.org/pypy/pypy/src/578667b3fef9/rpython/translator/c/src/thread_pthread.c#lines-443:465
#       https://bitbucket.org/pypy/pypy/src/5b42890d48c3/rpython/translator/c/src/thread_pthread.c#lines-443:465
#
# Signalling a condition variable outside of the corresponding mutex is
# considered OK by POSIX, but in the Python context it can lead to at least
# memory corruption if we consider the whole lifetime of a Python-level lock.
# For example the following logical scenario:
#
#       T1                                      T2
#
#       sema = Lock()
#       sema.acquire()
#
#                                               sema.release()
#
#       sema.acquire()
#       free(sema)
#
#       ...
#
# can translate to the following C-level calls:
#
#       T1                                      T2
#
#       # sema = Lock()
#       sema = malloc(...)
#       sema.locked = 0
#       pthread_mutex_init(&sema.mut)
#       pthread_cond_init (&sema.lock_released)
#
#       # sema.acquire()
#       pthread_mutex_lock(&sema.mut)
#       # sees sema.locked == 0
#       sema.locked = 1
#       pthread_mutex_unlock(&sema.mut)
#
#
#                                               # sema.release()
#                                               pthread_mutex_lock(&sema.mut)
#                                               sema.locked = 0
#                                               pthread_mutex_unlock(&sema.mut)
#
#                       # OS scheduler gets in and relinquishes control from T2
#                       # to another process
#                                               ...
#
#       # second sema.acquire()
#       pthread_mutex_lock(&sema.mut)
#       # sees sema.locked == 0
#       sema.locked = 1
#       pthread_mutex_unlock(&sema.mut)
#
#       # free(sema)
#       pthread_mutex_destroy(&sema.mut)
#       pthread_cond_destroy (&sema.lock_released)
#       free(sema)
#
#       ...
#       # e.g. a malloc() which returns memory where sema was
#
#                                               ...
#                       # OS scheduler returns control to T2
#                       # sema.release() continues
#
#                       # BUT sema was already freed and writing anywhere
#                       # inside the sema block CORRUPTS MEMORY. In particular,
#                       # if _another_ Python-level lock was allocated where
#                       # the sema block was, writing into that memory can
#                       # break further synchronization and in particular lead
#                       # to a deadlock on the lock that was allocated next.
#                                               pthread_cond_signal(&sema.lock_released)
#
# Note that T2's pthread_cond_signal(&sema.lock_released) CORRUPTS MEMORY, as
# it is called after sema's memory has already been freed and potentially
# reallocated for another object.
#
# The fix is to do pthread_cond_signal under the corresponding mutex:
#
#       # sema.release()
#       pthread_mutex_lock(&sema.mut)
#       sema.locked = 0
#       pthread_cond_signal(&sema.lock_released)
#       pthread_mutex_unlock(&sema.mut)
#
# by cherry-picking commit 187aa545165d ("Signal condition variables with the
# mutex held. Destroy condition variables before their mutexes").
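#
# Why signalling under the mutex is sufficient (a sketch of the reasoning;
# names follow the scenario above, this is not an exact copy of CPython code):
# with the fix, T2 finishes pthread_cond_signal before it unlocks sema.mut,
# and T1's second sema.acquire() can observe sema.locked == 0 only while
# holding sema.mut, i.e. only after T2's unlock. Whether T1 was blocked in
# pthread_mutex_lock or in pthread_cond_wait, it must reacquire sema.mut to
# proceed, so by the time T1 can reach free(sema), T2 has completely stopped
# touching sema's memory:
#
#       T1                                      T2
#
#                                               # sema.release() (fixed)
#                                               pthread_mutex_lock(&sema.mut)
#                                               sema.locked = 0
#                                               pthread_cond_signal(&sema.lock_released)
#                                               pthread_mutex_unlock(&sema.mut)
#
#       # second sema.acquire()
#       pthread_mutex_lock(&sema.mut)           # blocks until T2's unlock
#       # sees sema.locked == 0
#       sema.locked = 1
#       pthread_mutex_unlock(&sema.mut)
#
#       free(sema)                              # T2 no longer touches sema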
#
#
# Bug history
#
# The bug has been there since 1994 - since at least [1]. It was discussed in
# 2001 with the original code author [2], but the code was still considered to
# be race-free. In 2010 the place where pthread_cond_signal should go - before
# or after pthread_mutex_unlock - was discussed with the rationale of avoiding
# thread bouncing [3,4,5], and in 2012 pthread_cond_signal was moved to be
# called from under the mutex, but only for CPython3 [6,7].
#
# In 2019 the bug was (re-)discovered while testing Pygolang [8] on macOS with
# CPython2, PyPy2 and PyPy3.
#
# [1] https://github.com/python/cpython/commit/2c8cb9f3d240
# [2] https://bugs.python.org/issue433625
# [3] https://bugs.python.org/issue8299#msg103224
# [4] https://bugs.python.org/issue8410#msg103313
# [5] https://bugs.python.org/issue8411#msg113301
# [6] https://bugs.python.org/issue15038#msg163187
# [7] https://github.com/python/cpython/commit/187aa545165d
# [8] https://pypi.org/project/pygolang

from cpython.pythread cimport PyThread_acquire_lock, PyThread_release_lock, \
        PyThread_allocate_lock, PyThread_free_lock, PyThread_type_lock, WAIT_LOCK, \
        PyThread_start_new_thread

from libc.stdio  cimport printf
from libc.stdlib cimport malloc, abort

from cpython.ceval cimport PyEval_InitThreads
PyEval_InitThreads()    # initializes threading and the GIL

import time

cdef nogil:

    struct WorkState:
        PyThread_type_lock mu    # use as mutex to protect vvv
        PyThread_type_lock sema  # use as semaphore for T1 <- T2 wakeup; reallocated on every iteration
        bint done

    void T2(void *_state):
        with gil:   # create py thread state
            with nogil:
                _T2(_state)

    void _T2(void *_state):
        state = <WorkState*>_state
        cdef int i = 0
        cdef double Wstart, now
        while 1:
            i += 1

            # wait till state.sema != NULL and pop it
            with gil:
                Wstart = time.time()
            while 1:
                XPyThread_acquire_lock(state.mu)
                sema = state.sema
                if sema != NULL:
                    state.sema = NULL
                done = state.done
                PyThread_release_lock(state.mu)

                if done:
                    return
                if sema != NULL:
                    break

                with gil:
                    now = time.time()
                if (now - Wstart) > 3:
                    printf("\nSTUCK on iteration #%d\n", i)
                    panic("STUCK")

            # we have popped .sema from state:
            # either release, or release + wake up an in-progress acquire
            PyThread_release_lock(sema)
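
    # Per-iteration handshake between T1 and T2 (a summary added for clarity;
    # it restates what the code below does):
    #
    #   T1: sema = XPyThread_allocate_lock(); acquire(sema)  # sema starts owned by T1
    #   T1: state.sema = sema                                # publish under state.mu
    #   T2: pop sema from state.sema                         # under state.mu
    #   T2: PyThread_release_lock(sema)                      # wake up T1
    #   T1: acquire(sema)                                    # returns once T2's release ran
    #   T1: PyThread_free_lock(sema)                         # this free is what races with
    #                                                        # the tail of T2's release on
    #                                                        # affected systems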

    # T1
    void _test_sema_wakeup():
        cdef WorkState *state = <WorkState*>malloc(sizeof(WorkState))
        if state == NULL:
            panic("malloc -> NULL")
        state.sema = NULL
        state.done = False
        with gil:
            state.mu = XPyThread_allocate_lock()
            XPyThread_start_new_thread(T2, state)

        N = 100000
        cdef PyThread_type_lock sema_prev = NULL
        for i in range(N):
            with gil:
                sema = XPyThread_allocate_lock()
            XPyThread_acquire_lock(sema)

            # delta between where sema was (re)allocated compared to sema_prev;
            # 0 means that the memory of sema_prev was reused.
            printf("d(sema_prev, sema): %ld\n", sema - sema_prev)

            XPyThread_acquire_lock(state.mu)
            state.sema = sema
            PyThread_release_lock(state.mu)

            # acquire sema the second time. Either
            # - we do it without blocking, which means that the corresponding
            #   release in T2 had already run, or
            # - we block waiting for the corresponding release in T2 to run.
            XPyThread_acquire_lock(sema)

            #PyThread_release_lock(sema)    # (to free sema in released state)
                                            # (gets stuck both with and without it)

            # we have acquired sema twice, which means that the release in T2
            # had already run and sema is no longer used in T2. It should thus
            # be safe to free it.
            with gil:
                PyThread_free_lock(sema)
            sema_prev = sema

        XPyThread_acquire_lock(state.mu)
        state.done = True
        PyThread_release_lock(state.mu)

        # XXX free sema, mu, state


def test_sema_wakeup():
    with nogil:
        _test_sema_wakeup()


# ---- misc ----

cdef void XPyThread_start_new_thread(void (*f)(void *), void *arg):
    tid = PyThread_start_new_thread(f, arg)
    if tid == -1:
        panic("start_thread -> failed")

cdef PyThread_type_lock XPyThread_allocate_lock():
    lock = PyThread_allocate_lock()
    if lock == NULL:
        panic("allocate_lock -> NULL")
    return lock

cdef void XPyThread_acquire_lock(PyThread_type_lock lock) nogil:
    ok = PyThread_acquire_lock(lock, WAIT_LOCK)
    if not ok:
        panic("acquire_lock -> failed")

cdef extern from "Python.h":
    void Py_FatalError(const char *msg)

cdef void panic(const char *msg) nogil:
    with gil:
        Py_FatalError(msg)
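
# Example session on an affected system (a sketch with hypothetical numbers;
# the concrete deltas and the iteration count vary from run to run). The
# output lines follow the printf formats above; Py_FatalError prefixes its
# message with "Fatal Python error: " and aborts:
#
#       $ cythonize -i pylock_bug.pyx
#       $ python -c 'import pylock_bug as _; _.test_sema_wakeup()'
#       d(sema_prev, sema): 140645466765408     <- first iteration: sema_prev = NULL
#       d(sema_prev, sema): 0                   <- sema reused the freed memory
#       d(sema_prev, sema): 0
#       ...
#       STUCK on iteration #8094
#       Fatal Python error: STUCK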