CLN: Move test_parallel to gil.py (#47068) · pandas-dev/pandas@1be9d38 (original) (raw)
``
1
`+
from functools import wraps
`
``
2
`+
import threading
`
``
3
+
1
4
`import numpy as np
`
2
5
``
3
6
`from pandas import (
`
30
33
`from pandas._libs import algos
`
31
34
`except ImportError:
`
32
35
`from pandas import algos
`
33
``
`-
try:
`
34
``
`-
from pandas._testing import test_parallel # noqa: PDF014
`
35
36
``
36
``
`-
have_real_test_parallel = True
`
37
``
`-
except ImportError:
`
38
``
`-
have_real_test_parallel = False
`
39
37
``
40
``
`-
def test_parallel(num_threads=1):
`
41
``
`-
def wrapper(fname):
`
42
``
`-
return fname
`
``
38
`+
from .pandas_vb_common import BaseIO # isort:skip
`
43
39
``
44
``
`-
return wrapper
`
45
40
``
``
41
`+
def test_parallel(num_threads=2, kwargs_list=None):
    """
    Decorator to run the same function multiple times in parallel.

    Parameters
    ----------
    num_threads : int, optional
        The number of times the function is run in parallel.
    kwargs_list : list of dicts, optional
        The list of kwargs to update original
        function kwargs on different threads.

    Raises
    ------
    ValueError
        If ``num_threads`` is not positive, or if ``kwargs_list`` is given
        and its length differs from ``num_threads``.

    Notes
    -----
    This decorator does not pass the return value of the decorated function.

    If the decorated function raises in one or more threads, the first
    exception recorded is re-raised in the calling thread once every thread
    has finished, so a failing body is not silently reported as a success.

    Original from scikit-image:

    https://github.com/scikit-image/scikit-image/pull/1519

    """
    # Explicit raises instead of ``assert``: asserts vanish under ``python -O``.
    if num_threads <= 0:
        raise ValueError(f"num_threads must be positive, got {num_threads!r}")
    if kwargs_list is not None and len(kwargs_list) != num_threads:
        raise ValueError(
            f"kwargs_list must contain one dict per thread: expected "
            f"{num_threads}, got {len(kwargs_list)}"
        )

    def wrapper(func):
        @wraps(func)
        def inner(*args, **kwargs):
            # Exceptions raised inside a Thread never reach the caller on
            # their own; collect them here and re-raise after joining.
            errors = []

            def run(thread_kwargs):
                try:
                    func(*args, **thread_kwargs)
                except Exception as err:
                    errors.append(err)

            if kwargs_list is None:
                per_thread_kwargs = [kwargs] * num_threads
            else:
                # Per-thread entries override the call-time kwargs.
                per_thread_kwargs = [
                    dict(kwargs, **overrides) for overrides in kwargs_list
                ]

            threads = [
                threading.Thread(target=run, args=(thread_kwargs,))
                for thread_kwargs in per_thread_kwargs
            ]
            # Start every thread before joining any so they actually overlap.
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()

            if errors:
                raise errors[0]

        return inner

    return wrapper


# Despite the ``test_`` prefix this is a decorator factory, not a test case;
# keep pytest from collecting it when the name is imported into a test module.
test_parallel.__test__ = False
`
48
87
``
49
88
``
50
89
`class ParallelGroupbyMethods:
`
`@@ -53,8 +92,7 @@ class ParallelGroupbyMethods:
`
53
92
`param_names = ["threads", "method"]
`
54
93
``
55
94
`def setup(self, threads, method):
`
56
``
`-
if not have_real_test_parallel:
`
57
``
`-
raise NotImplementedError
`
``
95
+
58
96
`N = 10**6
`
59
97
`ngroups = 10**3
`
60
98
`df = DataFrame(
`
`@@ -86,8 +124,7 @@ class ParallelGroups:
`
86
124
`param_names = ["threads"]
`
87
125
``
88
126
`def setup(self, threads):
`
89
``
`-
if not have_real_test_parallel:
`
90
``
`-
raise NotImplementedError
`
``
127
+
91
128
`size = 2**22
`
92
129
`ngroups = 10**3
`
93
130
`data = Series(np.random.randint(0, ngroups, size=size))
`
`@@ -108,8 +145,7 @@ class ParallelTake1D:
`
108
145
`param_names = ["dtype"]
`
109
146
``
110
147
`def setup(self, dtype):
`
111
``
`-
if not have_real_test_parallel:
`
112
``
`-
raise NotImplementedError
`
``
148
+
113
149
`N = 10**6
`
114
150
`df = DataFrame({"col": np.arange(N, dtype=dtype)})
`
115
151
`indexer = np.arange(100, len(df) - 100)
`
`@@ -131,8 +167,7 @@ class ParallelKth:
`
131
167
`repeat = 5
`
132
168
``
133
169
`def setup(self):
`
134
``
`-
if not have_real_test_parallel:
`
135
``
`-
raise NotImplementedError
`
``
170
+
136
171
`N = 10**7
`
137
172
`k = 5 * 10**5
`
138
173
`kwargs_list = [{"arr": np.random.randn(N)}, {"arr": np.random.randn(N)}]
`
`@@ -149,8 +184,7 @@ def time_kth_smallest(self):
`
149
184
``
150
185
`class ParallelDatetimeFields:
`
151
186
`def setup(self):
`
152
``
`-
if not have_real_test_parallel:
`
153
``
`-
raise NotImplementedError
`
``
187
+
154
188
`N = 10**6
`
155
189
`self.dti = date_range("1900-01-01", periods=N, freq="T")
`
156
190
`self.period = self.dti.to_period("D")
`
`@@ -204,8 +238,7 @@ class ParallelRolling:
`
204
238
`param_names = ["method"]
`
205
239
``
206
240
`def setup(self, method):
`
207
``
`-
if not have_real_test_parallel:
`
208
``
`-
raise NotImplementedError
`
``
241
+
209
242
`win = 100
`
210
243
`arr = np.random.rand(100000)
`
211
244
`if hasattr(DataFrame, "rolling"):
`
`@@ -248,8 +281,7 @@ class ParallelReadCSV(BaseIO):
`
248
281
`param_names = ["dtype"]
`
249
282
``
250
283
`def setup(self, dtype):
`
251
``
`-
if not have_real_test_parallel:
`
252
``
`-
raise NotImplementedError
`
``
284
+
253
285
`rows = 10000
`
254
286
`cols = 50
`
255
287
`data = {
`
`@@ -284,8 +316,6 @@ class ParallelFactorize:
`
284
316
`param_names = ["threads"]
`
285
317
``
286
318
`def setup(self, threads):
`
287
``
`-
if not have_real_test_parallel:
`
288
``
`-
raise NotImplementedError
`
289
319
``
290
320
`strings = tm.makeStringIndex(100000)
`
291
321
``