bpo-28685: Optimize sorted() list.sort() with type-specialized compar… · python/cpython@1e34da4 (original) (raw)

`@@ -260,6 +260,120 @@ def my_cmp_reversed(x, y):

`

260

260

`self.assertEqual(data, copy2)

`

261

261

``

262

262

`#==============================================================================

`

``

263

`+

def check_against_PyObject_RichCompareBool(self, L):

`

``

264

`+

The idea here is to exploit the fact that unsafe_tuple_compare uses

`

``

265

`+

PyObject_RichCompareBool for the second elements of tuples. So we have,

`

``

266

`+

for (most) L, sorted(L) == [y[1] for y in sorted([(0,x) for x in L])]

`

``

267

`+

This will work as long as eq => not lt for all the objects in L,

`

``

268

`+

which holds for all the types used below.

`

``

269

`+

`

``

270

`+

Testing this way ensures that the optimized implementation remains consistent

`

``

271

`+

with the naive implementation, even if changes are made to any of the

`

``

272

`+

richcompares.

`

``

273

`+

`

``

274

`+

This function tests sorting for three lists (it randomly shuffles each one):

`

``

275

`+

1. L

`

``

276

`+

2. [(x,) for x in L]

`

``

277

`+

3. [((x,),) for x in L]

`

``

278

+

``

279

`+

random.seed(0)

`

``

280

`+

random.shuffle(L)

`

``

281

`+

L_1 = L[:]

`

``

282

`+

L_2 = [(x,) for x in L]

`

``

283

`+

L_3 = [((x,),) for x in L]

`

``

284

`+

for L in [L_1, L_2, L_3]:

`

``

285

`+

optimized = sorted(L)

`

``

286

`+

reference = [y[1] for y in sorted([(0,x) for x in L])]

`

``

287

`+

for (opt, ref) in zip(optimized, reference):

`

``

288

`+

self.assertIs(opt, ref)

`

``

289

`+

#note: not assertEqual! We want to ensure identical behavior.

`

``

290

+

``

291

`+

class TestOptimizedCompares(unittest.TestCase):

`

``

292

`+

def test_safe_object_compare(self):

`

``

293

`+

heterogeneous_lists = [[0, 'foo'],

`

``

294

`+

[0.0, 'foo'],

`

``

295

`+

[('foo',), 'foo']]

`

``

296

`+

for L in heterogeneous_lists:

`

``

297

`+

self.assertRaises(TypeError, L.sort)

`

``

298

`+

self.assertRaises(TypeError, [(x,) for x in L].sort)

`

``

299

`+

self.assertRaises(TypeError, [((x,),) for x in L].sort)

`

``

300

+

``

301

`+

float_int_lists = [[1,1.1],

`

``

302

`+

[1<<70,1.1],

`

``

303

`+

[1.1,1],

`

``

304

`+

[1.1,1<<70]]

`

``

305

`+

for L in float_int_lists:

`

``

306

`+

check_against_PyObject_RichCompareBool(self, L)

`

``

307

+

``

308

`+

def test_unsafe_object_compare(self):

`

``

309

+

``

310

`+

This test is by ppperry. It ensures that unsafe_object_compare is

`

``

311

`+

verifying ms->key_richcompare == tp->richcompare before comparing.

`

``

312

+

``

313

`+

class WackyComparator(int):

`

``

314

`+

def lt(self, other):

`

``

315

`+

elem.class = WackyList2

`

``

316

`+

return int.lt(self, other)

`

``

317

+

``

318

`+

class WackyList1(list):

`

``

319

`+

pass

`

``

320

+

``

321

`+

class WackyList2(list):

`

``

322

`+

def lt(self, other):

`

``

323

`+

raise ValueError

`

``

324

+

``

325

`+

L = [WackyList1([WackyComparator(i), i]) for i in range(10)]

`

``

326

`+

elem = L[-1]

`

``

327

`+

with self.assertRaises(ValueError):

`

``

328

`+

L.sort()

`

``

329

+

``

330

`+

L = [WackyList1([WackyComparator(i), i]) for i in range(10)]

`

``

331

`+

elem = L[-1]

`

``

332

`+

with self.assertRaises(ValueError):

`

``

333

`+

[(x,) for x in L].sort()

`

``

334

+

``

335

`+

The following test is also by ppperry. It ensures that

`

``

336

`+

unsafe_object_compare handles Py_NotImplemented appropriately.

`

``

337

`+

class PointlessComparator:

`

``

338

`+

def lt(self, other):

`

``

339

`+

return NotImplemented

`

``

340

`+

L = [PointlessComparator(), PointlessComparator()]

`

``

341

`+

self.assertRaises(TypeError, L.sort)

`

``

342

`+

self.assertRaises(TypeError, [(x,) for x in L].sort)

`

``

343

+

``

344

`+

The following tests go through various types that would trigger

`

``

345

`+

ms->key_compare = unsafe_object_compare

`

``

346

`+

lists = [list(range(100)) + [(1<<70)],

`

``

347

`+

[str(x) for x in range(100)] + ['\uffff'],

`

``

348

`+

[bytes(x) for x in range(100)],

`

``

349

`+

[cmp_to_key(lambda x,y: x<y)(x) for x in range(100)]]

`

``

350

`+

for L in lists:

`

``

351

`+

check_against_PyObject_RichCompareBool(self, L)

`

``

352

+

``

353

`+

def test_unsafe_latin_compare(self):

`

``

354

`+

check_against_PyObject_RichCompareBool(self, [str(x) for

`

``

355

`+

x in range(100)])

`

``

356

+

``

357

`+

def test_unsafe_long_compare(self):

`

``

358

`+

check_against_PyObject_RichCompareBool(self, [x for

`

``

359

`+

x in range(100)])

`

``

360

+

``

361

`+

def test_unsafe_float_compare(self):

`

``

362

`+

check_against_PyObject_RichCompareBool(self, [float(x) for

`

``

363

`+

x in range(100)])

`

``

364

+

``

365

`+

def test_unsafe_tuple_compare(self):

`

``

366

`+

This test was suggested by Tim Peters. It verifies that the tuple

`

``

367

`+

comparison respects the current tuple compare semantics, which do not

`

``

368

`+

guarantee that x < x <=> (x,) < (x,)

`

``

369

`+

`

``

370

`+

Note that we don't have to put anything in tuples here, because

`

``

371

`+

the check function does a tuple test automatically.

`

``

372

+

``

373

`+

check_against_PyObject_RichCompareBool(self, [float('nan')]*100)

`

``

374

`+

check_against_PyObject_RichCompareBool(self, [float('nan') for

`

``

375

`+

_ in range(100)])

`

``

376

`+

#==============================================================================

`

263

377

``

264

378

`if name == "main":

`

265

379

`unittest.main()

`