sort with a key speed patch - Code Review (original) (raw)
OLD
NEW
1 /* List object implementation */
1 /* List object implementation */
2
2
3 #include "Python.h"
3 #include "Python.h"
4
4
5 #ifdef STDC_HEADERS
5 #ifdef STDC_HEADERS
6 #include <stddef.h>
6 #include <stddef.h>
7 #else
7 #else
8 #include <sys/types.h> /* For size_t */
8 #include <sys/types.h> /* For size_t */
9 #endif
9 #endif
10
10
(...skipping 922 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
933 *hi = t;
933 *hi = t;
934 ++lo;
934 ++lo;
935 --hi;
935 --hi;
936 }
936 }
937 }
937 }
938
938
939 /* Lots of code for an adaptive, stable, natural mergesort. There are many
939 /* Lots of code for an adaptive, stable, natural mergesort. There are many
940 * pieces to this algorithm; read listsort.txt for overviews and details.
940 * pieces to this algorithm; read listsort.txt for overviews and details.
941 */
941 */
942
942
943 /* A sortslice contains a pointer to an array of keys and a pointer to
944 * an array of corresponding values. In other words, keys[i]
945 * corresponds with values[i]. If values == NULL, then the keys are
946 * also the values.
947 *
948 * Several convenience routines are provided here, so that keys and
949 * values are always moved in sync.
950 */
951 typedef struct {
952 PyObject **keys;
953 PyObject **values;
954 } sortslice;
955
956 Py_LOCAL_INLINE(void)
957 sortslice_copy(sortslice *s1, Py_ssize_t i, sortslice *s2, Py_ssize_t j)
958 {
959 s1->keys[i] = s2->keys[j];
960 if (s1->values != NULL)
961 s1->values[i] = s2->values[j];
962 }
963
964 Py_LOCAL_INLINE(void)
965 sortslice_copy_incr(sortslice *dst, sortslice *src) {
966 *dst->keys++ = *src->keys++;
967 if (dst->values != NULL)
968 *dst->values++ = *src->values++;
969 }
970
971 Py_LOCAL_INLINE(void)
972 sortslice_copy_decr(sortslice *dst, sortslice *src) {
973 *dst->keys-- = *src->keys--;
974 if (dst->values != NULL)
975 *dst->values-- = *src->values--;
976 }
977
978
979 Py_LOCAL_INLINE(void)
980 sortslice_memcpy(sortslice *s1, Py_ssize_t i, sortslice *s2, Py_ssize_t j,
981 Py_ssize_t n) {
982 memcpy(&s1->keys[i], &s2->keys[j], sizeof(PyObject *) * n);
983 if (s1->values != NULL)
984 memcpy(&s1->values[i], &s2->values[j], sizeof(PyObject *) * n);
985 }
986
987 Py_LOCAL_INLINE(void)
988 sortslice_memmove(sortslice *s1, Py_ssize_t i, sortslice *s2, Py_ssize_t j,
989 Py_ssize_t n) {
990 memmove(&s1->keys[i], &s2->keys[j], sizeof(PyObject *) * n);
991 if (s1->values != NULL)
992 memmove(&s1->values[i], &s2->values[j], sizeof(PyObject *) * n);
993 }
994
995 Py_LOCAL_INLINE(void)
996 sortslice_advance(sortslice *slice, Py_ssize_t n) {
997 slice->keys += n;
998 if (slice->values != NULL)
999 slice->values += n;
1000 }
1001
943 /* Comparison function: PyObject_RichCompareBool with Py_LT.
1002 /* Comparison function: PyObject_RichCompareBool with Py_LT.
944 * Returns -1 on error, 1 if x < y, 0 if x >= y.
1003 * Returns -1 on error, 1 if x < y, 0 if x >= y.
945 */
1004 */
946
1005
947 #define ISLT(X, Y) (PyObject_RichCompareBool(X, Y, Py_LT))
1006 #define ISLT(X, Y) (PyObject_RichCompareBool((X), (Y), Py_LT))
948
1007
949 /* Compare X to Y via "<". Goto "fail" if the comparison raises an
1008 /* Compare X to Y via "<". Goto "fail" if the comparison raises an
950 error. Else "k" is set to true iff X<Y, and an "if (k)" block is
1009 error. Else "k" is set to true iff X<Y, and an "if (k)" block is
951 started. It makes more sense in context . X and Y are PyObject*s.
1010 started. It makes more sense in context . X and Y are PyObject*s.
952 */
1011 */
953 #define IFLT(X, Y) if ((k = ISLT(X, Y)) < 0) goto fail; \
1012 #define IFLT(X, Y) if ((k = ISLT(X, Y)) < 0) goto fail; \
954 if (k)
1013 if (k)
955
1014
956 /* binarysort is the best method for sorting small arrays: it does
1015 /* binarysort is the best method for sorting small arrays: it does
957 few compares, but can do data movement quadratic in the number of
1016 few compares, but can do data movement quadratic in the number of
958 elements.
1017 elements.
959 [lo, hi) is a contiguous slice of a list, and is sorted via
1018 [lo, hi) is a contiguous slice of a list, and is sorted via
960 binary insertion. This sort is stable.
1019 binary insertion. This sort is stable.
961 On entry, must have lo <= start <= hi, and that [lo, start) is already
1020 On entry, must have lo < start <= hi, and that [lo, start) is already
962 sorted (pass start == lo if you don't know!).
1021 sorted (pass start == lo+1 if you don't know!).
963 If islt() complains return -1, else 0.
1022 If islt() complains return -1, else 0.
964 Even in case of error, the output slice will be some permutation of
1023 Even in case of error, the output slice will be some permutation of
965 the input (nothing is lost or duplicated).
1024 the input (nothing is lost or duplicated).
966 */
1025 */
967 static int
1026 Py_LOCAL_INLINE(int)
968 binarysort(PyObject **lo, PyObject **hi, PyObject **start)
1027 binarysort(sortslice lo, int n, int start_offset)
969 {
1028 {
970 register Py_ssize_t k;
1029 PyObject **next = lo.keys + start_offset;
971 register PyObject **l, **p, **r;
1030 int k;
972 register PyObject *pivot;
1031 PyObject **l, **p, **r;
1032 PyObject *pivot;
973
1033
974 assert(lo <= start && start <= hi);
1034 assert(start_offset >= 0);
975 /* assert [lo, start) is sorted */
1035 /* assert [lo, next) is sorted */
976 if (lo == start)
1036 for (; next - lo.keys < n; ++next) {
977 ++start;
1037 /* set l to where *next belongs */
978 for (; start < hi; ++start) {
1038 l = lo.keys;
979 /* set l to where *start belongs */
1039 r = next;
980 l = lo;
981 r = start;
982 pivot = *r;
1040 pivot = *r;
983 /* Invariants:
1041 /* Invariants:
984 * pivot >= all in [lo, l).
1042 * pivot >= all in [lo, l).
985 * pivot < all in [r, start).
1043 * pivot < all in [r, next).
986 * The second is vacuously true at the start.
1044 * The second is vacuously true at the next.
987 */
1045 */
988 assert(l < r);
1046 assert(l < r);
989 do {
1047 do {
990 p = l + ((r - l) >> 1);
1048 p = l + ((r - l) >> 1);
991 IFLT(pivot, *p)
1049 if ((k = ISLT(pivot, *p))) {
1050 if (k < 0) return -1;
992 r = p;
1051 r = p;
993 else
1052 } else
994 l = p+1;
1053 l = p+1;
995 } while (l < r);
1054 } while (l < r);
996 assert(l == r);
1055 assert(l == r);
997 /* The invariants still hold, so pivot >= all in [lo, l) and
1056 /* The invariants still hold, so pivot >= all in [lo, l) and
998 pivot < all in [l, start), so pivot belongs at l. Note
1057 pivot < all in [l, next), so pivot belongs at l. Note
999 that if there are elements equal to pivot, l points to the
1058 that if there are elements equal to pivot, l points to the
1000 first slot after them -- that's why this sort is stable.
1059 first slot after them -- that's why this sort is stable.
1001 Slide over to make room.
1060 Slide over to make room.
1002 Caution: using memmove is much slower under MSVC 5;
1061 Caution: using memmove is much slower under MSVC 5;
1003 we're not usually moving many slots. */
1062 we're not usually moving many slots. */
1004 for (p = start; p > l; --p)
1063 for (p = next; p > l; --p)
1005 *p = *(p-1);
1064 *p = *(p-1);
1006 *l = pivot;
1065 *l = pivot;
1066 if (lo.values != NULL) {
1067 Py_ssize_t offset = lo.values - lo.keys;
1068 p = next + offset;
1069 pivot = *p;
1070 l += offset;
1071 for (p = next + offset; p > l; --p)
1072 *p = *(p-1);
1073 *l = pivot;
1074 }
1007 }
1075 }
1008 return 0;
1076 return 0;
1009
1010 fail:
1011 return -1;
1012 }
1077 }
1013
1078
1014 /*
1079 /*
1015 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi
1080 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi
1016 is required on entry. "A run" is the longest ascending sequence, with
1081 is required on entry. "A run" is the longest ascending sequence, with
1017
1082
1018 lo[0] <= lo[1] <= lo[2] <= ...
1083 lo[0] <= lo[1] <= lo[2] <= ...
1019
1084
1020 or the longest descending sequence, with
1085 or the longest descending sequence, with
1021
1086
1022 lo[0] > lo[1] > lo[2] > ...
1087 lo[0] > lo[1] > lo[2] > ...
1023
1088
1024 Boolean *descending is set to 0 in the former case, or to 1 in the latter.
1089 Boolean *descending is set to 0 in the former case, or to 1 in the latter.
1025 For its intended use in a stable mergesort, the strictness of the defn of
1090 For its intended use in a stable mergesort, the strictness of the defn of
1026 "descending" is needed so that the caller can safely reverse a descending
1091 "descending" is needed so that the caller can safely reverse a descending
1027 sequence without violating stability (strict > ensures there are no equal
1092 sequence without violating stability (strict > ensures there are no equal
1028 elements to get out of order).
1093 elements to get out of order).
1029
1094
1030 Returns -1 in case of error.
1095 Returns -1 in case of error.
1031 */
1096 */
1032 static Py_ssize_t
1097 Py_LOCAL_INLINE(Py_ssize_t)
1033 count_run(PyObject **lo, PyObject **hi, int *descending)
1098 count_run(PyObject **lo, Py_ssize_t nremaining, int *descending)
1034 {
1099 {
1035 Py_ssize_t k;
1100 PyObject **hi = lo + nremaining;
1101 int k;
1036 Py_ssize_t n;
1102 Py_ssize_t n;
1037
1103
1038 assert(lo < hi);
1104 assert(lo < hi);
1039 *descending = 0;
1105 *descending = 0;
1040 ++lo;
1106 ++lo;
1041 if (lo == hi)
1107 if (lo == hi)
1042 return 1;
1108 return 1;
1043
1109
1044 n = 2;
1110 n = 2;
1045 IFLT(*lo, *(lo-1)) {
1111 if ((k = ISLT(*lo, *(lo-1)))) {
1112 if (k < 0) goto fail;
1046 *descending = 1;
1113 *descending = 1;
1047 for (lo = lo+1; lo < hi; ++lo, ++n) {
1114 for (lo = lo+1; lo < hi; ++lo, ++n) {
1048 IFLT(*lo, *(lo-1))
1115 if ((k = ISLT(*lo, *(lo-1)))) {
1049 ;
1116 if (k < 0) goto fail;
1050 else
1117 } else
1051 break;
1118 break;
1052 }
1119 }
1053 }
1120 } else {
1054 else {
1055 for (lo = lo+1; lo < hi; ++lo, ++n) {
1121 for (lo = lo+1; lo < hi; ++lo, ++n) {
1056 IFLT(*lo, *(lo-1))
1122 if ((k = ISLT(*lo, *(lo-1)))) {
1123 if (k < 0) goto fail;
1057 break;
1124 break;
1125 }
1058 }
1126 }
1059 }
1127 }
1060
1128
1061 return n;
1129 return n;
1062 fail:
1130 fail:
1063 return -1;
1131 return -1;
1064 }
1132 }
1065
1133
1066 /*
1134 /*
1067 Locate the proper position of key in a sorted vector; if the vector contains
1135 Locate the proper position of key in a sorted vector; if the vector contains
(...skipping 14 matching lines...) Expand all Loading...
1082 key belongs at index k; or, IOW, the first k elements of a should precede
1150 key belongs at index k; or, IOW, the first k elements of a should precede
1083 key, and the last n-k should follow key.
1151 key, and the last n-k should follow key.
1084
1152
1085 Returns -1 on error. See listsort.txt for info on the method.
1153 Returns -1 on error. See listsort.txt for info on the method.
1086 */
1154 */
1087 static Py_ssize_t
1155 static Py_ssize_t
1088 gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
1156 gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
1089 {
1157 {
1090 Py_ssize_t ofs;
1158 Py_ssize_t ofs;
1091 Py_ssize_t lastofs;
1159 Py_ssize_t lastofs;
1092 Py_ssize_t k;
1160 int k;
1093
1161
1094 assert(key && a && n > 0 && hint >= 0 && hint < n);
1162 assert(key && a && n > 0 && hint >= 0 && hint < n);
1095
1163
1096 a += hint;
1164 a += hint;
1097 lastofs = 0;
1165 lastofs = 0;
1098 ofs = 1;
1166 ofs = 1;
1099 IFLT(*a, key) {
1167 IFLT(*a, key) {
1100 /* a[hint] < key -- gallop right, until
1168 /* a[hint] < key -- gallop right, until
1101 * a[hint + lastofs] < key <= a[hint + ofs]
1169 * a[hint + lastofs] < key <= a[hint + ofs]
1102 */
1170 */
(...skipping 12 matching lines...) Expand all Loading...
1115 ofs = maxofs;
1183 ofs = maxofs;
1116 /* Translate back to offsets relative to &a[0]. */
1184 /* Translate back to offsets relative to &a[0]. */
1117 lastofs += hint;
1185 lastofs += hint;
1118 ofs += hint;
1186 ofs += hint;
1119 }
1187 }
1120 else {
1188 else {
1121 /* key <= a[hint] -- gallop left, until
1189 /* key <= a[hint] -- gallop left, until
1122 * a[hint - ofs] < key <= a[hint - lastofs]
1190 * a[hint - ofs] < key <= a[hint - lastofs]
1123 */
1191 */
1124 const Py_ssize_t maxofs = hint + 1; /* &a[0] is lowest */
1192 const Py_ssize_t maxofs = hint + 1; /* &a[0] is lowest */
1193 Py_ssize_t tmp;
1125 while (ofs < maxofs) {
1194 while (ofs < maxofs) {
1126 IFLT(*(a-ofs), key)
1195 IFLT(*(a-ofs), key)
1127 break;
1196 break;
1128 /* key <= a[hint - ofs] */
1197 /* key <= a[hint - ofs] */
1129 lastofs = ofs;
1198 lastofs = ofs;
1130 ofs = (ofs << 1) + 1;
1199 ofs = (ofs << 1) + 1;
1131 if (ofs <= 0) /* int overflow */
1200 if (ofs <= 0) /* int overflow */
1132 ofs = maxofs;
1201 ofs = maxofs;
1133 }
1202 }
1134 if (ofs > maxofs)
1203 if (ofs > maxofs)
1135 ofs = maxofs;
1204 ofs = maxofs;
1136 /* Translate back to positive offsets relative to &a[0]. */
1205 /* Translate back to positive offsets relative to &a[0]. */
1137 k = lastofs;
1206 tmp = lastofs;
1138 lastofs = hint - ofs;
1207 lastofs = hint - ofs;
1139 ofs = hint - k;
1208 ofs = hint - tmp;
1140 }
1209 }
1141 a -= hint;
1210 a -= hint;
1142
1211
1143 assert(-1 <= lastofs && lastofs < ofs && ofs <= n);
1212 assert(-1 <= lastofs && lastofs < ofs && ofs <= n);
1144 /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
1213 /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
1145 * right of lastofs but no farther right than ofs. Do a binary
1214 * right of lastofs but no farther right than ofs. Do a binary
1146 * search, with invariant a[lastofs-1] < key <= a[ofs].
1215 * search, with invariant a[lastofs-1] < key <= a[ofs].
1147 */
1216 */
1148 ++lastofs;
1217 ++lastofs;
1149 while (lastofs < ofs) {
1218 while (lastofs < ofs) {
(...skipping 23 matching lines...) Expand all Loading...
1173
1242
1174 The code duplication is massive, but this is enough different given that
1243 The code duplication is massive, but this is enough different given that
1175 we're sticking to "<" comparisons that it's much harder to follow if
1244 we're sticking to "<" comparisons that it's much harder to follow if
1176 written as one routine with yet another "left or right?" flag.
1245 written as one routine with yet another "left or right?" flag.
1177 */
1246 */
1178 static Py_ssize_t
1247 static Py_ssize_t
1179 gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
1248 gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint)
1180 {
1249 {
1181 Py_ssize_t ofs;
1250 Py_ssize_t ofs;
1182 Py_ssize_t lastofs;
1251 Py_ssize_t lastofs;
1183 Py_ssize_t k;
1252 int k;
1184
1253
1185 assert(key && a && n > 0 && hint >= 0 && hint < n);
1254 assert(key && a && n > 0 && hint >= 0 && hint < n);
1186
1255
1187 a += hint;
1256 a += hint;
1188 lastofs = 0;
1257 lastofs = 0;
1189 ofs = 1;
1258 ofs = 1;
1190 IFLT(key, *a) {
1259 IFLT(key, *a) {
1191 /* key < a[hint] -- gallop left, until
1260 /* key < a[hint] -- gallop left, until
1192 * a[hint - ofs] <= key < a[hint - lastofs]
1261 * a[hint - ofs] <= key < a[hint - lastofs]
1193 */
1262 */
1194 const Py_ssize_t maxofs = hint + 1; /* &a[0] is lowest */
1263 const Py_ssize_t maxofs = hint + 1; /* &a[0] is lowest */
1264 Py_ssize_t tmp;
1195 while (ofs < maxofs) {
1265 while (ofs < maxofs) {
1196 IFLT(key, *(a-ofs)) {
1266 IFLT(key, *(a-ofs)) {
1197 lastofs = ofs;
1267 lastofs = ofs;
1198 ofs = (ofs << 1) + 1;
1268 ofs = (ofs << 1) + 1;
1199 if (ofs <= 0) /* int overflow */
1269 if (ofs <= 0) /* int overflow */
1200 ofs = maxofs;
1270 ofs = maxofs;
1201 }
1271 }
1202 else /* a[hint - ofs] <= key */
1272 else /* a[hint - ofs] <= key */
1203 break;
1273 break;
1204 }
1274 }
1205 if (ofs > maxofs)
1275 if (ofs > maxofs)
1206 ofs = maxofs;
1276 ofs = maxofs;
1207 /* Translate back to positive offsets relative to &a[0]. */
1277 /* Translate back to positive offsets relative to &a[0]. */
1208 k = lastofs;
1278 tmp = lastofs;
1209 lastofs = hint - ofs;
1279 lastofs = hint - ofs;
1210 ofs = hint - k;
1280 ofs = hint - tmp;
1211 }
1281 }
1212 else {
1282 else {
1213 /* a[hint] <= key -- gallop right, until
1283 /* a[hint] <= key -- gallop right, until
1214 * a[hint + lastofs] <= key < a[hint + ofs]
1284 * a[hint + lastofs] <= key < a[hint + ofs]
1215 */
1285 */
1216 const Py_ssize_t maxofs = n - hint; /* &a[n-1] is highest */
1286 const Py_ssize_t maxofs = n - hint; /* &a[n-1] is highest */
1217 while (ofs < maxofs) {
1287 while (ofs < maxofs) {
1218 IFLT(key, a[ofs])
1288 IFLT(key, a[ofs])
1219 break;
1289 break;
1220 /* a[hint + ofs] <= key */
1290 /* a[hint + ofs] <= key */
1221 lastofs = ofs;
1291 lastofs = ofs;
1222 ofs = (ofs << 1) + 1;
1292 ofs = (ofs << 1) + 1;
1223 if (ofs <= 0) /* int overflow */
1293 if (ofs <= 0) /* int overflow */
1224 ofs = maxofs;
1294 ofs = maxofs;
1225 }
1295 }
1226 if (ofs > maxofs)
1296 if (ofs > maxofs)
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1265 */
1335 */
1266 #define MIN_GALLOP 7
1336 #define MIN_GALLOP 7
1267
1337
1268 /* Avoid malloc for small temp arrays. */
1338 /* Avoid malloc for small temp arrays. */
1269 #define MERGESTATE_TEMP_SIZE 256
1339 #define MERGESTATE_TEMP_SIZE 256
1270
1340
1271 /* One MergeState exists on the stack per invocation of mergesort. It's just
1341 /* One MergeState exists on the stack per invocation of mergesort. It's just
1272 * a convenient way to pass state around among the helper functions.
1342 * a convenient way to pass state around among the helper functions.
1273 */
1343 */
1274 struct s_slice {
1344 struct s_slice {
1275 PyObject **base;
1345 sortslice base;
1276 Py_ssize_t len;
1346 Py_ssize_t len;
1277 };
1347 };
1278
1348
1279 typedef struct s_MergeState {
1349 typedef struct s_MergeState {
1280 /* This controls when we get *into* galloping mode. It's initialized
1350 /* This controls when we get *into* galloping mode. It's initialized
1281 * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for
1351 * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for
1282 * random data, and lower for highly structured data.
1352 * random data, and lower for highly structured data.
1283 */
1353 */
1284 Py_ssize_t min_gallop;
1354 Py_ssize_t min_gallop;
1285
1355
1286 /* 'a' is temp storage to help with merges. It contains room for
1356 /* 'a' is temp storage to help with merges. It contains room for
1287 * alloced entries.
1357 * alloced entries.
1288 */
1358 */
1289 PyObject **a; /* may point to temparray below */
1359 sortslice a; /* may point to temparray below */
1290 Py_ssize_t alloced;
1360 Py_ssize_t alloced;
1291
1361
1292 /* A stack of n pending runs yet to be merged. Run #i starts at
1362 /* A stack of n pending runs yet to be merged. Run #i starts at
1293 * address base[i] and extends for len[i] elements. It's always
1363 * address base[i] and extends for len[i] elements. It's always
1294 * true (so long as the indices are in bounds) that
1364 * true (so long as the indices are in bounds) that
1295 *
1365 *
1296 * pending[i].base + pending[i].len == pending[i+1].base
1366 * pending[i].base + pending[i].len == pending[i+1].base
1297 *
1367 *
1298 * so we could cut the storage for this, but it's a minor amount,
1368 * so we could cut the storage for this, but it's a minor amount,
1299 * and keeping all the info explicit simplifies the code.
1369 * and keeping all the info explicit simplifies the code.
1300 */
1370 */
1301 int n;
1371 int n;
1302 struct s_slice pending[MAX_MERGE_PENDING];
1372 struct s_slice pending[MAX_MERGE_PENDING];
1303
1373
1304 /* 'a' points to this when possible, rather than muck with malloc. */
1374 /* 'a' points to this when possible, rather than muck with malloc. */
1305 PyObject *temparray[MERGESTATE_TEMP_SIZE];
1375 PyObject *temparray[MERGESTATE_TEMP_SIZE];
1306 } MergeState;
1376 } MergeState;
1307
1377
1308 /* Conceptually a MergeState's constructor. */
1378 /* Conceptually a MergeState's constructor. */
1309 static void
1379 Py_LOCAL_INLINE(void)
1310 merge_init(MergeState *ms)
1380 merge_init(MergeState *ms, int list_size, int has_keyfunc)
1311 {
1381 {
1312 assert(ms != NULL);
1382 assert(ms != NULL);
1313 ms->a = ms->temparray;
1383 if (has_keyfunc) {
1314 ms->alloced = MERGESTATE_TEMP_SIZE;
1384 ms->alloced = (list_size+1)/2;
1385 if (MERGESTATE_TEMP_SIZE / 2 < ms->alloced)
1386 ms->alloced = MERGESTATE_TEMP_SIZE / 2;
1387 ms->a.values = &ms->temparray[ms->alloced];
1388 } else {
1389 ms->alloced = MERGESTATE_TEMP_SIZE;
1390 ms->a.values = NULL;
1391 }
1392 ms->a.keys = ms->temparray;
1315 ms->n = 0;
1393 ms->n = 0;
1316 ms->min_gallop = MIN_GALLOP;
1394 ms->min_gallop = MIN_GALLOP;
1317 }
1395 }
1318
1396
1319 /* Free all the temp memory owned by the MergeState. This must be called
1397 /* Free all the temp memory owned by the MergeState. This must be called
1320 * when you're done with a MergeState, and may be called before then if
1398 * when you're done with a MergeState, and may be called before then if
1321 * you want to free the temp memory early.
1399 * you want to free the temp memory early.
1322 */
1400 */
1323 static void
1401 Py_LOCAL_INLINE(void)
1324 merge_freemem(MergeState *ms)
1402 merge_freemem(MergeState *ms)
1325 {
1403 {
1326 assert(ms != NULL);
1404 assert(ms != NULL);
1327 if (ms->a != ms->temparray)
1405 if (ms->a.keys != ms->temparray)
1328 PyMem_Free(ms->a);
1406 PyMem_Free(ms->a.keys);
1329 ms->a = ms->temparray;
1330 ms->alloced = MERGESTATE_TEMP_SIZE;
1331 }
1407 }
1332
1408
1333 /* Ensure enough temp memory for 'need' array slots is available.
1409 /* Ensure enough temp memory for 'need' array slots is available.
1334 * Returns 0 on success and -1 if the memory can't be gotten.
1410 * Returns 0 on success and -1 if the memory can't be gotten.
1335 */
1411 */
1336 static int
1412 static int
1337 merge_getmem(MergeState *ms, Py_ssize_t need)
1413 merge_getmem(MergeState *ms, Py_ssize_t need)
1338 {
1414 {
1415 int multiplier;
1416
1339 assert(ms != NULL);
1417 assert(ms != NULL);
1340 if (need <= ms->alloced)
1418 if (need <= ms->alloced)
1341 return 0;
1419 return 0;
1420
1421 multiplier = ms->a.values != NULL ? 2 : 1;
1422
1342 /* Don't realloc! That can cost cycles to copy the old data, but
1423 /* Don't realloc! That can cost cycles to copy the old data, but
1343 * we don't care what's in the block.
1424 * we don't care what's in the block.
1344 */
1425 */
1345 merge_freemem(ms);
1426 merge_freemem(ms);
1346 if ((size_t)need > PY_SSIZE_T_MAX / sizeof(PyObject*)) {
1427 if ((size_t)need > PY_SSIZE_T_MAX / sizeof(PyObject*) / multiplier) {
1347 PyErr_NoMemory();
1428 PyErr_NoMemory();
1348 return -1;
1429 return -1;
1349 }
1430 }
1350 ms->a = (PyObject **)PyMem_Malloc(need * sizeof(PyObject*));
1431 ms->a.keys = (PyObject**)PyMem_Malloc(multiplier * need
1351 if (ms->a) {
1432 * sizeof(PyObject *));
1433 if (ms->a.keys != NULL) {
1352 ms->alloced = need;
1434 ms->alloced = need;
1435 if (ms->a.values != NULL)
1436 ms->a.values = &ms->a.keys[need];
1353 return 0;
1437 return 0;
1354 }
1438 }
1355 PyErr_NoMemory();
1439 PyErr_NoMemory();
1356 merge_freemem(ms); /* reset to sane state */
1357 return -1;
1440 return -1;
1358 }
1441 }
1359 #define MERGE_GETMEM(MS, NEED) ((NEED) <= (MS)->alloced ? 0 : \
1442 #define MERGE_GETMEM(MS, NEED) ((NEED) <= (MS)->alloced ? 0 : \
1360 merge_getmem(MS, NEED))
1443 merge_getmem(MS, NEED))
1361
1444
1362 /* Merge the na elements starting at pa with the nb elements starting at pb
1445 /* Merge the na elements starting at ssa with the nb elements starting at
1363 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
1446 * ssb = ssa + na in a stable way, in-place. na and nb must be > 0. Must
1364 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
1447 * also have that pa[na-1] belongs at the end of the merge, and should have
1365 * merge, and should have na <= nb. See listsort.txt for more info.
1448 * na <= nb. See listsort.txt for more info. Return 0 if successful,
1366 * Return 0 if successful, -1 if error.
1449 * -1 if error.
1367 */
1450 */
1368 static Py_ssize_t
1451 static Py_ssize_t
1369 merge_lo(MergeState *ms, PyObject **pa, Py_ssize_t na,
1452 merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, Py_ssize_t nb)
1370 PyObject **pb, Py_ssize_t nb)
1371 {
1453 {
1372 Py_ssize_t k;
1454 Py_ssize_t k;
1373 PyObject **dest;
1455 sortslice dest, ssb;
1374 int result = -1; /* guilty until proved innocent */
1456 int result = -1; /* guilty until proved innocent */
1375 Py_ssize_t min_gallop;
1457 Py_ssize_t min_gallop;
1376
1458
1377 assert(ms && pa && pb && na > 0 && nb > 0 && pa + na == pb);
1459 assert(ms && ssa.keys && na > 0 && nb > 0);
1460 ssb = ssa;
1461 sortslice_advance(&ssb, na);
1378 if (MERGE_GETMEM(ms, na) < 0)
1462 if (MERGE_GETMEM(ms, na) < 0)
1379 return -1;
1463 return -1;
1380 memcpy(ms->a, pa, na * sizeof(PyObject*));
1464 sortslice_memcpy(&ms->a, 0, &ssa, 0, na);
1381 dest = pa;
1465 dest = ssa;
1382 pa = ms->a;
1466 ssa = ms->a;
1383
1467
1384 *dest++ = *pb++;
1468 sortslice_copy_incr(&dest, &ssb);
1385 --nb;
1469 --nb;
1386 if (nb == 0)
1470 if (nb == 0)
1387 goto Succeed;
1471 goto Succeed;
1388 if (na == 1)
1472 if (na == 1)
1389 goto CopyB;
1473 goto CopyB;
1390
1474
1391 min_gallop = ms->min_gallop;
1475 min_gallop = ms->min_gallop;
1392 for (;;) {
1476 for (;;) {
1393 Py_ssize_t acount = 0; /* # of times A won in a row */
1477 Py_ssize_t acount = 0; /* # of times A won in a row */
1394 Py_ssize_t bcount = 0; /* # of times B won in a row */
1478 Py_ssize_t bcount = 0; /* # of times B won in a row */
1395
1479
1396 /* Do the straightforward thing until (if ever) one run
1480 /* Do the straightforward thing until (if ever) one run
1397 * appears to win consistently.
1481 * appears to win consistently.
1398 */
1482 */
1399 for (;;) {
1483 for (;;) {
1400 assert(na > 1 && nb > 0);
1484 assert(na > 1 && nb > 0);
1401 k = ISLT(*pb, *pa);
1485 k = ISLT(ssb.keys[0], ssa.keys[0]);
1402 if (k) {
1486 if (k) {
1403 if (k < 0)
1487 if (k < 0)
1404 goto Fail;
1488 goto Fail;
1405 *dest++ = *pb++;
1489 sortslice_copy_incr(&dest, &ssb);
1406 ++bcount;
1490 if (--nb == 0)
1491 goto Succeed;
1492 if (++bcount >= min_gallop)
1493 break;
1407 acount = 0;
1494 acount = 0;
1408 --nb;
1495 } else {
1409 if (nb == 0)
1496 sortslice_copy_incr(&dest, &ssa);
1410 goto Succeed;
1497 if (--na == 1)
1411 if (bcount >= min_gallop)
1498 goto CopyB;
1499 if (++acount >= min_gallop)
1412 break;
1500 break;
1413 }
1414 else {
1415 *dest++ = *pa++;
1416 ++acount;
1417 bcount = 0;
1501 bcount = 0;
1418 --na;
1419 if (na == 1)
1420 goto CopyB;
1421 if (acount >= min_gallop)
1422 break;
1423 }
1502 }
1424 }
1503 }
1425
1504
1426 /* One run is winning so consistently that galloping may
1505 /* One run is winning so consistently that galloping may be a huge
1427 * be a huge win. So try that, and continue galloping until
1506 * win. So try that, and continue galloping until (if ever) neither
1428 * (if ever) neither run appears to be winning consistently
1507 * run appears to be winning consistently anymore.
1429 * anymore.
1430 */
1508 */
1431 ++min_gallop;
1509 ++min_gallop;
1432 do {
1510 do {
1433 assert(na > 1 && nb > 0);
1511 assert(na > 1 && nb > 0);
1434 min_gallop -= min_gallop > 1;
1512 min_gallop -= min_gallop > 1;
1435 ms->min_gallop = min_gallop;
1513 ms->min_gallop = min_gallop;
1436 k = gallop_right(*pb, pa, na, 0);
1514 k = gallop_right(ssb.keys[0], ssa.keys, na, 0);
1437 acount = k;
1515 acount = k;
1438 if (k) {
1516 if (k) {
1439 if (k < 0)
1517 if (k < 0)
1440 goto Fail;
1518 goto Fail;
1441 memcpy(dest, pa, k * sizeof(PyObject *));
1519 sortslice_memcpy(&dest, 0, &ssa, 0, k);
1442 dest += k;
1520 sortslice_advance(&dest, k);
1443 pa += k;
1521 sortslice_advance(&ssa, k);
1444 na -= k;
1522 na -= k;
1445 if (na == 1)
1523 if (na == 1)
1446 goto CopyB;
1524 goto CopyB;
1447 /* na==0 is impossible now if the comparison
1525 /* na==0 is impossible now if the comparison function is
1448 * function is consistent, but we can't assume
1526 * consistent, but we can't assume that it is.
1449 * that it is.
1450 */
1527 */
1451 if (na == 0)
1528 if (na == 0)
1452 goto Succeed;
1529 goto Succeed;
1453 }
1530 }
1454 *dest++ = *pb++;
1531 sortslice_copy_incr(&dest, &ssb);
1455 --nb;
1532 --nb;
1456 if (nb == 0)
1533 if (nb == 0)
1457 goto Succeed;
1534 goto Succeed;
1458
1535
1459 k = gallop_left(*pa, pb, nb, 0);
1536 k = gallop_left(ssa.keys[0], ssb.keys, nb, 0);
1460 bcount = k;
1537 bcount = k;
1461 if (k) {
1538 if (k) {
1462 if (k < 0)
1539 if (k < 0)
1463 goto Fail;
1540 goto Fail;
1464 memmove(dest, pb, k * sizeof(PyObject *));
1541 sortslice_memmove(&dest, 0, &ssb, 0, k);
1465 dest += k;
1542 sortslice_advance(&dest, k);
1466 pb += k;
1543 sortslice_advance(&ssb, k);
1467 nb -= k;
1544 nb -= k;
1468 if (nb == 0)
1545 if (nb == 0)
1469 goto Succeed;
1546 goto Succeed;
1470 }
1547 }
1471 *dest++ = *pa++;
1548 sortslice_copy_incr(&dest, &ssa);
1472 --na;
1549 --na;
1473 if (na == 1)
1550 if (na == 1)
1474 goto CopyB;
1551 goto CopyB;
1475 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1552 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1476 ++min_gallop; /* penalize it for leaving galloping mode */
1553 ++min_gallop; /* penalize it for leaving galloping mode */
1477 ms->min_gallop = min_gallop;
1554 ms->min_gallop = min_gallop;
1478 }
1555 }
1479 Succeed:
1556 Succeed:
1480 result = 0;
1557 result = 0;
1481 Fail:
1558 Fail:
1482 if (na)
1559 if (na)
1483 memcpy(dest, pa, na * sizeof(PyObject*));
1560 sortslice_memcpy(&dest, 0, &ssa, 0, na);
1484 return result;
1561 return result;
1485 CopyB:
1562 CopyB:
1486 assert(na == 1 && nb > 0);
1563 assert(na == 1 && nb > 0);
1487 /* The last element of pa belongs at the end of the merge. */
1564 /* The last element of ssa belongs at the end of the merge. */
1488 memmove(dest, pb, nb * sizeof(PyObject *));
1565 sortslice_memmove(&dest, 0, &ssb, 0, nb);
1489 dest[nb] = *pa;
1566 sortslice_copy(&dest, nb, &ssa, 0);
1490 return 0;
1567 return 0;
1491 }
1568 }
1492
1569
1493 /* Merge the na elements starting at pa with the nb elements starting at pb
1570 /* Merge the na elements starting at pa with the nb elements starting at
1494 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
1571 * ssb = ssa + na in a stable way, in-place. na and nb must be > 0.
1495 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
1572 * Must also have that pa[na-1] belongs at the end of the merge, and
1496 * merge, and should have na >= nb. See listsort.txt for more info.
1573 * should have na >= nb. See listsort.txt for more info. Return 0 if
1497 * Return 0 if successful, -1 if error.
1574 * successful, -1 if error.
1498 */
1575 */
1499 static Py_ssize_t
1576 static Py_ssize_t
1500 merge_hi(MergeState *ms, PyObject **pa, Py_ssize_t na, PyObject **pb, Py_ssize_t nb)
1577 merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, Py_ssize_t nb)
1501 {
1578 {
1502 Py_ssize_t k;
1579 Py_ssize_t k;
1503 PyObject **dest;
1580 sortslice dest, ssb;
1504 int result = -1; /* guilty until proved innocent */
1581 int result = -1; /* guilty until proved innocent */
1505 PyObject **basea;
1506 PyObject **baseb;
1507 Py_ssize_t min_gallop;
1582 Py_ssize_t min_gallop;
1508
1583
1509 assert(ms && pa && pb && na > 0 && nb > 0 && pa + na == pb);
1584 assert(ms && ssa.keys && na > 0 && nb > 0);
1510 if (MERGE_GETMEM(ms, nb) < 0)
1585 if (MERGE_GETMEM(ms, nb) < 0)
1511 return -1;
1586 return -1;
1512 dest = pb + nb - 1;
1587 dest = ssa;
1513 memcpy(ms->a, pb, nb * sizeof(PyObject*));
1588 sortslice_advance(&dest, na + nb - 1);
1514 basea = pa;
1589 sortslice_memcpy(&ms->a, 0, &ssa, na, nb);
1515 baseb = ms->a;
1590 ssb = ms->a;
1516 pb = ms->a + nb - 1;
1591 sortslice_advance(&ssb, nb - 1);
1517 pa += na - 1;
1592 sortslice_advance(&ssa, na - 1);
1518
1593
1519 *dest-- = *pa--;
1594 sortslice_copy_decr(&dest, &ssa);
1520 --na;
1595 --na;
1521 if (na == 0)
1596 if (na == 0)
1522 goto Succeed;
1597 goto Succeed;
1523 if (nb == 1)
1598 if (nb == 1)
1524 goto CopyA;
1599 goto CopyA;
1525
1600
1526 min_gallop = ms->min_gallop;
1601 min_gallop = ms->min_gallop;
1527 for (;;) {
1602 for (;;) {
1528 Py_ssize_t acount = 0; /* # of times A won in a row */
1603 Py_ssize_t acount = 0; /* # of times A won in a row */
1529 Py_ssize_t bcount = 0; /* # of times B won in a row */
1604 Py_ssize_t bcount = 0; /* # of times B won in a row */
1530
1605
1531 /* Do the straightforward thing until (if ever) one run
1606 /* Do the straightforward thing until (if ever) one run
1532 * appears to win consistently.
1607 * appears to win consistently.
1533 */
1608 */
1534 for (;;) {
1609 for (;;) {
1535 assert(na > 0 && nb > 1);
1610 assert(na > 0 && nb > 1);
1536 k = ISLT(*pb, *pa);
1611 k = ISLT(ssb.keys[0], ssa.keys[0]);
1537 if (k) {
1612 if (k) {
1538 if (k < 0)
1613 if (k < 0)
1539 goto Fail;
1614 goto Fail;
1540 *dest-- = *pa--;
1615 sortslice_copy_decr(&dest, &ssa);
1541 ++acount;
1616 if (--na == 0)
1617 goto Succeed;
1618 if (++acount >= min_gallop)
1619 break;
1542 bcount = 0;
1620 bcount = 0;
1543 --na;
1621 } else {
1544 if (na == 0)
1622 sortslice_copy_decr(&dest, &ssb);
1545 goto Succeed;
1623 if (--nb == 1)
1546 if (acount >= min_gallop)
1624 goto CopyA;
1625 if (++bcount >= min_gallop)
1547 break;
1626 break;
1548 }
1549 else {
1550 *dest-- = *pb--;
1551 ++bcount;
1552 acount = 0;
1627 acount = 0;
1553 --nb;
1554 if (nb == 1)
1555 goto CopyA;
1556 if (bcount >= min_gallop)
1557 break;
1558 }
1628 }
1559 }
1629 }
1560
1630
1561 /* One run is winning so consistently that galloping may
1631 /* One run is winning so consistently that galloping may be a huge
1562 * be a huge win. So try that, and continue galloping until
1632 * win. So try that, and continue galloping until (if ever) neither
1563 * (if ever) neither run appears to be winning consistently
1633 * run appears to be winning consistently anymore.
1564 * anymore.
1565 */
1634 */
1566 ++min_gallop;
1635 ++min_gallop;
1567 do {
1636 do {
1568 assert(na > 0 && nb > 1);
1637 assert(na > 0 && nb > 1);
1569 min_gallop -= min_gallop > 1;
1638 min_gallop -= min_gallop > 1;
1570 ms->min_gallop = min_gallop;
1639 ms->min_gallop = min_gallop;
1571 k = gallop_right(*pb, basea, na, na-1);
1640 k = gallop_right(ssb.keys[0], ssa.keys - (na - 1), na, na - 1);
1572 if (k < 0)
1641 if (k < 0)
1573 goto Fail;
1642 goto Fail;
1574 k = na - k;
1643 k = na - k;
1575 acount = k;
1644 acount = k;
1576 if (k) {
1645 if (k) {
1577 dest -= k;
1646 sortslice_advance(&dest, -k);
1578 pa -= k;
1647 sortslice_advance(&ssa, -k);
1579 memmove(dest+1, pa+1, k * sizeof(PyObject *));
1648 sortslice_memmove(&dest, 1, &ssa, 1, k);
1580 na -= k;
1649 na -= k;
1581 if (na == 0)
1650 if (na == 0)
1582 goto Succeed;
1651 goto Succeed;
1583 }
1652 }
1584 *dest-- = *pb--;
1653 sortslice_copy_decr(&dest, &ssb);
1585 --nb;
1654 --nb;
1586 if (nb == 1)
1655 if (nb == 1)
1587 goto CopyA;
1656 goto CopyA;
1588
1657
1589 k = gallop_left(*pa, baseb, nb, nb-1);
1658 k = gallop_left(ssa.keys[0], ms->a.keys, nb, nb-1);
1590 if (k < 0)
1659 if (k < 0)
1591 goto Fail;
1660 goto Fail;
1592 k = nb - k;
1661 k = nb - k;
1593 bcount = k;
1662 bcount = k;
1594 if (k) {
1663 if (k) {
1595 dest -= k;
1664 sortslice_advance(&dest, -k);
1596 pb -= k;
1665 sortslice_advance(&ssb, -k);
1597 memcpy(dest+1, pb+1, k * sizeof(PyObject *));
1666 sortslice_memcpy(&dest, 1, &ssb, 1, k);
1598 nb -= k;
1667 nb -= k;
1599 if (nb == 1)
1668 if (nb == 1)
1600 goto CopyA;
1669 goto CopyA;
1601 /* nb==0 is impossible now if the comparison
1670 /* nb==0 is impossible now if the comparison function is
1602 * function is consistent, but we can't assume
1671 * consistent, but we can't assume that it is.
1603 * that it is.
1604 */
1672 */
1605 if (nb == 0)
1673 if (nb == 0)
1606 goto Succeed;
1674 goto Succeed;
1607 }
1675 }
1608 *dest-- = *pa--;
1676 sortslice_copy_decr(&dest, &ssa);
1609 --na;
1677 --na;
1610 if (na == 0)
1678 if (na == 0)
1611 goto Succeed;
1679 goto Succeed;
1612 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1680 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1613 ++min_gallop; /* penalize it for leaving galloping mode */
1681 ++min_gallop; /* penalize it for leaving galloping mode */
1614 ms->min_gallop = min_gallop;
1682 ms->min_gallop = min_gallop;
1615 }
1683 }
1616 Succeed:
1684 Succeed:
1617 result = 0;
1685 result = 0;
1618 Fail:
1686 Fail:
1619 if (nb)
1687 if (nb)
1620 memcpy(dest-(nb-1), baseb, nb * sizeof(PyObject*));
1688 sortslice_memcpy(&dest, -(nb-1), &ms->a, 0, nb);
1621 return result;
1689 return result;
1622 CopyA:
1690 CopyA:
1623 assert(nb == 1 && na > 0);
1691 assert(nb == 1 && na > 0);
1624 /* The first element of pb belongs at the front of the merge. */
1692 /* The first element of ssb belongs at the front of the merge. */
1625 dest -= na;
1693 sortslice_memmove(&dest, 1-na, &ssa, 1-na, na);
1626 pa -= na;
1694 sortslice_advance(&dest, -na);
1627 memmove(dest+1, pa+1, na * sizeof(PyObject *));
1695 sortslice_advance(&ssa, -na);
1628 *dest = *pb;
1696 sortslice_copy(&dest, 0, &ssb, 0);
1629 return 0;
1697 return 0;
1630 }
1698 }
1631
1699
1632 /* Merge the two runs at stack indices i and i+1.
1700 /* Merge the two runs at stack indices i and i+1.
1633 * Returns 0 on success, -1 on error.
1701 * Returns 0 on success, -1 on error.
1634 */
1702 */
1635 static Py_ssize_t
1703 static Py_ssize_t
1636 merge_at(MergeState *ms, Py_ssize_t i)
1704 merge_at(MergeState *ms, Py_ssize_t i)
1637 {
1705 {
1638 PyObject **pa, **pb;
1706 sortslice ssa, ssb;
1639 Py_ssize_t na, nb;
1707 Py_ssize_t na, nb;
1640 Py_ssize_t k;
1708 Py_ssize_t k;
1641
1709
1642 assert(ms != NULL);
1710 assert(ms != NULL);
1643 assert(ms->n >= 2);
1711 assert(ms->n >= 2);
1644 assert(i >= 0);
1712 assert(i >= 0);
1645 assert(i == ms->n - 2 || i == ms->n - 3);
1713 assert(i == ms->n - 2 || i == ms->n - 3);
1646
1714
1647 pa = ms->pending[i].base;
1715 ssa = ms->pending[i].base;
1648 na = ms->pending[i].len;
1716 na = ms->pending[i].len;
1649 pb = ms->pending[i+1].base;
1717 ssb = ms->pending[i+1].base;
1650 nb = ms->pending[i+1].len;
1718 nb = ms->pending[i+1].len;
1651 assert(na > 0 && nb > 0);
1719 assert(na > 0 && nb > 0);
1652 assert(pa + na == pb);
1720 assert(ssa.keys + na == ssb.keys);
1653
1721
1654 /* Record the length of the combined runs; if i is the 3rd-last
1722 /* Record the length of the combined runs; if i is the 3rd-last
1655 * run now, also slide over the last run (which isn't involved
1723 * run now, also slide over the last run (which isn't involved
1656 * in this merge). The current run i+1 goes away in any case.
1724 * in this merge). The current run i+1 goes away in any case.
1657 */
1725 */
1658 ms->pending[i].len = na + nb;
1726 ms->pending[i].len = na + nb;
1659 if (i == ms->n - 3)
1727 if (i == ms->n - 3)
1660 ms->pending[i+1] = ms->pending[i+2];
1728 ms->pending[i+1] = ms->pending[i+2];
1661 --ms->n;
1729 --ms->n;
1662
1730
1663 /* Where does b start in a? Elements in a before that can be
1731 /* Where does b start in a? Elements in a before that can be
1664 * ignored (already in place).
1732 * ignored (already in place).
1665 */
1733 */
1666 k = gallop_right(*pb, pa, na, 0);
1734 k = gallop_right(*ssb.keys, ssa.keys, na, 0);
1667 if (k < 0)
1735 if (k < 0)
1668 return -1;
1736 return -1;
1669 pa += k;
1737 sortslice_advance(&ssa, k);
1670 na -= k;
1738 na -= k;
1671 if (na == 0)
1739 if (na == 0)
1672 return 0;
1740 return 0;
1673
1741
1674 /* Where does a end in b? Elements in b after that can be
1742 /* Where does a end in b? Elements in b after that can be
1675 * ignored (already in place).
1743 * ignored (already in place).
1676 */
1744 */
1677 nb = gallop_left(pa[na-1], pb, nb, nb-1);
1745 nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1);
1678 if (nb <= 0)
1746 if (nb <= 0)
1679 return nb;
1747 return nb;
1680
1748
1681 /* Merge what remains of the runs, using a temp array with
1749 /* Merge what remains of the runs, using a temp array with
1682 * min(na, nb) elements.
1750 * min(na, nb) elements.
1683 */
1751 */
1684 if (na <= nb)
1752 if (na <= nb)
1685 return merge_lo(ms, pa, na, pb, nb);
1753 return merge_lo(ms, ssa, na, nb);
1686 else
1754 else
1687 return merge_hi(ms, pa, na, pb, nb);
1755 return merge_hi(ms, ssa, na, nb);
1688 }
1756 }
1689
1757
1690 /* Examine the stack of runs waiting to be merged, merging adjacent runs
1758 /* Examine the stack of runs waiting to be merged, merging adjacent runs
1691 * until the stack invariants are re-established:
1759 * until the stack invariants are re-established:
1692 *
1760 *
1693 * 1. len[-3] > len[-2] + len[-1]
1761 * 1. len[-3] > len[-2] + len[-1]
1694 * 2. len[-2] > len[-1]
1762 * 2. len[-2] > len[-1]
1695 *
1763 *
1696 * See listsort.txt for more info.
1764 * See listsort.txt for more info.
1697 *
1765 *
1698 * Returns 0 on success, -1 on error.
1766 * Returns 0 on success, -1 on error.
1699 */
1767 */
1700 static int
1768 Py_LOCAL_INLINE(int)
1701 merge_collapse(MergeState *ms)
1769 merge_collapse(MergeState *ms)
1702 {
1770 {
1703 struct s_slice *p = ms->pending;
1771 struct s_slice *p = ms->pending;
1704
1772
1705 assert(ms);
1773 assert(ms);
1706 while (ms->n > 1) {
1774 while (ms->n > 1) {
1707 Py_ssize_t n = ms->n - 2;
1775 Py_ssize_t n = ms->n - 2;
1708 if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) {
1776 if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) {
1709 if (p[n-1].len < p[n+1].len)
1777 if (p[n-1].len < p[n+1].len)
1710 --n;
1778 --n;
1711 if (merge_at(ms, n) < 0)
1779 if (merge_at(ms, n) < 0)
1712 return -1;
1780 return -1;
1713 }
1781 }
1714 else if (p[n].len <= p[n+1].len) {
1782 else if (p[n].len <= p[n+1].len) {
1715 if (merge_at(ms, n) < 0)
1783 if (merge_at(ms, n) < 0)
1716 return -1;
1784 return -1;
1717 }
1785 }
1718 else
1786 else
1719 break;
1787 break;
1720 }
1788 }
1721 return 0;
1789 return 0;
1722 }
1790 }
1723
1791
1724 /* Regardless of invariants, merge all runs on the stack until only one
1792 /* Regardless of invariants, merge all runs on the stack until only one
1725 * remains. This is used at the end of the mergesort.
1793 * remains. This is used at the end of the mergesort.
1726 *
1794 *
1727 * Returns 0 on success, -1 on error.
1795 * Returns 0 on success, -1 on error.
1728 */
1796 */
1729 static int
1797 Py_LOCAL_INLINE(int)
1730 merge_force_collapse(MergeState *ms)
1798 merge_force_collapse(MergeState *ms)
1731 {
1799 {
1732 struct s_slice *p = ms->pending;
1800 struct s_slice *p = ms->pending;
1733
1801
1734 assert(ms);
1802 assert(ms);
1735 while (ms->n > 1) {
1803 while (ms->n > 1) {
1736 Py_ssize_t n = ms->n - 2;
1804 Py_ssize_t n = ms->n - 2;
1737 if (n > 0 && p[n-1].len < p[n+1].len)
1805 if (n > 0 && p[n-1].len < p[n+1].len)
1738 --n;
1806 --n;
1739 if (merge_at(ms, n) < 0)
1807 if (merge_at(ms, n) < 0)
1740 return -1;
1808 return -1;
1741 }
1809 }
1742 return 0;
1810 return 0;
1743 }
1811 }
1744
1812
1745 /* Compute a good value for the minimum run length; natural runs shorter
1813 /* Compute a good value for the minimum run length; natural runs shorter
1746 * than this are boosted artificially via binary insertion.
1814 * than this are boosted artificially via binary insertion.
1747 *
1815 *
1748 * If n < 64, return n (it's too small to bother with fancy stuff).
1816 * If n < 64, return n (it's too small to bother with fancy stuff).
1749 * Else if n is an exact power of 2, return 32.
1817 * Else if n is an exact power of 2, return 32.
1750 * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
1818 * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
1751 * strictly less than, an exact power of 2.
1819 * strictly less than, an exact power of 2.
1752 *
1820 *
1753 * See listsort.txt for more info.
1821 * See listsort.txt for more info.
1754 */
1822 */
1755 static Py_ssize_t
1823 Py_LOCAL_INLINE(Py_ssize_t)
1756 merge_compute_minrun(Py_ssize_t n)
1824 merge_compute_minrun(Py_ssize_t n)
1757 {
1825 {
1758 Py_ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */
1826 Py_ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */
1759
1827
1760 assert(n >= 0);
1828 assert(n >= 0);
1761 while (n >= 64) {
1829 while (n >= 64) {
1762 r |= n & 1;
1830 r |= n & 1;
1763 n >>= 1;
1831 n >>= 1;
1764 }
1832 }
1765 return n + r;
1833 return n + r;
1766 }
1834 }
1767
1835
1768 /* Special wrapper to support stable sorting using the decorate-sort-undecorate
1836 Py_LOCAL(void)
1769 pattern. Holds a key which is used for comparisons and the original record
1837 reverse_sortslice(sortslice s, Py_ssize_t n)
1770 which is returned during the undecorate phase. By exposing only the key
1771 during comparisons, the underlying sort stability characteristics are left
1772 unchanged. Also, the comparison function will only see the key instead of
1773 a full record. */
1774
1775 typedef struct {
1776 PyObject_HEAD
1777 PyObject *key;
1778 PyObject *value;
1779 } sortwrapperobject;
1780
1781 PyDoc_STRVAR(sortwrapper_doc, "Object wrapper with a custom sort key.");
1782 static PyObject *
1783 sortwrapper_richcompare(sortwrapperobject *, sortwrapperobject *, int);
1784 static void
1785 sortwrapper_dealloc(sortwrapperobject *);
1786
1787 PyTypeObject PySortWrapper_Type = {
1788 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1789 "sortwrapper", /* tp_name */
1790 sizeof(sortwrapperobject), /* tp_basicsize */
1791 0, /* tp_itemsize */
1792 /* methods */
1793 (destructor)sortwrapper_dealloc, /* tp_dealloc */
1794 0, /* tp_print */
1795 0, /* tp_getattr */
1796 0, /* tp_setattr */
1797 0, /* tp_reserved */
1798 0, /* tp_repr */
1799 0, /* tp_as_number */
1800 0, /* tp_as_sequence */
1801 0, /* tp_as_mapping */
1802 0, /* tp_hash */
1803 0, /* tp_call */
1804 0, /* tp_str */
1805 PyObject_GenericGetAttr, /* tp_getattro */
1806 0, /* tp_setattro */
1807 0, /* tp_as_buffer */
1808 Py_TPFLAGS_DEFAULT, /* tp_flags */
1809 sortwrapper_doc, /* tp_doc */
1810 0, /* tp_traverse */
1811 0, /* tp_clear */
1812 (richcmpfunc)sortwrapper_richcompare, /* tp_richcompare */
1813 };
1814
1815
1816 static PyObject *
1817 sortwrapper_richcompare(sortwrapperobject *a, sortwrapperobject *b, int op)
1818 {
1838 {
1819 if (!PyObject_TypeCheck(b, &PySortWrapper_Type)) {
1839 reverse_slice(s.keys, &s.keys[n]);
1820 PyErr_SetString(PyExc_TypeError,
1840 if (s.values != NULL)
1821 "expected a sortwrapperobject");
1841 reverse_slice(s.values, &s.values[n]);
1822 return NULL;
1823 }
1824 return PyObject_RichCompare(a->key, b->key, op);
1825 }
1826
1827 static void
1828 sortwrapper_dealloc(sortwrapperobject *so)
1829 {
1830 Py_XDECREF(so->key);
1831 Py_XDECREF(so->value);
1832 PyObject_Del(so);
1833 }
1834
1835 /* Returns a new reference to a sortwrapper.
1836 Consumes the references to the two underlying objects. */
1837
1838 static PyObject *
1839 build_sortwrapper(PyObject *key, PyObject *value)
1840 {
1841 sortwrapperobject *so;
1842
1843 so = PyObject_New(sortwrapperobject, &PySortWrapper_Type);
1844 if (so == NULL)
1845 return NULL;
1846 so->key = key;
1847 so->value = value;
1848 return (PyObject *)so;
1849 }
1850
1851 /* Returns a new reference to the value underlying the wrapper. */
1852 static PyObject *
1853 sortwrapper_getvalue(PyObject *so)
1854 {
1855 PyObject *value;
1856
1857 if (!PyObject_TypeCheck(so, &PySortWrapper_Type)) {
1858 PyErr_SetString(PyExc_TypeError,
1859 "expected a sortwrapperobject");
1860 return NULL;
1861 }
1862 value = ((sortwrapperobject *)so)->value;
1863 Py_INCREF(value);
1864 return value;
1865 }
1842 }
1866
1843
1867 /* An adaptive, stable, natural mergesort. See listsort.txt.
1844 /* An adaptive, stable, natural mergesort. See listsort.txt.
1868 * Returns Py_None on success, NULL on error. Even in case of error, the
1845 * Returns Py_None on success, NULL on error. Even in case of error, the
1869 * list will be some permutation of its input state (nothing is lost or
1846 * list will be some permutation of its input state (nothing is lost or
1870 * duplicated).
1847 * duplicated).
1871 */
1848 */
1872 static PyObject *
1849 static PyObject *
1873 listsort(PyListObject *self, PyObject *args, PyObject *kwds)
1850 listsort(PyListObject *self, PyObject *args, PyObject *kwds)
1874 {
1851 {
1875 MergeState ms;
1852 MergeState ms;
1876 PyObject **lo, **hi;
1877 Py_ssize_t nremaining;
1853 Py_ssize_t nremaining;
1878 Py_ssize_t minrun;
1854 Py_ssize_t minrun;
1855 sortslice lo;
1879 Py_ssize_t saved_ob_size, saved_allocated;
1856 Py_ssize_t saved_ob_size, saved_allocated;
1880 PyObject **saved_ob_item;
1857 PyObject **saved_ob_item;
1881 PyObject **final_ob_item;
1858 PyObject **final_ob_item;
1882 PyObject *result = NULL; /* guilty until proved innocent */
1859 PyObject *result = NULL; /* guilty until proved innocent */
1883 int reverse = 0;
1860 int reverse = 0;
1884 PyObject *keyfunc = NULL;
1861 PyObject *keyfunc = NULL;
1885 Py_ssize_t i;
1862 Py_ssize_t i, was_allocated;
1886 PyObject *key, *value, *kvpair;
1887 static char *kwlist[] = {"key", "reverse", 0};
1863 static char *kwlist[] = {"key", "reverse", 0};
1864 PyObject **keys;
1888
1865
1889 assert(self != NULL);
1866 assert(self != NULL);
1890 assert (PyList_Check(self));
1867 assert (PyList_Check(self));
1891 if (args != NULL) {
1868 if (args != NULL) {
1892 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oi:sort",
1869 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oi:sort",
1893 kwlist, &keyfunc, &reverse))
1870 kwlist, &keyfunc, &reverse))
1894 return NULL;
1871 return NULL;
1895 if (Py_SIZE(args) > 0) {
1872 if (Py_SIZE(args) > 0) {
1896 PyErr_SetString(PyExc_TypeError,
1873 PyErr_SetString(PyExc_TypeError,
1897 "must use keyword argument for key function");
1874 "must use keyword argument for key function");
1898 return NULL;
1875 return NULL;
1899 }
1876 }
1877 if (keyfunc == Py_None)
1878 keyfunc = NULL;
1900 }
1879 }
1901 if (keyfunc == Py_None)
1902 keyfunc = NULL;
1903
1880
1904 /* The list is temporarily made empty, so that mutations performed
1881 /* The list is temporarily made empty, so that mutations performed
1905 * by comparison functions can't affect the slice of memory we're
1882 * by comparison functions can't affect the slice of memory we're
1906 * sorting (allowing mutations during sorting is a core-dump
1883 * sorting (allowing mutations during sorting is a core-dump
1907 * factory, since ob_item may change).
1884 * factory, since ob_item may change).
1908 */
1885 */
1909 saved_ob_size = Py_SIZE(self);
1886 saved_ob_size = Py_SIZE(self);
1887 if (saved_ob_size < 2)
1888 Py_RETURN_NONE;
1910 saved_ob_item = self->ob_item;
1889 saved_ob_item = self->ob_item;
1911 saved_allocated = self->allocated;
1890 saved_allocated = self->allocated;
1912 Py_SIZE(self) = 0;
1891 Py_SIZE(self) = 0;
1913 self->ob_item = NULL;
1892 self->ob_item = NULL;
1914 self->allocated = -1; /* any operation will reset it to >= 0 */
1893 self->allocated = -1; /* any operation will reset it to >= 0 */
1915
1894
1916 if (keyfunc != NULL) {
1895 if (keyfunc == NULL) {
1917 for (i=0 ; i < saved_ob_size ; i++) {
1896 keys = NULL;
1918 value = saved_ob_item[i];
1897 lo.keys = saved_ob_item;
1919 key = PyObject_CallFunctionObjArgs(keyfunc, value,
1898 lo.values = NULL;
1920 NULL);
1899 } else {
1921 if (key == NULL) {
1900 if (saved_ob_size < MERGESTATE_TEMP_SIZE/2)
1922 for (i=i-1 ; i>=0 ; i--) {
1901 keys = &ms.temparray[saved_ob_size+1];
1923 kvpair = saved_ob_item[i];
1902 else {
1924 value = sortwrapper_getvalue(kvpair);
1903 keys = PyMem_MALLOC(sizeof(PyObject *) * saved_ob_size);
1925 saved_ob_item[i] = value;
1904 if (keys == NULL)
1926 Py_DECREF(kvpair);
1905 return NULL;
1927 }
1906 }
1907
1908 for (i = 0; i < saved_ob_size ; i++) {
1909 keys[i] = PyObject_CallFunctionObjArgs(keyfunc, saved_ob_item[i],
1910 NULL);
1911 if (keys[i] == NULL) {
1912 for (i=i-1 ; i>=0 ; i--)
1913 Py_DECREF(keys[i]);
1928 goto dsu_fail;
1914 goto dsu_fail;
1929 }
1915 }
1930 kvpair = build_sortwrapper(key, value);
1931 if (kvpair == NULL)
1932 goto dsu_fail;
1933 saved_ob_item[i] = kvpair;
1934 }
1916 }
1917
1918 lo.keys = keys;
1919 lo.values = saved_ob_item;
1935 }
1920 }
1936
1921
1937 merge_init(&ms);
1922 merge_init(&ms, saved_ob_size, keys != NULL);
1938
1923
1939 nremaining = saved_ob_size;
1924 nremaining = saved_ob_size;
1940 if (nremaining < 2)
1941 goto succeed;
1942
1925
1943 /* Reverse sort stability achieved by initially reversing the list,
1926 /* Reverse sort stability achieved by initially reversing the list,
1944 applying a stable forward sort, then reversing the final result. */
1927 applying a stable forward sort, then reversing the final result. */
1945 if (reverse)
1928 if (reverse) {
1946 reverse_slice(saved_ob_item, saved_ob_item + saved_ob_size);
1929 if (keys != NULL)
1930 reverse_slice(&keys[0], &keys[saved_ob_size]);
1931 reverse_slice(&saved_ob_item[0], &saved_ob_item[saved_ob_size]);
1932 }
1947
1933
1948 /* March over the array once, left to right, finding natural runs,
1934 /* March over the array once, left to right, finding natural runs,
1949 * and extending short natural runs to minrun elements.
1935 * and extending short natural runs to minrun elements.
1950 */
1936 */
1951 lo = saved_ob_item;
1952 hi = lo + nremaining;
1953 minrun = merge_compute_minrun(nremaining);
1937 minrun = merge_compute_minrun(nremaining);
1954 do {
1938 do {
1955 int descending;
1939 int descending;
1956 Py_ssize_t n;
1940 Py_ssize_t n;
1957
1941
1958 /* Identify next run. */
1942 /* Identify next run. */
1959 n = count_run(lo, hi, &descending);
1943 n = count_run(lo.keys, nremaining, &descending);
1960 if (n < 0)
1944 if (n < 0)
1961 goto fail;
1945 goto fail;
1962 if (descending)
1946 if (descending)
1963 reverse_slice(lo, lo + n);
1947 reverse_sortslice(lo, n);
1964 /* If short, extend to min(minrun, nremaining). */
1948 /* If short, extend to min(minrun, nremaining). */
1965 if (n < minrun) {
1949 if (n < minrun) {
1966 const Py_ssize_t force = nremaining <= minrun ?
1950 Py_ssize_t force = nremaining <= minrun ? nremaining : minrun;
1967 nremaining : minrun;
1951 if (binarysort(lo, force, n) < 0)
1968 if (binarysort(lo, lo + force, lo + n) < 0)
1969 goto fail;
1952 goto fail;
1970 n = force;
1953 n = force;
1971 }
1954 }
1955 if (n == saved_ob_size)
1956 goto all_done;
1957
1972 /* Push run onto pending-runs stack, and maybe merge. */
1958 /* Push run onto pending-runs stack, and maybe merge. */
1973 assert(ms.n < MAX_MERGE_PENDING);
1959 assert(ms.n < MAX_MERGE_PENDING);
1974 ms.pending[ms.n].base = lo;
1960 ms.pending[ms.n].base = lo;
1975 ms.pending[ms.n].len = n;
1961 ms.pending[ms.n].len = n;
1976 ++ms.n;
1962 ++ms.n;
1977 if (merge_collapse(&ms) < 0)
1963 if (merge_collapse(&ms) < 0)
1978 goto fail;
1964 goto fail;
1979 /* Advance to find next run. */
1965 /* Advance to find next run. */
1980 lo += n;
1966 sortslice_advance(&lo, n);
1981 nremaining -= n;
1967 nremaining -= n;
1982 } while (nremaining);
1968 } while (nremaining);
1983 assert(lo == hi);
1984
1969
1985 if (merge_force_collapse(&ms) < 0)
1970 if (merge_force_collapse(&ms) < 0)
1986 goto fail;
1971 goto fail;
1987 assert(ms.n == 1);
1972 assert(ms.n == 1);
1988 assert(ms.pending[0].base == saved_ob_item);
1973 assert(keys == NULL
1974 ? ms.pending[0].base.keys == saved_ob_item
1975 : ms.pending[0].base.keys == &keys[0]);
1989 assert(ms.pending[0].len == saved_ob_size);
1976 assert(ms.pending[0].len == saved_ob_size);
1977 lo = ms.pending[0].base;
1990
1978
1991 succeed:
1979 all_done:
1992 result = Py_None;
1980 result = Py_None;
1981
1982 if (reverse)
1983 reverse_sortslice(lo, saved_ob_size);
1984
1993 fail:
1985 fail:
1994 if (keyfunc != NULL) {
1986 if (keys != NULL) {
1995 for (i=0 ; i < saved_ob_size ; i++) {
1987 for (i = 0; i < saved_ob_size; i++)
1996 kvpair = saved_ob_item[i];
1988 Py_DECREF(keys[i]);
1997 value = sortwrapper_getvalue(kvpair);
1989 if (keys != &ms.temparray[saved_ob_size+1])
1998 saved_ob_item[i] = value;
1990 PyMem_FREE(keys);
1999 Py_DECREF(kvpair);
2000 }
2001 }
1991 }
2002
1992
2003 if (self->allocated != -1 && result != NULL) {
2004 /* The user mucked with the list during the sort,
2005 * and we don't already have another error to report.
2006 */
2007 PyErr_SetString(PyExc_ValueError, "list modified during sort");
2008 result = NULL;
2009 }
2010
2011 if (reverse && saved_ob_size > 1)
2012 reverse_slice(saved_ob_item, saved_ob_item + saved_ob_size);
2013
2014 merge_freemem(&ms);
1993 merge_freemem(&ms);
2015
1994
2016 dsu_fail:
1995 dsu_fail:
1996 was_allocated = self->allocated;
1997 self->allocated = saved_allocated;
2017 final_ob_item = self->ob_item;
1998 final_ob_item = self->ob_item;
2018 i = Py_SIZE(self);
1999 i = Py_SIZE(self);
2019 Py_SIZE(self) = saved_ob_size;
2000 Py_SIZE(self) = saved_ob_size;
2020 self->ob_item = saved_ob_item;
2001 self->ob_item = saved_ob_item;
2021 self->allocated = saved_allocated;
2002
2022 if (final_ob_item != NULL) {
2003 if (was_allocated != -1) {
2023 /* we cannot use list_clear() for this because it does not
2004 if (result != NULL) {
2024 guarantee that the list is really empty when it returns */
2005 /* The user mucked with the list during the sort,
2025 while (--i >= 0) {
2006 * and we don't already have another error to report.
2026 Py_XDECREF(final_ob_item[i]);
2007 */
2008 PyErr_SetString(PyExc_ValueError, "list modified during sort");
2009 result = NULL;
2027 }
2010 }
2028 PyMem_FREE(final_ob_item);
2011
2012 if (final_ob_item != NULL) {
2013 /* we cannot use list_clear() for this because it does not
2014 guarantee that the list is really empty when it returns */
2015 while (--i >= 0)
2016 Py_XDECREF(final_ob_item[i]);
2017 PyMem_FREE(final_ob_item);
2018 }
2029 }
2019 }
2020
2030 Py_XINCREF(result);
2021 Py_XINCREF(result);
2031 return result;
2022 return result;
2032 }
2023 }
2033 #undef IFLT
2024 #undef IFLT
2034 #undef ISLT
2025 #undef ISLT
2035
2026
2036 int
2027 int
2037 PyList_Sort(PyObject *v)
2028 PyList_Sort(PyObject *v)
2038 {
2029 {
2039 if (v == NULL || !PyList_Check(v)) {
2030 if (v == NULL || !PyList_Check(v)) {
(...skipping 815 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2855 }
2846 }
2856
2847
2857 static PyObject *
2848 static PyObject *
2858 listreviter_len(listreviterobject *it)
2849 listreviter_len(listreviterobject *it)
2859 {
2850 {
2860 Py_ssize_t len = it->it_index + 1;
2851 Py_ssize_t len = it->it_index + 1;
2861 if (it->it_seq == NULL || PyList_GET_SIZE(it->it_seq) < len)
2852 if (it->it_seq == NULL || PyList_GET_SIZE(it->it_seq) < len)
2862 len = 0;
2853 len = 0;
2863 return PyLong_FromSsize_t(len);
2854 return PyLong_FromSsize_t(len);
2864 }
2855 }
2865
OLD
NEW