PostgreSQL Source Code: src/backend/storage/buffer/bufmgr.c Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
36
39
43#ifdef USE_ASSERT_CHECKING
44#include "catalog/pg_tablespace_d.h"
45#endif
69
70
71
72#define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
73#define BufferGetLSN(bufHdr) (PageGetLSN(BufHdrGetBlock(bufHdr)))
74
75
76#define LocalBufHdrGetBlock(bufHdr) \
77 LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
78
79
80#define BUF_WRITTEN 0x01
81#define BUF_REUSABLE 0x02
82
83#define RELS_BSEARCH_THRESHOLD 20
84
85
86
87
88
89
90
91#define BUF_DROP_FULL_SCAN_THRESHOLD (uint64) (NBuffers / 32)
92
94{
98
99
100#define REFCOUNT_ARRAY_ENTRIES 8
101
102
103
104
105
107{
108
110
111
112
113
114
115
116
117
120
121
123
125
126
129
130
131
132
133
134
135
136
138{
142
143
148
149
150
151
152
153
154
156
157
158
159
160
161
163
164
165
166
167
168
169
173
174
175
176
177
181
182
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
220
222
228
229
234
236{
237 .name = "buffer io",
242};
243
245{
246 .name = "buffer pin",
251};
252
253
254
255
256
257
258static void
260{
261
263 return;
264
265
266
267
268
269 {
270 int i;
271
273 {
275
277
279 {
281 return;
282 }
283 }
284 }
285
286
287
288
289
290 {
291
292
293
294
296 bool found;
297
298
301
302
304
305
309 &found);
312
313
316
318 }
319}
320
321
322
323
326{
328
329
331
332
335
336
339
340 return res;
341}
342
343
344
345
346
347
348
349
352{
354 int i;
355
358
359
360
361
362
364 {
366
368 return res;
369 }
370
371
372
373
374
375
376
377
379 return NULL;
380
382
383 if (res == NULL)
384 return NULL;
385 else if (!do_move)
386 {
387
388 return res;
389 }
390 else
391 {
392
393 bool found;
395
396
398
399
404
405
408
409
414
416 }
417}
418
419
420
421
422
423
424static inline int32
426{
428
431
432
433
434
435
437
438 if (ref == NULL)
439 return 0;
441}
442
443
444
445
446
447static void
449{
451
454 {
456
457
458
459
460
461
463 }
464 else
465 {
466 bool found;
468
473 }
474}
475
476
477
478
479
480
481
482
483#define BufferIsPinned(bufnum) \
484( \
485 !BufferIsValid(bufnum) ? \
486 false \
487 : \
488 BufferIsLocal(bufnum) ? \
489 (LocalRefCount[-(bufnum) - 1] > 0) \
490 : \
491 (GetPrivateRefCount(bufnum) > 0) \
493
494
506 uint32 *extended_by);
514 uint32 *extended_by);
521static int SyncOneBuffer(int buf_id, bool skip_recently_used,
528 char relpersistence,
532 bool *foundPtr, IOContext io_context);
547#ifdef USE_ASSERT_CHECKING
549 void *unused_context);
550#endif
555
556
557
558
559
564{
566 BufferTag newTag;
567 uint32 newHash;
568 LWLock *newPartitionLock;
569 int buf_id;
570
572
573
575 forkNum, blockNum);
576
577
580
581
585
586
587 if (buf_id < 0)
588 {
589#ifdef USE_PREFETCH
590
591
592
593
595 smgrprefetch(smgr_reln, forkNum, blockNum, 1))
596 {
598 }
599#endif
600 }
601 else
602 {
603
604
605
606
607
609 }
610
611
612
613
614
615
616
617
618
619
620
621
622
623 return result;
624}
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
652{
655
657 {
658
661 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
662 errmsg("cannot access temporary tables of other sessions")));
663
664
666 }
667 else
668 {
669
671 }
672}
673
674
675
676
677
678
679
680
681bool
683 Buffer recent_buffer)
684{
688 bool have_private_ref;
689
691
694 InitBufferTag(&tag, &rlocator, forkNum, blockNum);
695
697 {
698 int b = -recent_buffer - 1;
699
702
703
705 {
707
709
710 return true;
711 }
712 }
713 else
714 {
717
718
719
720
721
722
723 if (have_private_ref)
725 else
727
729 {
730
731
732
733
734
735 if (have_private_ref)
736 PinBuffer(bufHdr, NULL);
737 else
739
741
742 return true;
743 }
744
745
746 if (!have_private_ref)
748 }
749
750 return false;
751}
752
753
754
755
756
759{
761}
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
807{
809
810
811
812
813
814
817 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
818 errmsg("cannot access temporary tables of other sessions")));
819
820
821
822
823
825 forkNum, blockNum, mode, strategy);
826
827 return buf;
828}
829
830
831
832
833
834
835
836
837
838
839
840
845{
847
849 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
850 forkNum, blockNum,
851 mode, strategy);
852}
853
854
855
856
862{
864 uint32 extend_by = 1;
865
867 &buf, &extend_by);
868
869 return buf;
870}
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
897{
900 Assert(extend_by > 0);
901
902 if (bmr.smgr == NULL)
903 {
906 }
907
910 buffers, extended_by);
911}
912
913
914
915
916
917
918
919
920
928{
930 uint32 extended_by = 0;
933
937
938 if (bmr.smgr == NULL)
939 {
942 }
943
944
945
946
947
948
953 {
955
956
959
961 }
962
963
964
965
966
969
970
971
972
973
975
976
977
978
979
980
981
984
986 {
989
992
994 num_pages, extend_to,
995 buffers, &extended_by);
996
999
1000 for (uint32 i = 0; i < extended_by; i++)
1001 {
1002 if (first_block + i != extend_to - 1)
1004 else
1006 }
1007 }
1008
1009
1010
1011
1012
1013
1014
1016 {
1017 Assert(extended_by == 0);
1019 fork, extend_to - 1, mode, strategy);
1020 }
1021
1023}
1024
1025
1026
1027
1028
1029
1030static void
1032{
1034 bool need_to_zero;
1036
1038
1039 if (already_valid)
1040 {
1041
1042
1043
1044
1045 need_to_zero = false;
1046 }
1047 else if (isLocalBuf)
1048 {
1049
1052 }
1053 else
1054 {
1055
1056
1057
1058
1059
1060
1061
1062
1064 need_to_zero = StartBufferIO(bufHdr, true, false);
1065 }
1066
1067 if (need_to_zero)
1068 {
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082 if (!isLocalBuf)
1084
1085
1086 if (isLocalBuf)
1088 else
1090 }
1091 else if (!isLocalBuf)
1092 {
1093
1094
1095
1096
1099 else
1101 }
1102}
1103
1104
1105
1106
1107
1108
1112 char persistence,
1116 bool *foundPtr)
1117{
1121
1123
1124
1125 Assert((persistence == RELPERSISTENCE_TEMP ||
1126 persistence == RELPERSISTENCE_PERMANENT ||
1127 persistence == RELPERSISTENCE_UNLOGGED));
1128
1129 if (persistence == RELPERSISTENCE_TEMP)
1130 {
1133 }
1134 else
1135 {
1138 }
1139
1140 TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1145
1146 if (persistence == RELPERSISTENCE_TEMP)
1147 {
1148 bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr);
1149 if (*foundPtr)
1151 }
1152 else
1153 {
1154 bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1155 strategy, foundPtr, io_context);
1156 if (*foundPtr)
1158 }
1159 if (rel)
1160 {
1161
1162
1163
1164
1165
1167 if (*foundPtr)
1169 }
1170 if (*foundPtr)
1171 {
1175
1176 TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1181 true);
1182 }
1183
1185}
1186
1187
1188
1189
1190
1191
1197{
1200 int flags;
1201 char persistence;
1202
1203
1204
1205
1206
1207
1209 {
1211
1212
1213
1214
1215
1216
1219
1221 }
1222
1223 if (rel)
1224 persistence = rel->rd_rel->relpersistence;
1225 else
1226 persistence = smgr_persistence;
1227
1230 {
1231 bool found;
1232
1234 forkNum, blockNum, strategy, &found);
1237 }
1238
1239
1240
1241
1242
1243
1247 operation.smgr = smgr;
1248 operation.rel = rel;
1250 operation.forknum = forkNum;
1251 operation.strategy = strategy;
1254 blockNum,
1255 flags))
1257
1259}
1260
1265 int *nblocks,
1266 int flags,
1267 bool allow_forwarding)
1268{
1269 int actual_nblocks = *nblocks;
1270 int maxcombine = 0;
1271 bool did_start_io;
1272
1273 Assert(*nblocks == 1 || allow_forwarding);
1274 Assert(*nblocks > 0);
1276
1277 for (int i = 0; i < actual_nblocks; ++i)
1278 {
1279 bool found;
1280
1281 if (allow_forwarding && buffers[i] != InvalidBuffer)
1282 {
1284
1285
1286
1287
1288
1289
1290
1291
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1312 else
1316 }
1317 else
1318 {
1320 operation->smgr,
1323 blockNum + i,
1325 &found);
1326 }
1327
1328 if (found)
1329 {
1330
1331
1332
1333
1334
1335
1336 if (i == 0)
1337 {
1338 *nblocks = 1;
1339
1340#ifdef USE_ASSERT_CHECKING
1341
1342
1343
1344
1345
1346
1347 operation->buffers = buffers;
1348 operation->blocknum = blockNum;
1352#endif
1353 return false;
1354 }
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365 actual_nblocks = i;
1366 break;
1367 }
1368 else
1369 {
1370
1371
1372
1373
1374 if (i == 0 && actual_nblocks > 1)
1375 {
1378 blockNum);
1379 if (unlikely(maxcombine < actual_nblocks))
1380 {
1381 elog(DEBUG2, "limiting nblocks at %u from %u to %u",
1382 blockNum, actual_nblocks, maxcombine);
1383 actual_nblocks = maxcombine;
1384 }
1385 }
1386 }
1387 }
1388 *nblocks = actual_nblocks;
1389
1390
1391 operation->buffers = buffers;
1392 operation->blocknum = blockNum;
1393 operation->flags = flags;
1394 operation->nblocks = actual_nblocks;
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1411 {
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1429
1430 operation->nblocks = *nblocks;
1431 }
1432 else
1433 {
1435
1437 {
1438
1439
1440
1441
1442
1443
1444
1445
1446
1449 blockNum,
1450 actual_nblocks);
1451 }
1452
1453
1454
1455
1456
1457 did_start_io = true;
1458 }
1459
1461
1462 return did_start_io;
1463}
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493bool
1497 int *nblocks,
1498 int flags)
1499{
1501 true );
1502}
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512bool
1516 int flags)
1517{
1518 int nblocks = 1;
1519 bool result;
1520
1522 false );
1523 Assert(nblocks == 1);
1524
1525 return result;
1526}
1527
1528
1529
1530
1531static void
1533{
1534#ifdef USE_ASSERT_CHECKING
1537
1538 for (int i = 0; i < operation->nblocks; i++)
1539 {
1544
1547
1548 if (i < operation->nblocks_done)
1550 }
1551#endif
1552}
1553
1554
1555static inline bool
1557{
1560 true, nowait);
1561 else
1563}
1564
1565
1566
1567
1568static inline bool
1570{
1571
1572
1573
1574
1575
1577 {
1579 return true;
1580
1581
1582
1583
1584
1585
1586
1588 }
1589
1591}
1592
1593
1594
1595
1596
1597static void
1599{
1602 int newly_read_blocks = 0;
1603
1606
1607
1608
1609
1610
1611
1614
1619 {
1620
1621
1622
1623
1625 elog(DEBUG3, "partial read, will retry");
1626 }
1627
1628 Assert(newly_read_blocks > 0);
1630
1631 operation->nblocks_done += newly_read_blocks;
1632
1634}
1635
1636void
1638{
1642
1643 if (operation->persistence == RELPERSISTENCE_TEMP)
1644 {
1647 }
1648 else
1649 {
1652 }
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1666 elog(ERROR, "waiting for read operation that didn't read");
1667
1668
1669
1670
1671
1672
1673
1674
1675 while (true)
1676 {
1677 int ignored_nblocks_progress;
1678
1680
1681
1682
1683
1684
1686 {
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1699 {
1701
1703
1704
1705
1706
1707
1709 io_start, 0, 0);
1710 }
1711 else
1712 {
1714 }
1715
1716
1717
1718
1719
1721 }
1722
1723
1724
1725
1726
1727
1729 break;
1730
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1744 }
1745
1747
1748
1749}
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768static bool
1770{
1772 int flags = operation->flags;
1775 char persistence = operation->persistence;
1777 Buffer *io_buffers = &operation->buffers[nblocks_done];
1778 int io_buffers_len = 0;
1780 uint32 ioh_flags = 0;
1784 bool did_start_io;
1785
1786
1787
1788
1789
1790
1793
1794 if (persistence == RELPERSISTENCE_TEMP)
1795 {
1799 }
1800 else
1801 {
1804 }
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1819
1820
1821
1822
1823
1826
1827
1828
1829
1830
1831
1832
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1854 {
1856
1858 }
1859
1860
1861
1862
1863
1864
1865
1866
1868 {
1869
1870
1871
1872
1873
1874
1875
1877 *nblocks_progress = 1;
1878
1881 did_start_io = false;
1882
1883
1884
1885
1886
1887
1888 TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + operation->nblocks_done,
1893 true);
1894
1895 if (persistence == RELPERSISTENCE_TEMP)
1897 else
1899
1900 if (operation->rel)
1902
1904
1907 }
1908 else
1909 {
1911
1912
1913 Assert(io_buffers[0] == buffers[nblocks_done]);
1914 io_pages[0] = BufferGetBlock(buffers[nblocks_done]);
1915 io_buffers_len = 1;
1916
1917
1918
1919
1920
1921
1922
1923 for (int i = nblocks_done + 1; i < operation->nblocks; i++)
1924 {
1926 break;
1927
1930 Assert(io_buffers[io_buffers_len] == buffers[i]);
1931
1932 io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
1933 }
1934
1935
1937
1938
1940
1942 persistence == RELPERSISTENCE_TEMP ?
1945 flags);
1946
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1960 blocknum + nblocks_done,
1961 io_pages, io_buffers_len);
1963 io_start, 1, io_buffers_len * BLCKSZ);
1964
1965 if (persistence == RELPERSISTENCE_TEMP)
1967 else
1969
1970
1971
1972
1973
1974
1977
1978 *nblocks_progress = io_buffers_len;
1979 did_start_io = true;
1980 }
1981
1982 return did_start_io;
1983}
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2008 bool *foundPtr, IOContext io_context)
2009{
2010 BufferTag newTag;
2011 uint32 newHash;
2012 LWLock *newPartitionLock;
2013 int existing_buf_id;
2014 Buffer victim_buffer;
2016 uint32 victim_buf_state;
2017
2018
2021
2022
2024
2025
2028
2029
2032 if (existing_buf_id >= 0)
2033 {
2035 bool valid;
2036
2037
2038
2039
2040
2041
2043
2045
2046
2048
2049 *foundPtr = true;
2050
2051 if (!valid)
2052 {
2053
2054
2055
2056
2057
2058 *foundPtr = false;
2059 }
2060
2061 return buf;
2062 }
2063
2064
2065
2066
2067
2069
2070
2071
2072
2073
2074
2077
2078
2079
2080
2081
2082
2085 if (existing_buf_id >= 0)
2086 {
2088 bool valid;
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2101
2102
2103
2104
2105
2107
2108
2109
2111
2112 valid = PinBuffer(existing_buf_hdr, strategy);
2113
2114
2116
2117 *foundPtr = true;
2118
2119 if (!valid)
2120 {
2121
2122
2123
2124
2125
2126 *foundPtr = false;
2127 }
2128
2129 return existing_buf_hdr;
2130 }
2131
2132
2133
2134
2135 victim_buf_state = LockBufHdr(victim_buf_hdr);
2136
2137
2140
2141 victim_buf_hdr->tag = newTag;
2142
2143
2144
2145
2146
2147
2148
2150 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
2152
2153 UnlockBufHdr(victim_buf_hdr, victim_buf_state);
2154
2156
2157
2158
2159
2160 *foundPtr = false;
2161
2162 return victim_buf_hdr;
2163}
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182static void
2184{
2186 uint32 oldHash;
2187 LWLock *oldPartitionLock;
2190
2191
2192 oldTag = buf->tag;
2193
2197
2198
2199
2200
2201
2202
2205
2206retry:
2207
2208
2209
2210
2211
2213
2214
2216
2217
2219 {
2222 return;
2223 }
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2236 {
2239
2241 elog(ERROR, "buffer is pinned in InvalidateBuffer");
2243 goto retry;
2244 }
2245
2246
2247
2248
2249
2254
2255
2256
2257
2260
2261
2262
2263
2265
2266
2267
2268
2270}
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281static bool
2283{
2286 LWLock *partition_lock;
2288
2290
2291
2292 tag = buf_hdr->tag;
2293
2296
2298
2299
2301
2302
2303
2304
2305
2309
2310
2311
2312
2313
2315 {
2317
2320
2321 return false;
2322 }
2323
2324
2325
2326
2327
2328
2329
2330
2334
2336
2337
2339
2341
2345
2346 return true;
2347}
2348
2351{
2355 bool from_ring;
2356
2357
2358
2359
2360
2363
2364
2365again:
2366
2367
2368
2369
2370
2373
2375
2376
2378
2379
2380
2381
2383
2384
2385
2386
2387
2388
2389
2390
2392 {
2393 LWLock *content_lock;
2394
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2414 {
2415
2416
2417
2418
2420 goto again;
2421 }
2422
2423
2424
2425
2426
2427
2428
2429
2430 if (strategy != NULL)
2431 {
2433
2434
2438
2441 {
2444 goto again;
2445 }
2446 }
2447
2448
2451
2453 &buf_hdr->tag);
2454 }
2455
2456
2458 {
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2477 }
2478
2479
2480
2481
2482
2483
2485 {
2487 goto again;
2488 }
2489
2490
2491#ifdef USE_ASSERT_CHECKING
2493
2496
2498#endif
2499
2500 return buf;
2501}
2502
2503
2504
2505
2506
2507
2508
2511{
2513}
2514
2515
2516
2517
2518
2519
2520
2523{
2524 uint32 estimated_pins_held;
2525
2526
2527
2528
2529
2530
2532
2533
2535 return 0;
2536
2538}
2539
2540
2541
2542
2543
2544
2545
2546
2547void
2549{
2551
2552 if (*additional_pins <= 1)
2553 return;
2554
2556 limit = Max(limit, 1);
2557 if (limit < *additional_pins)
2558 *additional_pins = limit;
2559}
2560
2561
2562
2563
2564
2573 uint32 *extended_by)
2574{
2576
2577 TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2582 extend_by);
2583
2586 extend_by, extend_upto,
2587 buffers, &extend_by);
2588 else
2590 extend_by, extend_upto,
2591 buffers, &extend_by);
2592 *extended_by = extend_by;
2593
2594 TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2599 *extended_by,
2600 first_block);
2601
2602 return first_block;
2603}
2604
2605
2606
2607
2608
2617 uint32 *extended_by)
2618{
2622
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635 for (uint32 i = 0; i < extend_by; i++)
2636 {
2637 Block buf_block;
2638
2641
2642
2643 MemSet(buf_block, 0, BLCKSZ);
2644 }
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2658
2659
2660
2661
2662
2665
2667
2668
2669
2670
2671
2672
2673
2675 {
2676 uint32 orig_extend_by = extend_by;
2677
2678 if (first_block > extend_upto)
2679 extend_by = 0;
2680 else if ((uint64) first_block + extend_by > extend_upto)
2681 extend_by = extend_upto - first_block;
2682
2683 for (uint32 i = extend_by; i < orig_extend_by; i++)
2684 {
2686
2687
2688
2689
2690
2693 }
2694
2695 if (extend_by == 0)
2696 {
2699 *extended_by = extend_by;
2700 return first_block;
2701 }
2702 }
2703
2704
2707 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2708 errmsg("cannot extend relation %s beyond %u blocks",
2711
2712
2713
2714
2715
2716
2717
2718 for (uint32 i = 0; i < extend_by; i++)
2719 {
2720 Buffer victim_buf = buffers[i];
2724 LWLock *partition_lock;
2725 int existing_id;
2726
2727
2730
2734
2736
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753 if (existing_id >= 0)
2754 {
2756 Block buf_block;
2757 bool valid;
2758
2759
2760
2761
2762
2763 valid = PinBuffer(existing_hdr, strategy);
2764
2766
2767
2768
2769
2770
2773
2776
2779 (errmsg("unexpected data beyond EOF in block %u of relation %s",
2782 errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794 do
2795 {
2797
2798 buf_state &= ~BM_VALID;
2800 } while ((existing_hdr, true, false));
2801 }
2802 else
2803 {
2805
2806 buf_state = LockBufHdr(victim_buf_hdr);
2807
2808
2811
2812 victim_buf_hdr->tag = tag;
2813
2817
2819
2821
2822
2824 }
2825 }
2826
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2840
2841
2842
2843
2844
2845
2846
2847
2850
2852 io_start, 1, extend_by * BLCKSZ);
2853
2854
2855 for (uint32 i = 0; i < extend_by; i++)
2856 {
2859 bool lock = false;
2860
2862 lock = true;
2864 {
2866 if (first_block + i + 1 == extend_upto)
2867 lock = true;
2868 }
2869
2870 if (lock)
2872
2874 }
2875
2877
2878 *extended_by = extend_by;
2879
2880 return first_block;
2881}
2882
2883
2884
2885
2886
2887
2888
2889
2890bool
2892{
2894
2896
2898 {
2899
2900 return true;
2901 }
2902 else
2903 {
2907 }
2908}
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918bool
2920{
2922
2924
2926 {
2927 int bufid = -buffer - 1;
2928
2930
2931 }
2932 else
2933 {
2937 }
2938
2940}
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951void
2953{
2956 uint32 old_buf_state;
2957
2960
2962 {
2964 return;
2965 }
2966
2968
2972
2974 for (;;)
2975 {
2978
2979 buf_state = old_buf_state;
2980
2983
2985 buf_state))
2986 break;
2987 }
2988
2989
2990
2991
2992 if (!(old_buf_state & BM_DIRTY))
2993 {
2997 }
2998}
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3017{
3020
3022 {
3025 {
3032 }
3033 else
3034 {
3036
3042 }
3043 }
3044
3045 return ReadBuffer(relation, blockNum);
3046}
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071static bool
3073{
3075 bool result;
3077
3080
3082
3083 if (ref == NULL)
3084 {
3086 uint32 old_buf_state;
3087
3089
3091 for (;;)
3092 {
3095
3096 buf_state = old_buf_state;
3097
3098
3100
3101 if (strategy == NULL)
3102 {
3103
3106 }
3107 else
3108 {
3109
3110
3111
3112
3115 }
3116
3118 buf_state))
3119 {
3120 result = (buf_state & BM_VALID) != 0;
3121
3122
3123
3124
3125
3126
3127
3128
3130 break;
3131 }
3132 }
3133 }
3134 else
3135 {
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3152 }
3153
3157 return result;
3158}
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182static void
3184{
3188
3189
3190
3191
3192
3194
3195
3196
3197
3198
3199
3201
3202
3203
3204
3205
3210
3212
3215
3217}
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228static void
3230{
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3242
3245 {
3246
3247 int wait_backend_pgprocno = buf->wait_backend_pgprocno;
3248
3249 buf_state &= ~BM_PIN_COUNT_WAITER;
3252 }
3253 else
3255}
3256
3257
3258
3259
3260
3261
3262
3263static void
3265{
3267
3270}
3271
3272static void
3274{
3277
3279
3280
3282 Assert(ref != NULL);
3286 {
3288 uint32 old_buf_state;
3289
3290
3291
3292
3293
3294
3295
3296
3298
3299
3301
3302
3303
3304
3305
3306
3307
3309 for (;;)
3310 {
3313
3314 buf_state = old_buf_state;
3315
3317
3319 buf_state))
3320 break;
3321 }
3322
3323
3326
3328 }
3329}
3330
3331#define ST_SORT sort_checkpoint_bufferids
3332#define ST_ELEMENT_TYPE CkptSortItem
3333#define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
3334#define ST_SCOPE static
3335#define ST_DEFINE
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348static void
3350{
3352 int buf_id;
3353 int num_to_scan;
3354 int num_spaces;
3355 int num_processed;
3356 int num_written;
3358 Oid last_tsid;
3360 int i;
3363
3364
3365
3366
3367
3368
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389 num_to_scan = 0;
3390 for (buf_id = 0; buf_id < NBuffers; buf_id++)
3391 {
3393
3394
3395
3396
3397
3399
3400 if ((buf_state & mask) == mask)
3401 {
3403
3405
3407 item->buf_id = buf_id;
3412 }
3413
3415
3416
3419 }
3420
3421 if (num_to_scan == 0)
3422 return;
3423
3425
3426 TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
3427
3428
3429
3430
3431
3432
3433
3434
3435 sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
3436
3437 num_spaces = 0;
3438
3439
3440
3441
3442
3444 for (i = 0; i < num_to_scan; i++)
3445 {
3447 Oid cur_tsid;
3448
3450
3451
3452
3453
3454
3455 if (last_tsid == InvalidOid || last_tsid != cur_tsid)
3456 {
3458
3459 num_spaces++;
3460
3461
3462
3463
3464
3466
3467 if (per_ts_stat == NULL)
3469 else
3471
3472 s = &per_ts_stat[num_spaces - 1];
3473 memset(s, 0, sizeof(*s));
3474 s->tsId = cur_tsid;
3475
3476
3477
3478
3479
3480
3482
3483
3484
3485
3486
3487
3488 last_tsid = cur_tsid;
3489 }
3490 else
3491 {
3492 s = &per_ts_stat[num_spaces - 1];
3493 }
3494
3496
3497
3500 }
3501
3502 Assert(num_spaces > 0);
3503
3504
3505
3506
3507
3508
3511 NULL);
3512
3513 for (i = 0; i < num_spaces; i++)
3514 {
3516
3518
3520 }
3521
3523
3524
3525
3526
3527
3528
3529
3530 num_processed = 0;
3531 num_written = 0;
3533 {
3537
3539 Assert(buf_id != -1);
3540
3542
3543 num_processed++;
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3558 {
3560 {
3561 TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
3563 num_written++;
3564 }
3565 }
3566
3567
3568
3569
3570
3573 ts_stat->index++;
3574
3575
3577 {
3579 }
3580 else
3581 {
3582
3584 }
3585
3586
3587
3588
3589
3590
3592 }
3593
3594
3595
3596
3597
3599
3600 pfree(per_ts_stat);
3601 per_ts_stat = NULL;
3603
3604
3605
3606
3607
3609
3610 TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
3611}
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624bool
3626{
3627
3628 int strategy_buf_id;
3629 uint32 strategy_passes;
3630 uint32 recent_alloc;
3631
3632
3633
3634
3635
3636 static bool saved_info_valid = false;
3637 static int prev_strategy_buf_id;
3638 static uint32 prev_strategy_passes;
3639 static int next_to_clean;
3640 static uint32 next_passes;
3641
3642
3643 static float smoothed_alloc = 0;
3644 static float smoothed_density = 10.0;
3645
3646
3647 float smoothing_samples = 16;
3648 float scan_whole_pool_milliseconds = 120000.0;
3649
3650
3651 long strategy_delta;
3652 int bufs_to_lap;
3653 int bufs_ahead;
3654 float scans_per_alloc;
3655 int reusable_buffers_est;
3656 int upcoming_alloc_est;
3657 int min_scan_buffers;
3658
3659
3660 int num_to_scan;
3661 int num_written;
3662 int reusable_buffers;
3663
3664
3665 long new_strategy_delta;
3666 uint32 new_recent_alloc;
3667
3668
3669
3670
3671
3672 strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
3673
3674
3676
3677
3678
3679
3680
3681
3683 {
3684 saved_info_valid = false;
3685 return true;
3686 }
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696 if (saved_info_valid)
3697 {
3698 int32 passes_delta = strategy_passes - prev_strategy_passes;
3699
3700 strategy_delta = strategy_buf_id - prev_strategy_buf_id;
3701 strategy_delta += (long) passes_delta * NBuffers;
3702
3703 Assert(strategy_delta >= 0);
3704
3705 if ((int32) (next_passes - strategy_passes) > 0)
3706 {
3707
3708 bufs_to_lap = strategy_buf_id - next_to_clean;
3709#ifdef BGW_DEBUG
3710 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3711 next_passes, next_to_clean,
3712 strategy_passes, strategy_buf_id,
3713 strategy_delta, bufs_to_lap);
3714#endif
3715 }
3716 else if (next_passes == strategy_passes &&
3717 next_to_clean >= strategy_buf_id)
3718 {
3719
3720 bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
3721#ifdef BGW_DEBUG
3722 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3723 next_passes, next_to_clean,
3724 strategy_passes, strategy_buf_id,
3725 strategy_delta, bufs_to_lap);
3726#endif
3727 }
3728 else
3729 {
3730
3731
3732
3733
3734#ifdef BGW_DEBUG
3735 elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3736 next_passes, next_to_clean,
3737 strategy_passes, strategy_buf_id,
3738 strategy_delta);
3739#endif
3740 next_to_clean = strategy_buf_id;
3741 next_passes = strategy_passes;
3743 }
3744 }
3745 else
3746 {
3747
3748
3749
3750
3751#ifdef BGW_DEBUG
3752 elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3753 strategy_passes, strategy_buf_id);
3754#endif
3755 strategy_delta = 0;
3756 next_to_clean = strategy_buf_id;
3757 next_passes = strategy_passes;
3759 }
3760
3761
3762 prev_strategy_buf_id = strategy_buf_id;
3763 prev_strategy_passes = strategy_passes;
3764 saved_info_valid = true;
3765
3766
3767
3768
3769
3770
3771
3772 if (strategy_delta > 0 && recent_alloc > 0)
3773 {
3774 scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
3775 smoothed_density += (scans_per_alloc - smoothed_density) /
3776 smoothing_samples;
3777 }
3778
3779
3780
3781
3782
3783
3784 bufs_ahead = NBuffers - bufs_to_lap;
3785 reusable_buffers_est = (float) bufs_ahead / smoothed_density;
3786
3787
3788
3789
3790
3791
3792 if (smoothed_alloc <= (float) recent_alloc)
3793 smoothed_alloc = recent_alloc;
3794 else
3795 smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3796 smoothing_samples;
3797
3798
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809 if (upcoming_alloc_est == 0)
3810 smoothed_alloc = 0;
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822 min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
3823
3824 if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3825 {
3826#ifdef BGW_DEBUG
3827 elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3828 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3829#endif
3830 upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3831 }
3832
3833
3834
3835
3836
3837
3838
3839
3840 num_to_scan = bufs_to_lap;
3841 num_written = 0;
3842 reusable_buffers = reusable_buffers_est;
3843
3844
3845 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3846 {
3847 int sync_state = SyncOneBuffer(next_to_clean, true,
3848 wb_context);
3849
3850 if (++next_to_clean >= NBuffers)
3851 {
3852 next_to_clean = 0;
3853 next_passes++;
3854 }
3855 num_to_scan--;
3856
3858 {
3859 reusable_buffers++;
3861 {
3863 break;
3864 }
3865 }
3867 reusable_buffers++;
3868 }
3869
3871
3872#ifdef BGW_DEBUG
3873 elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3874 recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3875 smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3876 bufs_to_lap - num_to_scan,
3877 num_written,
3878 reusable_buffers - reusable_buffers_est);
3879#endif
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889 new_strategy_delta = bufs_to_lap - num_to_scan;
3890 new_recent_alloc = reusable_buffers - reusable_buffers_est;
3891 if (new_strategy_delta > 0 && new_recent_alloc > 0)
3892 {
3893 scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
3894 smoothed_density += (scans_per_alloc - smoothed_density) /
3895 smoothing_samples;
3896
3897#ifdef BGW_DEBUG
3898 elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3899 new_recent_alloc, new_strategy_delta,
3900 scans_per_alloc, smoothed_density);
3901#endif
3902 }
3903
3904
3905 return (bufs_to_lap == 0 && recent_alloc == 0);
3906}
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922static int
3924{
3926 int result = 0;
3929
3930
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3944
3947 {
3949 }
3950 else if (skip_recently_used)
3951 {
3952
3954 return result;
3955 }
3956
3958 {
3959
3961 return result;
3962 }
3963
3964
3965
3966
3967
3970
3972
3974
3975 tag = bufHdr->tag;
3976
3978
3979
3980
3981
3982
3984
3986}
3987
3988
3989
3990
3991
3992
3993
3994
3995void
3997{
3999
4001
4003}
4004
4005
4006
4007
4008
4009
4010
4011
4012void
4014{
4016
4017
4018
4019
4020
4021
4022
4023
4025
4027
4030
4033
4034
4035
4036
4037
4040}
4041
4042
4043
4044
4045
4046static void
4048{
4050
4052
4053
4055}
4056
4057
4058
4059
4060
4061
4062
4063
4064static void
4066{
4067#ifdef USE_ASSERT_CHECKING
4068 int RefCountErrors = 0;
4070 int i;
4071 char *s;
4072
4073
4075 {
4077
4079 {
4081 elog(WARNING, "buffer refcount leak: %s", s);
4083
4084 RefCountErrors++;
4085 }
4086 }
4087
4088
4090 {
4092
4095 {
4097 elog(WARNING, "buffer refcount leak: %s", s);
4099 RefCountErrors++;
4100 }
4101 }
4102
4103 Assert(RefCountErrors == 0);
4104#endif
4105}
4106
4107#ifdef USE_ASSERT_CHECKING
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125void
4126AssertBufferLocksPermitCatalogRead(void)
4127{
4129}
4130
4131static void
4133 void *unused_context)
4134{
4137 Oid relid;
4138
4140 return;
4141
4144 return;
4145
4147 ((char *) lock - offsetof(BufferDesc, content_lock));
4148 tag = bufHdr->tag;
4149
4150
4151
4152
4153
4154
4155
4156
4157
4159
4161 return;
4162
4164
4166}
4167#endif
4168
4169
4170
4171
4172
4173char *
4175{
4178 char *result;
4181
4184 {
4188 }
4189 else
4190 {
4194 }
4195
4196
4198
4199 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
4205 return result;
4206}
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216void
4218{
4220}
4221
4222
4223
4224
4225
4226
4227
4228
4229
4232{
4234
4236
4239 else
4241
4242
4244}
4245
4246
4247
4248
4249
4250
4251void
4254{
4256
4257
4259
4262 else
4264
4265
4269}
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290static void
4293{
4298 char *bufToWrite;
4300
4301
4302
4303
4304
4305
4307 return;
4308
4309
4314
4315
4316 if (reln == NULL)
4318
4320 buf->tag.blockNum,
4324
4326
4327
4328
4329
4330
4332
4333
4334 buf_state &= ~BM_JUST_DIRTIED;
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4356
4357
4358
4359
4360
4361
4362
4364
4365
4366
4367
4368
4369
4371
4373
4374
4375
4376
4379 buf->tag.blockNum,
4380 bufToWrite,
4381 false);
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4403
4405
4406
4407
4408
4409
4411
4413 buf->tag.blockNum,
4417
4418
4420}
4421
4422
4423
4424
4425
4426
4427
4428
4429
4432{
4433 if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
4434 {
4435
4436
4437
4438
4439
4440
4442
4444
4445 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
4446 }
4447 else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
4448 {
4450 }
4451 else
4453
4454 return 0;
4455}
4456
4457
4458
4459
4460
4461
4462bool
4464{
4466
4467
4469 return false;
4470
4471
4474
4475
4476
4477
4478
4479
4480
4481
4484}
4485
4486
4487
4488
4489
4490
4491
4494{
4499
4500
4501
4502
4505
4506
4509
4514
4515 return lsn;
4516}
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539void
4542{
4543 int i;
4544 int j;
4547 uint64 nBlocksToInvalidate = 0;
4548
4550
4551
4553 {
4555 {
4556 for (j = 0; j < nforks; j++)
4558 firstDelBlock[j]);
4559 }
4560 return;
4561 }
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585 for (i = 0; i < nforks; i++)
4586 {
4587
4589
4591 {
4593 break;
4594 }
4595
4596
4597 nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4598 }
4599
4600
4601
4602
4603
4606 {
4607 for (j = 0; j < nforks; j++)
4609 nForkBlock[j], firstDelBlock[j]);
4610 return;
4611 }
4612
4614 {
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4635 continue;
4636
4638
4639 for (j = 0; j < nforks; j++)
4640 {
4644 {
4646 break;
4647 }
4648 }
4649 if (j >= nforks)
4651 }
4652}
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662void
4664{
4665 int i;
4666 int n = 0;
4669 uint64 nBlocksToInvalidate = 0;
4671 bool cached = true;
4672 bool use_bsearch;
4673
4674 if (nlocators == 0)
4675 return;
4676
4677 rels = palloc(sizeof(SMgrRelation) * nlocators);
4678
4679
4680 for (i = 0; i < nlocators; i++)
4681 {
4683 {
4684 if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4686 }
4687 else
4688 rels[n++] = smgr_reln[i];
4689 }
4690
4691
4692
4693
4694
4695 if (n == 0)
4696 {
4698 return;
4699 }
4700
4701
4702
4703
4704
4707
4708
4709
4710
4711
4712 for (i = 0; i < n && cached; i++)
4713 {
4715 {
4716
4718
4719
4721 {
4723 continue;
4724 cached = false;
4725 break;
4726 }
4727
4728
4729 nBlocksToInvalidate += block[i][j];
4730 }
4731 }
4732
4733
4734
4735
4736
4738 {
4740 {
4742 {
4743
4745 continue;
4746
4747
4750 }
4751 }
4752
4755 return;
4756 }
4757
4761 locators[i] = rels[i]->smgr_rlocator.locator;
4762
4763
4764
4765
4766
4767
4768
4770
4771
4772 if (use_bsearch)
4774
4776 {
4780
4781
4782
4783
4784
4785
4786 if (!use_bsearch)
4787 {
4788 int j;
4789
4791 {
4793 {
4794 rlocator = &locators[j];
4795 break;
4796 }
4797 }
4798 }
4799 else
4800 {
4802
4804 rlocator = bsearch(&locator,
4807 }
4808
4809
4810 if (rlocator == NULL)
4811 continue;
4812
4816 else
4818 }
4819
4820 pfree(locators);
4822}
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833static void
4837{
4839
4840 for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4841 {
4842 uint32 bufHash;
4843 BufferTag bufTag;
4844 LWLock *bufPartitionLock;
4845 int buf_id;
4848
4849
4850 InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
4851
4852
4855
4856
4860
4861 if (buf_id < 0)
4862 continue;
4863
4865
4866
4867
4868
4869
4870
4871
4873
4878 else
4880 }
4881}
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894void
4896{
4897 int i;
4898
4899
4900
4901
4902
4903
4905 {
4908
4909
4910
4911
4912
4913 if (bufHdr->tag.dbOid != dbid)
4914 continue;
4915
4917 if (bufHdr->tag.dbOid == dbid)
4919 else
4921 }
4922}
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942void
4944{
4945 int i;
4948
4950 {
4952 {
4954
4959 {
4961
4962
4964 errcallback.arg = bufHdr;
4967
4968
4971
4972
4973
4974
4975
4977
4978
4980
4982
4983
4985 }
4986 }
4987
4988 return;
4989 }
4990
4992 {
4994
4996
4997
4998
4999
5000
5002 continue;
5003
5004
5007
5011 {
5017 }
5018 else
5020 }
5021}
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032void
5034{
5035 int i;
5037 bool use_bsearch;
5038
5039 if (nrels == 0)
5040 return;
5041
5042
5044
5045 for (i = 0; i < nrels; i++)
5046 {
5048
5050 srels[i].srel = smgrs[i];
5051 }
5052
5053
5054
5055
5056
5058
5059
5060 if (use_bsearch)
5062
5064 {
5068
5069
5070
5071
5072
5073
5074 if (!use_bsearch)
5075 {
5076 int j;
5077
5078 for (j = 0; j < nrels; j++)
5079 {
5081 {
5082 srelent = &srels[j];
5083 break;
5084 }
5085 }
5086 }
5087 else
5088 {
5090
5092 srelent = bsearch(&rlocator,
5095 }
5096
5097
5098 if (srelent == NULL)
5099 continue;
5100
5101
5104
5108 {
5114 }
5115 else
5117 }
5118
5120}
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132static void
5136{
5139 Page srcPage;
5140 Page dstPage;
5141 bool use_wal;
5150
5151
5152
5153
5154
5155
5157
5158
5160 forkNum);
5161
5162
5163 if (nblocks == 0)
5164 return;
5165
5166
5167
5168
5169
5170 memset(buf.data, 0, BLCKSZ);
5172 buf.data, true);
5173
5174
5177
5178
5182
5183
5184
5185
5186
5189 bstrategy_src,
5190 src_smgr,
5191 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
5192 forkNum,
5194 &p,
5195 0);
5196
5197
5198 for (blkno = 0; blkno < nblocks; blkno++)
5199 {
5201
5202
5206
5210 permanent);
5212
5214
5215
5216 memcpy(dstPage, srcPage, BLCKSZ);
5218
5219
5220 if (use_wal)
5222
5224
5227 }
5230
5233}
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245
5246void
5249{
5250 char relpersistence;
5253
5254
5255 relpersistence = permanent ?
5256 RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
5257
5260
5261
5262
5263
5264
5265
5266
5268
5269
5271 permanent);
5272
5273
5276 {
5278 {
5279 smgrcreate(dst_rel, forkNum, false);
5280
5281
5282
5283
5284
5287
5288
5290 permanent);
5291 }
5292 }
5293}
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310void
5312{
5313 int i;
5315
5317 {
5319
5321
5322
5323
5324
5325
5326 if (bufHdr->tag.dbOid != dbid)
5327 continue;
5328
5329
5332
5334 if (bufHdr->tag.dbOid == dbid &&
5336 {
5342 }
5343 else
5345 }
5346}
5347
5348
5349
5350
5351
5352void
5354{
5356
5357
5359
5361
5363
5365
5367}
5368
5369
5370
5371
5372void
5374{
5377
5380 else
5382}
5383
5384
5385
5386
5387
5388
5389void
5391{
5394}
5395
5396
5397
5398
5399
5400
5401
5402
5403
5404void
5406{
5411 else
5412 {
5414
5416 Assert(ref != NULL);
5418 }
5420}
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435
5436void
5438{
5441
5444
5446 {
5448 return;
5449 }
5450
5452
5454
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
5466
5467
5470 {
5472 bool dirtied = false;
5473 bool delayChkptFlags = false;
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
5487 {
5488
5489
5490
5491
5492
5493
5494
5495
5498 return;
5499
5500
5501
5502
5503
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513
5514
5515
5516
5517
5518
5519
5520
5521
5522
5525 delayChkptFlags = true;
5527 }
5528
5530
5532
5533 if (!(buf_state & BM_DIRTY))
5534 {
5535 dirtied = true;
5536
5537
5538
5539
5540
5541
5542
5543
5544
5545
5546
5547
5548
5549
5552 }
5553
5556
5557 if (delayChkptFlags)
5559
5560 if (dirtied)
5561 {
5565 }
5566 }
5567}
5568
5569
5570
5571
5572
5573
5574
5575
5576
5577
5578void
5580{
5582
5583 if (buf)
5584 {
5586
5588
5589
5590
5591
5592
5595 buf_state &= ~BM_PIN_COUNT_WAITER;
5596
5598
5600 }
5601}
5602
5603
5604
5605
5606void
5608{
5610
5613 return;
5614
5616
5623 else
5624 elog(ERROR, "unrecognized buffer lock mode: %d", mode);
5625}
5626
5627
5628
5629
5630
5631
5632bool
5634{
5636
5639 return true;
5640
5642
5645}
5646
5647
5648
5649
5650
5651
5652
5653void
5655{
5657 {
5659 elog(ERROR, "incorrect local pin count: %d",
5661 }
5662 else
5663 {
5665 elog(ERROR, "incorrect local pin count: %d",
5667 }
5668}
5669
5670
5671
5672
5673
5674
5675
5676
5677
5678
5679
5680
5681
5682
5683
5684
5685
5686void
5688{
5692 bool logged_recovery_conflict = false;
5693
5696
5698
5699
5700
5701
5702
5703
5704
5705
5706
5708 return;
5709
5711
5712 for (;;)
5713 {
5715
5716
5719
5722 {
5723
5725
5726
5727
5728
5729
5730
5731 if (logged_recovery_conflict)
5734 NULL, false);
5735
5737 {
5738
5741 }
5742 return;
5743 }
5744
5746 {
5749 elog(ERROR, "multiple backends attempting to wait for pincount 1");
5750 }
5756
5757
5759 {
5761 {
5762
5765 }
5766
5767
5768
5769
5770
5771
5772
5773
5774
5775 if (waitStart != 0 && !logged_recovery_conflict)
5776 {
5778
5781 {
5783 waitStart, now, NULL, true);
5784 logged_recovery_conflict = true;
5785 }
5786 }
5787
5788
5789
5790
5791
5794
5795
5797
5799
5801 }
5802 else
5804
5805
5806
5807
5808
5809
5810
5811
5812
5816 buf_state &= ~BM_PIN_COUNT_WAITER;
5818
5820
5821 }
5822}
5823
5824
5825
5826
5827
5828bool
5830{
5832
5833
5834
5835
5836
5837
5838
5839 if (bufid < 0)
5840 return false;
5841
5843 return true;
5844
5845 return false;
5846}
5847
5848
5849
5850
5851
5852
5853
5854bool
5856{
5860
5862
5863
5864
5866 {
5868
5871 return false;
5872
5873 return true;
5874 }
5875
5876
5880 return false;
5881
5882
5884 return false;
5885
5889
5892 {
5893
5895 return true;
5896 }
5897
5898
5901 return false;
5902}
5903
5904
5905
5906
5907
5908
5909
5910
5911
5912bool
5914{
5917
5919
5920
5921
5923 {
5924
5926 return false;
5927
5928 return true;
5929 }
5930
5931
5933 return false;
5934
5936
5937
5940
5942
5945 {
5946
5948 return true;
5949 }
5950
5952 return false;
5953}
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965static void
5967{
5969
5971 for (;;)
5972 {
5975
5976
5977
5978
5979
5980
5982
5983
5984
5985
5986
5987
5988 iow = buf->io_wref;
5990
5991
5993 break;
5994
5995
5996
5997
5998
6000 {
6002
6003
6004
6005
6006
6007
6008
6009
6010
6012 continue;
6013 }
6014
6015
6017 }
6019}
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044bool
6046{
6048
6050
6051 for (;;)
6052 {
6054
6056 break;
6058 if (nowait)
6059 return false;
6061 }
6062
6063
6064
6065
6066 if (forInput ? (buf_state & BM_VALID) : !(buf_state & BM_DIRTY))
6067 {
6069 return false;
6070 }
6071
6074
6077
6078 return true;
6079}
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101void
6103 bool forget_owner, bool release_aio)
6104{
6106
6108
6110 buf_state &= ~BM_IO_IN_PROGRESS;
6111
6112
6113 buf_state &= ~BM_IO_ERROR;
6114
6117
6118 if (release_aio)
6119 {
6120
6124 }
6125
6126 buf_state |= set_flag_bits;
6128
6129 if (forget_owner)
6132
6134
6135
6136
6137
6138
6139
6140
6141
6142
6145}
6146
6147
6148
6149
6150
6151
6152
6153
6154
6155
6156
6157
6158
6159
6160static void
6162{
6165
6168
6169 if (!(buf_state & BM_VALID))
6170 {
6173 }
6174 else
6175 {
6178
6179
6181 {
6182
6184 (errcode(ERRCODE_IO_ERROR),
6185 errmsg("could not write block %u of %s",
6189 errdetail("Multiple failures --- write error might be permanent.")));
6190 }
6191 }
6192
6194}
6195
6196
6197
6198
6199static void
6201{
6203
6204
6205 if (bufHdr != NULL)
6206 errcontext("writing block %u of relation %s",
6210}
6211
6212
6213
6214
6215static void
6217{
6219
6220 if (bufHdr != NULL)
6221 errcontext("writing block %u of relation %s",
6226}
6227
6228
6229
6230
6231static int
6233{
6236
6238 return -1;
6240 return 1;
6241
6243 return -1;
6245 return 1;
6246
6248 return -1;
6250 return 1;
6251 else
6252 return 0;
6253}
6254
6255
6256
6257
6260{
6262 uint32 old_buf_state;
6263
6265
6267
6268 while (true)
6269 {
6270
6272
6273 if (!(old_buf_state & BM_LOCKED))
6274 break;
6276 }
6278 return old_buf_state | BM_LOCKED;
6279}
6280
6281
6282
6283
6284
6285
6286
6287
6290{
6293
6295
6297
6299 {
6302 }
6303
6305
6306 return buf_state;
6307}
6308
6309
6310
6311
6312static inline int
6314{
6315 int ret;
6318
6321
6323
6324 if (ret != 0)
6325 return ret;
6326
6328 return -1;
6330 return 1;
6331
6333 return -1;
6335 return 1;
6336
6337 return 0;
6338}
6339
6340
6341
6342
6343
6344
6345
6346static inline int
6348{
6349
6351 return -1;
6352 else if (a->tsId > b->tsId)
6353 return 1;
6354
6355 if (a->relNumber < b->relNumber)
6356 return -1;
6357 else if (a->relNumber > b->relNumber)
6358 return 1;
6359
6360 else if (a->forkNum < b->forkNum)
6361 return -1;
6362 else if (a->forkNum > b->forkNum)
6363 return 1;
6364
6365 else if (a->blockNum < b->blockNum)
6366 return -1;
6367 else if (a->blockNum > b->blockNum)
6368 return 1;
6369
6370 return 0;
6371}
6372
6373
6374
6375
6376
6377static int
6379{
6382
6383
6385 return 1;
6386 else if (sa->progress == sb->progress)
6387 return 0;
6388 else
6389 return -1;
6390}
6391
6392
6393
6394
6395
6396
6397
6398
6399
6400void
6402{
6404
6407}
6408
6409
6410
6411
6412void
6415{
6417
6418
6419
6420
6421
6424 return;
6425
6426
6427
6428
6429
6431 {
6433
6435
6436 pending->tag = *tag;
6437 }
6438
6439
6440
6441
6442
6443
6446}
6447
6448#define ST_SORT sort_pending_writebacks
6449#define ST_ELEMENT_TYPE PendingWriteback
6450#define ST_COMPARE(a, b) buffertag_comparator(&a->tag, &b->tag)
6451#define ST_SCOPE static
6452#define ST_DEFINE
6454
6455
6456
6457
6458
6459
6460
6461
6462void
6464{
6466 int i;
6467
6469 return;
6470
6471
6472
6473
6474
6477
6479
6480
6481
6482
6483
6484
6486 {
6490 int ahead;
6493 Size nblocks = 1;
6494
6496 tag = cur->tag;
6498
6499
6500
6501
6502
6503 for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
6504 {
6505
6507
6508
6512 break;
6513
6514
6515 if (cur->tag.blockNum == next->tag.blockNum)
6516 continue;
6517
6518
6519 if (cur->tag.blockNum + 1 != next->tag.blockNum)
6520 break;
6521
6522 nblocks++;
6524 }
6525
6526 i += ahead;
6527
6528
6531 }
6532
6533
6534
6535
6536
6539
6541}
6542
6543
6544
6545static void
6547{
6549
6551}
6552
6553static char *
6555{
6557
6558 return psprintf("lost track of buffer IO on buffer %d", buffer);
6559}
6560
6561static void
6563{
6565
6566
6569
6572 else
6574}
6575
6576static char *
6578{
6580}
6581
6582
6583
6584
6585
6586static bool
6588{
6590 bool result;
6591
6592 *buffer_flushed = false;
6593
6596
6597 if ((buf_state & BM_VALID) == 0)
6598 {
6600 return false;
6601 }
6602
6603
6605 {
6607 return false;
6608 }
6609
6611
6612
6614 {
6617 *buffer_flushed = true;
6619 }
6620
6621
6623
6625
6626 return result;
6627}
6628
6629
6630
6631
6632
6633
6634
6635
6636
6637
6638
6639
6640
6641
6642
6643
6644
6645
6646
6647
6648
6649
6650bool
6652{
6654
6656
6657
6660
6663
6665}
6666
6667
6668
6669
6670
6671
6672
6673
6674
6675
6676
6677
6678
6679void
6681 int32 *buffers_skipped)
6682{
6683 *buffers_evicted = 0;
6684 *buffers_skipped = 0;
6685 *buffers_flushed = 0;
6686
6688 {
6691 bool buffer_flushed;
6692
6694 if (!(buf_state & BM_VALID))
6695 continue;
6696
6699
6701
6703 (*buffers_evicted)++;
6704 else
6705 (*buffers_skipped)++;
6706
6707 if (buffer_flushed)
6708 (*buffers_flushed)++;
6709 }
6710}
6711
6712
6713
6714
6715
6716
6717
6718
6719
6720
6721
6722
6723
6724
6725
6726
6727void
6729 int32 *buffers_flushed, int32 *buffers_skipped)
6730{
6732
6733 *buffers_skipped = 0;
6734 *buffers_evicted = 0;
6735 *buffers_flushed = 0;
6736
6738 {
6741 bool buffer_flushed;
6742
6743
6744 if ((buf_state & BM_VALID) == 0 ||
6746 continue;
6747
6748
6751
6753
6754
6755 if ((buf_state & BM_VALID) == 0 ||
6757 {
6759 continue;
6760 }
6761
6763 (*buffers_evicted)++;
6764 else
6765 (*buffers_skipped)++;
6766
6767 if (buffer_flushed)
6768 (*buffers_flushed)++;
6769 }
6770}
6771
6772
6773
6774
6775
6776
6777
6778
6779
6780
6781
6782
6783
6784
6785
6788{
6790 uint8 handle_data_len;
6793
6795
6797
6798
6799 for (int i = 0; i < handle_data_len; i++)
6800 {
6806
6807
6808
6809
6810
6811
6812
6813
6814 if (i == 0)
6815 first = buf_hdr->tag;
6816 else
6817 {
6820 }
6821
6822 if (is_temp)
6824 else
6826
6827
6829 if (is_write)
6830 {
6833 }
6834 else
6835 {
6838 }
6839
6840
6841 if (!is_temp)
6843
6845
6846
6847
6848
6849
6850
6851
6852
6853
6854
6855
6857 buf_hdr->io_wref = io_ref;
6858
6859 if (is_temp)
6861 else
6863
6864
6865
6866
6867
6868
6869 if (is_write && !is_temp)
6870 {
6871 LWLock *content_lock;
6872
6874
6876
6877
6878
6879
6881 }
6882
6883
6884
6885
6886
6887 if (!is_temp)
6889 }
6890}
6891
6892
6893
6894
6895static inline void
6897 bool *zeroed_any,
6898 bool *ignored_any,
6899 uint8 *zeroed_or_error_count,
6900 uint8 *checkfail_count,
6901 uint8 *first_off)
6902{
6904
6905
6906#define READV_COUNT_BITS 7
6907#define READV_COUNT_MASK ((1 << READV_COUNT_BITS) - 1)
6908
6909 *zeroed_any = rem_error & 1;
6910 rem_error >>= 1;
6911
6912 *ignored_any = rem_error & 1;
6913 rem_error >>= 1;
6914
6917
6920
6923}
6924
6925
6926
6927
6928
6929
6930
6931
6932
6933
6934
6935
6936
6937static inline void
6939 bool is_temp,
6940 bool zeroed_any,
6941 bool ignored_any,
6942 uint8 error_count,
6943 uint8 zeroed_count,
6944 uint8 checkfail_count,
6945 uint8 first_error_off,
6946 uint8 first_zeroed_off,
6947 uint8 first_ignored_off)
6948{
6949
6950 uint8 shift = 0;
6951 uint8 zeroed_or_error_count =
6952 error_count > 0 ? error_count : zeroed_count;
6953 uint8 first_off;
6954
6956 "PG_IOV_MAX is bigger than reserved space for error data");
6958 "PGAIO_RESULT_ERROR_BITS is insufficient for buffer_readv");
6959
6960
6961
6962
6963
6964
6965 if (error_count > 0)
6966 first_off = first_error_off;
6967 else if (zeroed_count > 0)
6968 first_off = first_zeroed_off;
6969 else
6970 first_off = first_ignored_off;
6971
6972 Assert(!zeroed_any || error_count == 0);
6973
6975
6976 result->error_data |= zeroed_any << shift;
6977 shift += 1;
6978
6979 result->error_data |= ignored_any << shift;
6980 shift += 1;
6981
6982 result->error_data |= ((uint32) zeroed_or_error_count) << shift;
6984
6987
6990
6993
6994 if (error_count > 0)
6996 else
6998
6999
7000
7001
7002
7003#ifdef USE_ASSERT_CHECKING
7004 {
7005 bool zeroed_any_2,
7006 ignored_any_2;
7007 uint8 zeroed_or_error_count_2,
7008 checkfail_count_2,
7009 first_off_2;
7010
7012 &zeroed_any_2, &ignored_any_2,
7013 &zeroed_or_error_count_2,
7014 &checkfail_count_2,
7015 &first_off_2);
7016 Assert(zeroed_any == zeroed_any_2);
7017 Assert(ignored_any == ignored_any_2);
7018 Assert(zeroed_or_error_count == zeroed_or_error_count_2);
7019 Assert(checkfail_count == checkfail_count_2);
7020 Assert(first_off == first_off_2);
7021 }
7022#endif
7023
7024#undef READV_COUNT_BITS
7025#undef READV_COUNT_MASK
7026}
7027
7028
7029
7030
7031
7034 uint8 flags, bool failed, bool is_temp,
7035 bool *buffer_invalid,
7036 bool *failed_checksum,
7037 bool *ignored_checksum,
7038 bool *zeroed_buffer)
7039{
7045 uint32 set_flag_bits;
7046 int piv_flags;
7047
7048
7049#ifdef USE_ASSERT_CHECKING
7050 {
7052
7055
7056 if (!is_temp)
7059 }
7060#endif
7061
7062 *buffer_invalid = false;
7063 *failed_checksum = false;
7064 *ignored_checksum = false;
7065 *zeroed_buffer = false;
7066
7067
7068
7069
7070
7071
7073
7074
7077
7078
7079 if (!failed)
7080 {
7081
7082
7083
7084
7085
7086
7087
7088
7089#ifdef USE_VALGRIND
7092#endif
7093
7095 failed_checksum))
7096 {
7098 {
7099 memset(bufdata, 0, BLCKSZ);
7100 *zeroed_buffer = true;
7101 }
7102 else
7103 {
7104 *buffer_invalid = true;
7105
7106 failed = true;
7107 }
7108 }
7109 else if (*failed_checksum)
7110 *ignored_checksum = true;
7111
7112
7113#ifdef USE_VALGRIND
7116#endif
7117
7118
7119
7120
7121
7122
7123
7124
7125
7126
7127
7128
7129
7130
7131
7132 if (*buffer_invalid || *failed_checksum || *zeroed_buffer)
7133 {
7135
7137 *zeroed_buffer,
7138 *ignored_checksum,
7139 *buffer_invalid,
7140 *zeroed_buffer ? 1 : 0,
7141 *failed_checksum ? 1 : 0,
7142 buf_off, buf_off, buf_off);
7144 }
7145 }
7146
7147
7149 if (is_temp)
7151 else
7153
7154
7155
7156
7157
7158
7159
7160
7161 TRACE_POSTGRESQL_BUFFER_READ_DONE(tag.forkNum,
7167 false);
7168}
7169
7170
7171
7172
7173
7174
7175
7178 uint8 cb_data, bool is_temp)
7179{
7182 uint8 first_error_off = 0;
7183 uint8 first_zeroed_off = 0;
7184 uint8 first_ignored_off = 0;
7185 uint8 error_count = 0;
7186 uint8 zeroed_count = 0;
7187 uint8 ignored_count = 0;
7188 uint8 checkfail_count = 0;
7190 uint8 handle_data_len;
7191
7192 if (is_temp)
7193 {
7196 }
7197 else
7199
7200
7201
7202
7203
7205 for (uint8 buf_off = 0; buf_off < handle_data_len; buf_off++)
7206 {
7208 bool failed;
7209 bool failed_verification = false;
7210 bool failed_checksum = false;
7211 bool zeroed_buffer = false;
7212 bool ignored_checksum = false;
7213
7215
7216
7217
7218
7219
7220
7221 failed =
7223 || prior_result.result <= buf_off;
7224
7226 &failed_verification,
7227 &failed_checksum,
7228 &ignored_checksum,
7229 &zeroed_buffer);
7230
7231
7232
7233
7234
7235
7236 if (failed_verification && !zeroed_buffer && error_count++ == 0)
7237 first_error_off = buf_off;
7238 if (zeroed_buffer && zeroed_count++ == 0)
7239 first_zeroed_off = buf_off;
7240 if (ignored_checksum && ignored_count++ == 0)
7241 first_ignored_off = buf_off;
7242 if (failed_checksum)
7243 checkfail_count++;
7244 }
7245
7246
7247
7248
7249
7251 (error_count > 0 || ignored_count > 0 || zeroed_count > 0))
7252 {
7254 zeroed_count > 0, ignored_count > 0,
7255 error_count, zeroed_count, checkfail_count,
7256 first_error_off, first_zeroed_off,
7257 first_ignored_off);
7259 }
7260
7261
7262
7263
7264
7265 if (is_temp && checkfail_count > 0)
7267 checkfail_count);
7268
7269 return result;
7270}
7271
7272
7273
7274
7275
7276
7277
7278
7279static void
7281 int elevel)
7282{
7290 bool zeroed_any,
7291 ignored_any;
7292 uint8 zeroed_or_error_count,
7293 checkfail_count,
7294 first_off;
7295 uint8 affected_count;
7296 const char *msg_one,
7297 *msg_mult,
7298 *det_mult,
7299 *hint_mult;
7300
7302 &zeroed_or_error_count,
7303 &checkfail_count,
7304 &first_off);
7305
7306
7307
7308
7309
7310
7311 if (zeroed_any && ignored_any)
7312 {
7313 Assert(zeroed_any && ignored_any);
7314 Assert(nblocks > 1);
7316 affected_count = zeroed_or_error_count;
7317
7320 errmsg("zeroing %u page(s) and ignoring %u checksum failure(s) among blocks %u..%u of relation %s",
7321 affected_count, checkfail_count, first, last, rpath.str),
7322 affected_count > 1 ?
7323 errdetail("Block %u held first zeroed page.",
7324 first + first_off) : 0,
7325 errhint("See server log for details about the other %u invalid block(s).",
7326 affected_count + checkfail_count - 1));
7327 return;
7328 }
7329
7330
7331
7332
7333
7334
7336 {
7337 Assert(!zeroed_any);
7338 affected_count = zeroed_or_error_count;
7339 msg_one = _("invalid page in block %u of relation %s");
7340 msg_mult = _("%u invalid pages among blocks %u..%u of relation %s");
7341 det_mult = _("Block %u held first invalid page.");
7342 hint_mult = _("See server log for the other %u invalid block(s).");
7343 }
7344 else if (zeroed_any && !ignored_any)
7345 {
7346 affected_count = zeroed_or_error_count;
7347 msg_one = _("invalid page in block %u of relation %s; zeroing out page");
7348 msg_mult = _("zeroing out %u invalid pages among blocks %u..%u of relation %s");
7349 det_mult = _("Block %u held first zeroed page.");
7350 hint_mult = _("See server log for the other %u zeroed block(s).");
7351 }
7352 else if (!zeroed_any && ignored_any)
7353 {
7354 affected_count = checkfail_count;
7355 msg_one = _("ignoring checksum failure in block %u of relation %s");
7356 msg_mult = _("ignoring %u checksum failures among blocks %u..%u of relation %s");
7357 det_mult = _("Block %u held first ignored page.");
7358 hint_mult = _("See server log for the other %u ignored block(s).");
7359 }
7360 else
7362
7365 affected_count == 1 ?
7368 affected_count > 1 ? errdetail_internal(det_mult, first + first_off) : 0,
7369 affected_count > 1 ? errhint_internal(hint_mult, affected_count - 1) : 0);
7370}
7371
7372static void
7374{
7376}
7377
7381{
7383}
7384
7385
7386
7387
7388
7389
7390
7391
7395{
7396 bool zeroed_any,
7397 ignored_any;
7398 uint8 zeroed_or_error_count,
7399 checkfail_count,
7400 first_off;
7401
7403 return prior_result;
7404
7406 &zeroed_any,
7407 &ignored_any,
7408 &zeroed_or_error_count,
7409 &checkfail_count,
7410 &first_off);
7411
7412 if (checkfail_count)
7413 {
7415
7417 checkfail_count);
7418 }
7419
7420 return prior_result;
7421}
7422
7423static void
7425{
7427}
7428
7432{
7434}
7435
7436
7440
7443};
7444
7445
7448
7449
7450
7451
7452
7453
7454
7457};
bool pgaio_wref_valid(PgAioWaitRef *iow)
PgAioHandle * pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
void pgaio_wref_clear(PgAioWaitRef *iow)
void pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
bool pgaio_have_staged(void)
bool pgaio_wref_check_done(PgAioWaitRef *iow)
ProcNumber pgaio_io_get_owner(PgAioHandle *ioh)
void pgaio_submit_staged(void)
void pgaio_wref_wait(PgAioWaitRef *iow)
void pgaio_io_release(PgAioHandle *ioh)
PgAioHandle * pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
@ PGAIO_HCB_LOCAL_BUFFER_READV
@ PGAIO_HCB_SHARED_BUFFER_READV
@ PGAIO_HF_REFERENCES_LOCAL
void pgaio_io_set_handle_data_32(PgAioHandle *ioh, uint32 *data, uint8 len)
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
uint64 * pgaio_io_get_handle_data(PgAioHandle *ioh, uint8 *len)
void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
PgAioTargetData * pgaio_io_get_target_data(PgAioHandle *ioh)
#define PGAIO_RESULT_ERROR_BITS
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
TimestampTz GetCurrentTimestamp(void)
Datum now(PG_FUNCTION_ARGS)
void binaryheap_build(binaryheap *heap)
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
bh_node_type binaryheap_first(binaryheap *heap)
bh_node_type binaryheap_remove_first(binaryheap *heap)
void binaryheap_free(binaryheap *heap)
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
#define binaryheap_empty(h)
#define InvalidBlockNumber
static bool BlockNumberIsValid(BlockNumber blockNumber)
#define BufferIsLocal(buffer)
CkptSortItem * CkptBufferIds
WritebackContext BackendWritebackContext
BufferDescPadded * BufferDescriptors
#define BM_MAX_USAGE_COUNT
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BUF_USAGECOUNT_MASK
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
static void UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
static LWLock * BufferDescriptorGetContentLock(const BufferDesc *bdesc)
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BM_PIN_COUNT_WAITER
static void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)
#define BUF_STATE_GET_USAGECOUNT(state)
static void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
#define BM_IO_IN_PROGRESS
static void ClearBufferTag(BufferTag *tag)
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
static void ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
#define BUF_USAGECOUNT_ONE
#define BUF_STATE_GET_REFCOUNT(state)
static LWLock * BufMappingPartitionLock(uint32 hashcode)
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
static BufferDesc * GetBufferDescriptor(uint32 id)
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
#define BM_CHECKPOINT_NEEDED
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
uint32 BufTableHashCode(BufferTag *tagPtr)
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
void CheckBufferIsPinnedOnce(Buffer buffer)
void FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
void IncrBufferRefCount(Buffer buffer)
void DropDatabaseBuffers(Oid dbid)
static int ckpt_buforder_comparator(const CkptSortItem *a, const CkptSortItem *b)
static pg_attribute_always_inline PgAioResult buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
bool BufferIsExclusiveLocked(Buffer buffer)
const ResourceOwnerDesc buffer_pin_resowner_desc
BlockNumber BufferGetBlockNumber(Buffer buffer)
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
static bool ReadBuffersCanStartIO(Buffer buffer, bool nowait)
void DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
static PgAioResult shared_buffer_readv_complete_local(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete)
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
static uint32 PrivateRefCountClock
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
static void ResOwnerReleaseBufferIO(Datum res)
static PgAioResult local_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
bool StartReadBuffers(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
void EvictAllUnpinnedBuffers(int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
const ResourceOwnerDesc buffer_io_resowner_desc
#define BUF_DROP_FULL_SCAN_THRESHOLD
static void PinBuffer_Locked(BufferDesc *buf)
void EvictRelUnpinnedBuffers(Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
static pg_attribute_always_inline void buffer_readv_complete_one(PgAioTargetData *td, uint8 buf_off, Buffer buffer, uint8 flags, bool failed, bool is_temp, bool *buffer_invalid, bool *failed_checksum, bool *ignored_checksum, bool *zeroed_buffer)
static uint32 WaitBufHdrUnlocked(BufferDesc *buf)
static int buffertag_comparator(const BufferTag *ba, const BufferTag *bb)
bool IsBufferCleanupOK(Buffer buffer)
#define BufferGetLSN(bufHdr)
static char * ResOwnerPrintBufferIO(Datum res)
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
void AtEOXact_Buffers(bool isCommit)
static void AbortBufferIO(Buffer buffer)
const PgAioHandleCallbacks aio_shared_buffer_readv_cb
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
static void ProcessReadBuffersResult(ReadBuffersOperation *operation)
static void ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
static void CheckForBufferLeaks(void)
static bool ReadBuffersCanStartIOOnce(Buffer buffer, bool nowait)
void CreateAndCopyRelationData(RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
void DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
static int rlocator_comparator(const void *p1, const void *p2)
Buffer ExtendBufferedRelTo(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
struct SMgrSortArray SMgrSortArray
const PgAioHandleCallbacks aio_local_buffer_readv_cb
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
static void AtProcExit_Buffers(int code, Datum arg)
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
#define BufHdrGetBlock(bufHdr)
static pg_attribute_always_inline void buffer_stage_common(PgAioHandle *ioh, bool is_write, bool is_temp)
static void local_buffer_write_error_callback(void *arg)
static void BufferSync(int flags)
static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
static void local_buffer_readv_stage(PgAioHandle *ioh, uint8 cb_data)
char * DebugPrintBufferRefcount(Buffer buffer)
static char * ResOwnerPrintBufferPin(Datum res)
void CheckPointBuffers(int flags)
bool BufferIsDirty(Buffer buffer)
static uint32 MaxProportionalPins
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
bool BgBufferSync(WritebackContext *wb_context)
static void WakePinCountWaiter(BufferDesc *buf)
bool BufferIsPermanent(Buffer buffer)
#define REFCOUNT_ARRAY_ENTRIES
static void shared_buffer_readv_stage(PgAioHandle *ioh, uint8 cb_data)
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
static PgAioResult shared_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
bool ConditionalLockBuffer(Buffer buffer)
BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
void ReleaseBuffer(Buffer buffer)
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
XLogRecPtr BufferGetLSNAtomic(Buffer buffer)
bool HoldingBufferPinThatDelaysRecovery(void)
int checkpoint_flush_after
void UnlockReleaseBuffer(Buffer buffer)
static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner, bool release_aio)
static void UnpinBufferNoOwner(BufferDesc *buf)
static void shared_buffer_write_error_callback(void *arg)
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
void WaitReadBuffers(ReadBuffersOperation *operation)
void WritebackContextInit(WritebackContext *context, int *max_pending)
void MarkBufferDirty(Buffer buffer)
#define BufferIsPinned(bufnum)
double bgwriter_lru_multiplier
static bool EvictUnpinnedBufferInternal(BufferDesc *desc, bool *buffer_flushed)
void LimitAdditionalPins(uint32 *additional_pins)
static void buffer_readv_report(PgAioResult result, const PgAioTargetData *td, int elevel)
static void ReservePrivateRefCountEntry(void)
static BufferDesc * PinCountWaitBuf
static int32 GetPrivateRefCount(Buffer buffer)
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
void LockBufferForCleanup(Buffer buffer)
void LockBuffer(Buffer buffer, int mode)
static PrivateRefCountEntry * ReservedRefCountEntry
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
void FlushRelationBuffers(Relation rel)
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
bool EvictUnpinnedBuffer(Buffer buf, bool *buffer_flushed)
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
bool ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
#define RELS_BSEARCH_THRESHOLD
int maintenance_io_concurrency
static void UnpinBuffer(BufferDesc *buf)
void FlushDatabaseBuffers(Oid dbid)
static void InvalidateBuffer(BufferDesc *buf)
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
int effective_io_concurrency
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait)
struct PrivateRefCountEntry PrivateRefCountEntry
struct CkptTsStatus CkptTsStatus
bool StartReadBuffer(ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
uint32 LockBufHdr(BufferDesc *desc)
static void ResOwnerReleaseBufferPin(Datum res)
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
static void buffer_readv_decode_error(PgAioResult result, bool *zeroed_any, bool *ignored_any, uint8 *zeroed_or_error_count, uint8 *checkfail_count, uint8 *first_off)
void InitBufferManagerAccess(void)
static void buffer_readv_encode_error(PgAioResult *result, bool is_temp, bool zeroed_any, bool ignored_any, uint8 error_count, uint8 zeroed_count, uint8 checkfail_count, uint8 first_error_off, uint8 first_zeroed_off, uint8 first_ignored_off)
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
uint32 GetAdditionalPinLimit(void)
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
static HTAB * PrivateRefCountHash
static int32 PrivateRefCountOverflowed
bool ConditionalLockBufferForCleanup(Buffer buffer)
int bgwriter_lru_maxpages
static void WaitIO(BufferDesc *buf)
void FlushOneBuffer(Buffer buffer)
#define BUFFER_LOCK_UNLOCK
#define BUFFER_LOCK_SHARE
#define READ_BUFFERS_ZERO_ON_ERROR
static Page BufferGetPage(Buffer buffer)
#define DEFAULT_IO_COMBINE_LIMIT
static Block BufferGetBlock(Buffer buffer)
#define READ_BUFFERS_ISSUE_ADVICE
#define MAX_IO_COMBINE_LIMIT
#define DEFAULT_EFFECTIVE_IO_CONCURRENCY
#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES
#define DEFAULT_MAINTENANCE_IO_CONCURRENCY
@ EB_CREATE_FORK_IF_NEEDED
#define READ_BUFFERS_SYNCHRONOUSLY
#define BUFFER_LOCK_EXCLUSIVE
@ RBM_ZERO_AND_CLEANUP_LOCK
static bool BufferIsValid(Buffer bufnum)
bool ignore_checksum_failure
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
bool PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
static bool PageIsNew(const PageData *page)
static void PageSetLSN(Page page, XLogRecPtr lsn)
static XLogRecPtr PageGetLSN(const PageData *page)
#define PIV_IGNORE_CHECKSUM_FAILURE
#define PG_USED_FOR_ASSERTS_ONLY
#define pg_attribute_always_inline
#define MemSet(start, val, len)
#define StaticAssertStmt(condition, errmessage)
bool IsCatalogRelationOid(Oid relid)
bool IsCatalogTextUniqueIndexOid(Oid relid)
void CheckpointWriteDelay(int flags, double progress)
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
void * hash_seq_search(HASH_SEQ_STATUS *status)
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
int errmsg_internal(const char *fmt,...)
int errdetail_internal(const char *fmt,...)
int errdetail(const char *fmt,...)
ErrorContextCallback * error_context_stack
int errhint_internal(const char *fmt,...)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
void FreeAccessStrategy(BufferAccessStrategy strategy)
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
void StrategyFreeBuffer(BufferDesc *buf)
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
volatile sig_atomic_t ProcSignalBarrierPending
Assert(PointerIsAligned(start, uint64))
BufferUsage pgBufferUsage
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
if(TABLE==NULL||TABLE_index==NULL)
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln)
void UnpinLocalBuffer(Buffer buffer)
bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput, bool nowait)
void AtEOXact_LocalBuffers(bool isCommit)
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber firstDelBlock)
void AtProcExit_LocalBuffers(void)
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
void MarkLocalBufferDirty(Buffer buffer)
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
void TerminateLocalBufferIO(BufferDesc *bufHdr, bool clear_dirty, uint32 set_flag_bits, bool release_aio)
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
void UnpinLocalBufferNoOwner(Buffer buffer)
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
bool LWLockHeldByMe(LWLock *lock)
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
void LWLockDisown(LWLock *lock)
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
void LWLockRelease(LWLock *lock)
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
void ForEachLWLockHeldByMe(void(*callback)(LWLock *, LWLockMode, void *), void *context)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
#define START_CRIT_SECTION()
#define CHECK_FOR_INTERRUPTS()
#define END_CRIT_SECTION()
#define ERRCODE_DATA_CORRUPTED
static PgChecksumMode mode
static int64 current_size
#define WRITEBACK_MAX_PENDING_FLUSHES
#define DEFAULT_BACKEND_FLUSH_AFTER
#define DEFAULT_CHECKPOINT_FLUSH_AFTER
#define DEFAULT_BGWRITER_FLUSH_AFTER
#define pgstat_count_buffer_read(rel)
#define pgstat_count_buffer_hit(rel)
PgStat_BgWriterStats PendingBgWriterStats
PgStat_CheckpointerStats PendingCheckpointerStats
void pgstat_prepare_report_checksum_failure(Oid dboid)
void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
instr_time pgstat_prepare_io_time(bool track_io_guc)
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
#define qsort(a, b, c, d)
static Datum PointerGetDatum(const void *X)
static Pointer DatumGetPointer(Datum X)
static int32 DatumGetInt32(Datum X)
#define NUM_AUXILIARY_PROCS
#define DELAY_CHKPT_START
#define INVALID_PROC_NUMBER
void ProcessProcSignalBarrier(void)
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
void set_ps_display_remove_suffix(void)
void set_ps_display_suffix(const char *suffix)
char * psprintf(const char *fmt,...)
ReadStream * read_stream_begin_smgr_relation(int flags, BufferAccessStrategy strategy, SMgrRelation smgr, char smgr_persistence, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
void read_stream_end(ReadStream *stream)
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define READ_STREAM_USE_BATCHING
static unsigned hash(unsigned *uv, int n)
static SMgrRelation RelationGetSmgr(Relation rel)
#define RelationUsesLocalBuffers(relation)
#define RELATION_IS_OTHER_TEMP(relation)
#define RelationIsValid(relation)
#define RelFileLocatorBackendIsTemp(rlocator)
#define RelFileLocatorEquals(locator1, locator2)
#define relpath(rlocator, forknum)
#define relpathbackend(rlocator, backend, forknum)
#define relpathperm(rlocator, forknum)
ResourceOwner CurrentResourceOwner
void ResourceOwnerEnlarge(ResourceOwner owner)
#define RELEASE_PRIO_BUFFER_IOS
@ RESOURCE_RELEASE_BEFORE_LOCKS
#define RELEASE_PRIO_BUFFER_PINS
void perform_spin_delay(SpinDelayStatus *status)
void finish_spin_delay(SpinDelayStatus *status)
#define init_local_spin_delay(status)
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
void smgrstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
void ProcSendSignal(ProcNumber procNumber)
int GetStartupBufferPinWaitBufId(void)
void SetStartupBufferPinWaitBufId(int bufid)
void ProcWaitForSignal(uint32 wait_event_info)
void ResolveRecoveryConflictWithBufferPin(void)
bool log_recovery_conflict_waits
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
BlockNumber last_exclusive
BlockNumber current_blocknum
int wait_backend_pgprocno
struct SMgrRelationData * smgr
int64 shared_blks_dirtied
int64 shared_blks_written
struct ErrorContextCallback * previous
void(* callback)(void *arg)
PgAioHandleCallbackStage stage
PgAioTargetData target_data
PgStat_Counter buf_written_clean
PgStat_Counter maxwritten_clean
PgStat_Counter buffers_written
BufferAccessStrategy strategy
struct SMgrRelationData * smgr
char str[REL_PATH_STR_MAXLEN+1]
RelFileLocator rd_locator
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
RelFileLocatorBackend smgr_rlocator
PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
struct PgAioTargetData::@124 smgr
static volatile sig_atomic_t waiting
bool RecoveryInProgress(void)
bool XLogNeedsFlush(XLogRecPtr record)
CheckpointStatsData CheckpointStats
void XLogFlush(XLogRecPtr record)
#define CHECKPOINT_END_OF_RECOVERY
#define CHECKPOINT_FLUSH_ALL
#define CHECKPOINT_IS_SHUTDOWN
#define XLogHintBitIsNeeded()
#define XLogRecPtrIsInvalid(r)
#define InvalidXLogRecPtr
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)