PostgreSQL Source Code: src/backend/storage/buffer/bufmgr.c Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
36
39
43#ifdef USE_ASSERT_CHECKING
44#include "catalog/pg_tablespace_d.h"
45#endif
69
70
71
/*
 * BufHdrGetBlock - address of the page that belongs to a buffer header:
 * BufferBlocks advanced by buf_id * BLCKSZ bytes, cast to Block.  buf_id is
 * converted to Size before the multiplication.
 *
 * BufferGetLSN - PageGetLSN() applied to the page returned by
 * BufHdrGetBlock().
 *
 * NOTE(review): presumably intended for shared (non-local) buffer headers
 * only, since LocalBufHdrGetBlock uses a different lookup -- confirm.
 */
72#define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
73#define BufferGetLSN(bufHdr) (PageGetLSN(BufHdrGetBlock(bufHdr)))
74
75
/*
 * LocalBufHdrGetBlock - fetch the block pointer for a buffer header from
 * LocalBufferBlockPointers, using index -(buf_id + 2).
 *
 * NOTE(review): that index is only non-negative for buf_id <= -2, so this
 * presumably relies on local buffer headers carrying negative ids -- confirm
 * against the code that assigns buf_id.
 */
76#define LocalBufHdrGetBlock(bufHdr) \
77 LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
78
79
/*
 * BUF_WRITTEN and BUF_REUSABLE are distinct single-bit values (0x01, 0x02),
 * so both can be carried in one int.
 * NOTE(review): presumably the result bits of SyncOneBuffer(), which returns
 * int; the statements that set and test them are not visible in this
 * listing -- confirm.
 */
80#define BUF_WRITTEN 0x01
81#define BUF_REUSABLE 0x02
82
/*
 * NOTE(review): the name suggests the relation count from which a binary
 * search is used instead of a linear one; its users are not visible in this
 * listing -- confirm.
 */
83#define RELS_BSEARCH_THRESHOLD 20
84
85
86
87
88
89
90
/*
 * BUF_DROP_FULL_SCAN_THRESHOLD - one thirty-second of NBuffers (integer
 * division), as a uint64.  NBuffers is read each time the macro is expanded.
 *
 * The whole replacement list is parenthesized so the cast expression stays a
 * single operand wherever the macro is used, e.g. as the operand of sizeof
 * or next to a postfix operator.
 */
91#define BUF_DROP_FULL_SCAN_THRESHOLD ((uint64) (NBuffers / 32))
92
94{
98
99
/*
 * Number of entries in the refcount array: 8.
 * NOTE(review): presumably the size of a fixed per-backend array of private
 * refcount entries; the code that uses it is not visible in this listing --
 * confirm.
 */
100#define REFCOUNT_ARRAY_ENTRIES 8
101
102
103
104
105
107{
108
110
111
112
113
114
115
116
117
120
121
123
125
126
129
130
131
132
133
134
135
136
138{
142
143
148
149
150
151
152
153
154
156
157
158
159
160
161
163
164
165
166
167
168
169
173
174
175
176
177
181
182
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
220
222
228
229
234
236{
237 .name = "buffer io",
242};
243
245{
246 .name = "buffer pin",
251};
252
253
254
255
256
257
258static void
260{
261
263 return;
264
265
266
267
268
269 {
270 int i;
271
273 {
275
277
279 {
281 return;
282 }
283 }
284 }
285
286
287
288
289
290 {
291
292
293
294
296 bool found;
297
298
301
302
304
305
309 &found);
312
313
316
318 }
319}
320
321
322
323
326{
328
329
331
332
335
336
339
340 return res;
341}
342
343
344
345
346
347
348
349
352{
354 int i;
355
358
359
360
361
362
364 {
366
368 return res;
369 }
370
371
372
373
374
375
376
377
379 return NULL;
380
382
383 if (res == NULL)
384 return NULL;
385 else if (!do_move)
386 {
387
388 return res;
389 }
390 else
391 {
392
393 bool found;
395
396
398
399
404
405
408
409
414
416 }
417}
418
419
420
421
422
423
424static inline int32
426{
428
431
432
433
434
435
437
438 if (ref == NULL)
439 return 0;
441}
442
443
444
445
446
447static void
449{
451
454 {
456
457
458
459
460
461
463 }
464 else
465 {
466 bool found;
468
473 }
474}
475
476
477
478
479
480
481
482
/*
 * BufferIsPinned(bufnum) - true when the calling backend holds a pin on
 * bufnum, as far as the visible expansion shows:
 *   - false if BufferIsValid(bufnum) is false;
 *   - for a local buffer (BufferIsLocal), LocalRefCount[-(bufnum) - 1] > 0;
 *   - otherwise GetPrivateRefCount(bufnum) > 0.
 *
 * bufnum appears several times in the expansion, so pass an expression
 * without side effects.
 *
 * NOTE(review): the line closing the outer parenthesis is missing from this
 * listing.
 */
483#define BufferIsPinned(bufnum) \
484( \
485 !BufferIsValid(bufnum) ? \
486 false \
487 : \
488 BufferIsLocal(bufnum) ? \
489 (LocalRefCount[-(bufnum) - 1] > 0) \
490 : \
491 (GetPrivateRefCount(bufnum) > 0) \
493
494
506 uint32 *extended_by);
514 uint32 *extended_by);
521static int SyncOneBuffer(int buf_id, bool skip_recently_used,
528 char relpersistence,
532 bool *foundPtr, IOContext io_context);
547#ifdef USE_ASSERT_CHECKING
549 void *unused_context);
550#endif
555
556
557
558
559
564{
566 BufferTag newTag;
567 uint32 newHash;
568 LWLock *newPartitionLock;
569 int buf_id;
570
572
573
575 forkNum, blockNum);
576
577
580
581
585
586
587 if (buf_id < 0)
588 {
589#ifdef USE_PREFETCH
590
591
592
593
595 smgrprefetch(smgr_reln, forkNum, blockNum, 1))
596 {
598 }
599#endif
600 }
601 else
602 {
603
604
605
606
607
609 }
610
611
612
613
614
615
616
617
618
619
620
621
622
623 return result;
624}
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
652{
655
657 {
658
661 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
662 errmsg("cannot access temporary tables of other sessions")));
663
664
666 }
667 else
668 {
669
671 }
672}
673
674
675
676
677
678
679
680
681bool
683 Buffer recent_buffer)
684{
688 bool have_private_ref;
689
691
694 InitBufferTag(&tag, &rlocator, forkNum, blockNum);
695
697 {
698 int b = -recent_buffer - 1;
699
702
703
705 {
707
709
710 return true;
711 }
712 }
713 else
714 {
717
718
719
720
721
722
723 if (have_private_ref)
725 else
727
729 {
730
731
732
733
734
735 if (have_private_ref)
736 PinBuffer(bufHdr, NULL);
737 else
739
741
742 return true;
743 }
744
745
746 if (!have_private_ref)
748 }
749
750 return false;
751}
752
753
754
755
756
759{
761}
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
807{
809
810
811
812
813
814
817 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
818 errmsg("cannot access temporary tables of other sessions")));
819
820
821
822
823
825 forkNum, blockNum, mode, strategy);
826
827 return buf;
828}
829
830
831
832
833
834
835
836
837
838
839
840
845{
847
849 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
850 forkNum, blockNum,
851 mode, strategy);
852}
853
854
855
856
862{
864 uint32 extend_by = 1;
865
867 &buf, &extend_by);
868
869 return buf;
870}
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
897{
900 Assert(extend_by > 0);
901
902 if (bmr.smgr == NULL)
903 {
906 }
907
910 buffers, extended_by);
911}
912
913
914
915
916
917
918
919
920
928{
930 uint32 extended_by = 0;
933
937
938 if (bmr.smgr == NULL)
939 {
942 }
943
944
945
946
947
948
953 {
955
956
959
961 }
962
963
964
965
966
969
970
971
972
973
975
976
977
978
979
980
981
984
986 {
989
992
994 num_pages, extend_to,
995 buffers, &extended_by);
996
999
1000 for (uint32 i = 0; i < extended_by; i++)
1001 {
1002 if (first_block + i != extend_to - 1)
1004 else
1006 }
1007 }
1008
1009
1010
1011
1012
1013
1014
1016 {
1017 Assert(extended_by == 0);
1019 fork, extend_to - 1, mode, strategy);
1020 }
1021
1023}
1024
1025
1026
1027
1028
1029
1030static void
1032{
1034 bool need_to_zero;
1036
1038
1039 if (already_valid)
1040 {
1041
1042
1043
1044
1045 need_to_zero = false;
1046 }
1047 else if (isLocalBuf)
1048 {
1049
1052 }
1053 else
1054 {
1055
1056
1057
1058
1059
1060
1061
1062
1064 need_to_zero = StartBufferIO(bufHdr, true, false);
1065 }
1066
1067 if (need_to_zero)
1068 {
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082 if (!isLocalBuf)
1084
1085
1086 if (isLocalBuf)
1088 else
1090 }
1091 else if (!isLocalBuf)
1092 {
1093
1094
1095
1096
1099 else
1101 }
1102}
1103
1104
1105
1106
1107
1108
1112 char persistence,
1116 bool *foundPtr)
1117{
1121
1123
1124
1125 Assert((persistence == RELPERSISTENCE_TEMP ||
1126 persistence == RELPERSISTENCE_PERMANENT ||
1127 persistence == RELPERSISTENCE_UNLOGGED));
1128
1129 if (persistence == RELPERSISTENCE_TEMP)
1130 {
1133 }
1134 else
1135 {
1138 }
1139
1140 TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1145
1146 if (persistence == RELPERSISTENCE_TEMP)
1147 {
1148 bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr);
1149 if (*foundPtr)
1151 }
1152 else
1153 {
1154 bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1155 strategy, foundPtr, io_context);
1156 if (*foundPtr)
1158 }
1159 if (rel)
1160 {
1161
1162
1163
1164
1165
1167 if (*foundPtr)
1169 }
1170 if (*foundPtr)
1171 {
1175
1176 TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1181 true);
1182 }
1183
1185}
1186
1187
1188
1189
1190
1191
1197{
1200 int flags;
1201 char persistence;
1202
1203
1204
1205
1206
1207
1209 {
1211
1212
1213
1214
1215
1216
1219
1221 }
1222
1223 if (rel)
1224 persistence = rel->rd_rel->relpersistence;
1225 else
1226 persistence = smgr_persistence;
1227
1230 {
1231 bool found;
1232
1234 forkNum, blockNum, strategy, &found);
1237 }
1238
1239
1240
1241
1242
1243
1247 operation.smgr = smgr;
1248 operation.rel = rel;
1250 operation.forknum = forkNum;
1251 operation.strategy = strategy;
1254 blockNum,
1255 flags))
1257
1259}
1260
1265 int *nblocks,
1266 int flags,
1267 bool allow_forwarding)
1268{
1269 int actual_nblocks = *nblocks;
1270 int maxcombine = 0;
1271 bool did_start_io;
1272
1273 Assert(*nblocks == 1 || allow_forwarding);
1274 Assert(*nblocks > 0);
1276
1277 for (int i = 0; i < actual_nblocks; ++i)
1278 {
1279 bool found;
1280
1281 if (allow_forwarding && buffers[i] != InvalidBuffer)
1282 {
1284
1285
1286
1287
1288
1289
1290
1291
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1312 else
1316 }
1317 else
1318 {
1320 operation->smgr,
1323 blockNum + i,
1325 &found);
1326 }
1327
1328 if (found)
1329 {
1330
1331
1332
1333
1334
1335
1336 if (i == 0)
1337 {
1338 *nblocks = 1;
1339
1340#ifdef USE_ASSERT_CHECKING
1341
1342
1343
1344
1345
1346
1347 operation->buffers = buffers;
1348 operation->blocknum = blockNum;
1352#endif
1353 return false;
1354 }
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365 actual_nblocks = i;
1366 break;
1367 }
1368 else
1369 {
1370
1371
1372
1373
1374 if (i == 0 && actual_nblocks > 1)
1375 {
1378 blockNum);
1379 if (unlikely(maxcombine < actual_nblocks))
1380 {
1381 elog(DEBUG2, "limiting nblocks at %u from %u to %u",
1382 blockNum, actual_nblocks, maxcombine);
1383 actual_nblocks = maxcombine;
1384 }
1385 }
1386 }
1387 }
1388 *nblocks = actual_nblocks;
1389
1390
1391 operation->buffers = buffers;
1392 operation->blocknum = blockNum;
1393 operation->flags = flags;
1394 operation->nblocks = actual_nblocks;
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1411 {
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1429
1430 operation->nblocks = *nblocks;
1431 }
1432 else
1433 {
1435
1437 {
1438
1439
1440
1441
1442
1443
1444
1445
1446
1449 blockNum,
1450 actual_nblocks);
1451 }
1452
1453
1454
1455
1456
1457 did_start_io = true;
1458 }
1459
1461
1462 return did_start_io;
1463}
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493bool
1497 int *nblocks,
1498 int flags)
1499{
1501 true );
1502}
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512bool
1516 int flags)
1517{
1518 int nblocks = 1;
1519 bool result;
1520
1522 false );
1523 Assert(nblocks == 1);
1524
1525 return result;
1526}
1527
1528
1529
1530
1531static void
1533{
1534#ifdef USE_ASSERT_CHECKING
1537
1538 for (int i = 0; i < operation->nblocks; i++)
1539 {
1544
1547
1548 if (i < operation->nblocks_done)
1550 }
1551#endif
1552}
1553
1554
1555static inline bool
1557{
1560 true, nowait);
1561 else
1563}
1564
1565
1566
1567
1568static inline bool
1570{
1571
1572
1573
1574
1575
1577 {
1579 return true;
1580
1581
1582
1583
1584
1585
1586
1588 }
1589
1591}
1592
1593
1594
1595
1596
1597static void
1599{
1602 int newly_read_blocks = 0;
1603
1606
1607
1608
1609
1610
1611
1614
1619 {
1620
1621
1622
1623
1625 elog(DEBUG3, "partial read, will retry");
1626 }
1627
1628 Assert(newly_read_blocks > 0);
1630
1631 operation->nblocks_done += newly_read_blocks;
1632
1634}
1635
1636void
1638{
1642
1643 if (operation->persistence == RELPERSISTENCE_TEMP)
1644 {
1647 }
1648 else
1649 {
1652 }
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1666 elog(ERROR, "waiting for read operation that didn't read");
1667
1668
1669
1670
1671
1672
1673
1674
1675 while (true)
1676 {
1677 int ignored_nblocks_progress;
1678
1680
1681
1682
1683
1684
1686 {
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1699 {
1701
1703
1704
1705
1706
1707
1709 io_start, 0, 0);
1710 }
1711 else
1712 {
1714 }
1715
1716
1717
1718
1719
1721 }
1722
1723
1724
1725
1726
1727
1729 break;
1730
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1744 }
1745
1747
1748
1749}
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768static bool
1770{
1772 int flags = operation->flags;
1775 char persistence = operation->persistence;
1777 Buffer *io_buffers = &operation->buffers[nblocks_done];
1778 int io_buffers_len = 0;
1780 uint32 ioh_flags = 0;
1784 bool did_start_io;
1785
1786
1787
1788
1789
1790
1793
1794 if (persistence == RELPERSISTENCE_TEMP)
1795 {
1799 }
1800 else
1801 {
1804 }
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1819
1820
1821
1822
1823
1826
1827
1828
1829
1830
1831
1832
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1854 {
1856
1858 }
1859
1860
1861
1862
1863
1864
1865
1866
1868 {
1869
1870
1871
1872
1873
1874
1875
1877 *nblocks_progress = 1;
1878
1881 did_start_io = false;
1882
1883
1884
1885
1886
1887
1888 TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + operation->nblocks_done,
1893 true);
1894
1895 if (persistence == RELPERSISTENCE_TEMP)
1897 else
1899
1900 if (operation->rel)
1902
1904
1907 }
1908 else
1909 {
1911
1912
1913 Assert(io_buffers[0] == buffers[nblocks_done]);
1914 io_pages[0] = BufferGetBlock(buffers[nblocks_done]);
1915 io_buffers_len = 1;
1916
1917
1918
1919
1920
1921
1922
1923 for (int i = nblocks_done + 1; i < operation->nblocks; i++)
1924 {
1926 break;
1927
1930 Assert(io_buffers[io_buffers_len] == buffers[i]);
1931
1932 io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
1933 }
1934
1935
1937
1938
1940
1942 persistence == RELPERSISTENCE_TEMP ?
1945 flags);
1946
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1960 blocknum + nblocks_done,
1961 io_pages, io_buffers_len);
1963 io_start, 1, io_buffers_len * BLCKSZ);
1964
1965 if (persistence == RELPERSISTENCE_TEMP)
1967 else
1969
1970
1971
1972
1973
1974
1977
1978 *nblocks_progress = io_buffers_len;
1979 did_start_io = true;
1980 }
1981
1982 return did_start_io;
1983}
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2008 bool *foundPtr, IOContext io_context)
2009{
2010 BufferTag newTag;
2011 uint32 newHash;
2012 LWLock *newPartitionLock;
2013 int existing_buf_id;
2014 Buffer victim_buffer;
2016 uint32 victim_buf_state;
2017
2018
2021
2022
2024
2025
2028
2029
2032 if (existing_buf_id >= 0)
2033 {
2035 bool valid;
2036
2037
2038
2039
2040
2041
2043
2045
2046
2048
2049 *foundPtr = true;
2050
2051 if (!valid)
2052 {
2053
2054
2055
2056
2057
2058 *foundPtr = false;
2059 }
2060
2061 return buf;
2062 }
2063
2064
2065
2066
2067
2069
2070
2071
2072
2073
2074
2077
2078
2079
2080
2081
2082
2085 if (existing_buf_id >= 0)
2086 {
2088 bool valid;
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2101
2102
2103
2104
2105
2107
2108
2109
2111
2112 valid = PinBuffer(existing_buf_hdr, strategy);
2113
2114
2116
2117 *foundPtr = true;
2118
2119 if (!valid)
2120 {
2121
2122
2123
2124
2125
2126 *foundPtr = false;
2127 }
2128
2129 return existing_buf_hdr;
2130 }
2131
2132
2133
2134
2135 victim_buf_state = LockBufHdr(victim_buf_hdr);
2136
2137
2140
2141 victim_buf_hdr->tag = newTag;
2142
2143
2144
2145
2146
2147
2148
2150 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
2152
2153 UnlockBufHdr(victim_buf_hdr, victim_buf_state);
2154
2156
2157
2158
2159
2160 *foundPtr = false;
2161
2162 return victim_buf_hdr;
2163}
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182static void
2184{
2186 uint32 oldHash;
2187 LWLock *oldPartitionLock;
2190
2191
2192 oldTag = buf->tag;
2193
2197
2198
2199
2200
2201
2202
2205
2206retry:
2207
2208
2209
2210
2211
2213
2214
2216
2217
2219 {
2222 return;
2223 }
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2236 {
2239
2241 elog(ERROR, "buffer is pinned in InvalidateBuffer");
2243 goto retry;
2244 }
2245
2246
2247
2248
2249
2254
2255
2256
2257
2260
2261
2262
2263
2265
2266
2267
2268
2270}
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281static bool
2283{
2286 LWLock *partition_lock;
2288
2290
2291
2292 tag = buf_hdr->tag;
2293
2296
2298
2299
2301
2302
2303
2304
2305
2309
2310
2311
2312
2313
2315 {
2317
2320
2321 return false;
2322 }
2323
2324
2325
2326
2327
2328
2329
2330
2334
2336
2337
2339
2341
2345
2346 return true;
2347}
2348
2351{
2355 bool from_ring;
2356
2357
2358
2359
2360
2363
2364
2365again:
2366
2367
2368
2369
2370
2373
2375
2376
2378
2379
2380
2381
2383
2384
2385
2386
2387
2388
2389
2390
2392 {
2393 LWLock *content_lock;
2394
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2414 {
2415
2416
2417
2418
2420 goto again;
2421 }
2422
2423
2424
2425
2426
2427
2428
2429
2430 if (strategy != NULL)
2431 {
2433
2434
2438
2441 {
2444 goto again;
2445 }
2446 }
2447
2448
2451
2453 &buf_hdr->tag);
2454 }
2455
2456
2458 {
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2477 }
2478
2479
2480
2481
2482
2483
2485 {
2487 goto again;
2488 }
2489
2490
2491#ifdef USE_ASSERT_CHECKING
2493
2496
2498#endif
2499
2500 return buf;
2501}
2502
2503
2504
2505
2506
2507
2508
2511{
2513}
2514
2515
2516
2517
2518
2519
2520
2523{
2524 uint32 estimated_pins_held;
2525
2526
2527
2528
2529
2530
2532
2533
2535 return 0;
2536
2538}
2539
2540
2541
2542
2543
2544
2545
2546
/*
 * Clamp *additional_pins, in place, to a limit.
 *
 * The function name, its parameter list and the declaration/initial value of
 * "limit" are on lines missing from this listing; only the clamping logic is
 * visible.
 */
2547void
2549{
2551
     /* A request for at most one pin is never reduced. */
2552 if (*additional_pins <= 1)
2553 return;
2554
     /* The limit is floored at 1, so the result is never lowered below 1. */
2556 limit = Max(limit, 1);
     /* Only ever lowers the caller's value; a smaller request is kept. */
2557 if (limit < *additional_pins)
2558 *additional_pins = limit;
2559}
2560
2561
2562
2563
2564
2573 uint32 *extended_by)
2574{
2576
2577 TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2582 extend_by);
2583
2586 extend_by, extend_upto,
2587 buffers, &extend_by);
2588 else
2590 extend_by, extend_upto,
2591 buffers, &extend_by);
2592 *extended_by = extend_by;
2593
2594 TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2599 *extended_by,
2600 first_block);
2601
2602 return first_block;
2603}
2604
2605
2606
2607
2608
2617 uint32 *extended_by)
2618{
2622
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635 for (uint32 i = 0; i < extend_by; i++)
2636 {
2637 Block buf_block;
2638
2641
2642
2643 MemSet(buf_block, 0, BLCKSZ);
2644 }
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2658
2659
2660
2661
2662
2665
2667
2668
2669
2670
2671
2672
2673
2675 {
2676 uint32 orig_extend_by = extend_by;
2677
2678 if (first_block > extend_upto)
2679 extend_by = 0;
2680 else if ((uint64) first_block + extend_by > extend_upto)
2681 extend_by = extend_upto - first_block;
2682
2683 for (uint32 i = extend_by; i < orig_extend_by; i++)
2684 {
2686
2687
2688
2689
2690
2693 }
2694
2695 if (extend_by == 0)
2696 {
2699 *extended_by = extend_by;
2700 return first_block;
2701 }
2702 }
2703
2704
2707 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2708 errmsg("cannot extend relation %s beyond %u blocks",
2711
2712
2713
2714
2715
2716
2717
2718 for (uint32 i = 0; i < extend_by; i++)
2719 {
2720 Buffer victim_buf = buffers[i];
2724 LWLock *partition_lock;
2725 int existing_id;
2726
2727
2730
2734
2736
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753 if (existing_id >= 0)
2754 {
2756 Block buf_block;
2757 bool valid;
2758
2759
2760
2761
2762
2763 valid = PinBuffer(existing_hdr, strategy);
2764
2766
2767
2768
2769
2770
2773
2776
2779 (errmsg("unexpected data beyond EOF in block %u of relation %s",
2782 errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794 do
2795 {
2797
2798 buf_state &= ~BM_VALID;
2800 } while ((existing_hdr, true, false));
2801 }
2802 else
2803 {
2805
2806 buf_state = LockBufHdr(victim_buf_hdr);
2807
2808
2811
2812 victim_buf_hdr->tag = tag;
2813
2817
2819
2821
2822
2824 }
2825 }
2826
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2840
2841
2842
2843
2844
2845
2846
2847
2850
2852 io_start, 1, extend_by * BLCKSZ);
2853
2854
2855 for (uint32 i = 0; i < extend_by; i++)
2856 {
2859 bool lock = false;
2860
2862 lock = true;
2864 {
2866 if (first_block + i + 1 == extend_upto)
2867 lock = true;
2868 }
2869
2870 if (lock)
2872
2874 }
2875
2877
2878 *extended_by = extend_by;
2879
2880 return first_block;
2881}
2882
2883
2884
2885
2886
2887
2888
2889
2890bool
2892{
2894
2896
2898 {
2899
2900 return true;
2901 }
2902 else
2903 {
2907 }
2908}
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918bool
2920{
2922
2924
2926 {
2927 int bufid = -buffer - 1;
2928
2930
2931 }
2932 else
2933 {
2937 }
2938
2940}
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951void
2953{
2956 uint32 old_buf_state;
2957
2960
2962 {
2964 return;
2965 }
2966
2968
2972
2974 for (;;)
2975 {
2978
2979 buf_state = old_buf_state;
2980
2983
2985 buf_state))
2986 break;
2987 }
2988
2989
2990
2991
2992 if (!(old_buf_state & BM_DIRTY))
2993 {
2997 }
2998}
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3017{
3020
3022 {
3025 {
3032 }
3033 else
3034 {
3036
3042 }
3043 }
3044
3045 return ReadBuffer(relation, blockNum);
3046}
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071static bool
3073{
3075 bool result;
3077
3080
3082
3083 if (ref == NULL)
3084 {
3086 uint32 old_buf_state;
3087
3089
3091 for (;;)
3092 {
3095
3096 buf_state = old_buf_state;
3097
3098
3100
3101 if (strategy == NULL)
3102 {
3103
3106 }
3107 else
3108 {
3109
3110
3111
3112
3115 }
3116
3118 buf_state))
3119 {
3120 result = (buf_state & BM_VALID) != 0;
3121
3122
3123
3124
3125
3126
3127
3128
3130 break;
3131 }
3132 }
3133 }
3134 else
3135 {
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3152 }
3153
3157 return result;
3158}
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182static void
3184{
3188
3189
3190
3191
3192
3194
3195
3196
3197
3198
3199
3201
3202
3203
3204
3205
3210
3212
3215
3217}
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228static void
3230{
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3242
3245 {
3246
3247 int wait_backend_pgprocno = buf->wait_backend_pgprocno;
3248
3249 buf_state &= ~BM_PIN_COUNT_WAITER;
3252 }
3253 else
3255}
3256
3257
3258
3259
3260
3261
3262
3263static void
3265{
3267
3270}
3271
3272static void
3274{
3277
3279
3280
3282 Assert(ref != NULL);
3286 {
3288 uint32 old_buf_state;
3289
3290
3291
3292
3293
3294
3295
3296
3298
3299
3301
3302
3303
3304
3305
3306
3307
3309 for (;;)
3310 {
3313
3314 buf_state = old_buf_state;
3315
3317
3319 buf_state))
3320 break;
3321 }
3322
3323
3326
3328 }
3329}
3330
/*
 * Parameter macros for a generated sort routine:
 *   ST_SORT          name of the routine, sort_checkpoint_bufferids (called
 *                    later in this file with CkptBufferIds and a count)
 *   ST_ELEMENT_TYPE  element type, CkptSortItem
 *   ST_COMPARE       comparison, delegated to ckpt_buforder_comparator(a, b)
 *   ST_SCOPE         static, i.e. the routine is private to this file
 *   ST_DEFINE        defined with no value
 *
 * NOTE(review): the #include that consumes these macros (presumably a sort
 * template header) is on a line missing from this listing -- confirm.
 */
3331#define ST_SORT sort_checkpoint_bufferids
3332#define ST_ELEMENT_TYPE CkptSortItem
3333#define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
3334#define ST_SCOPE static
3335#define ST_DEFINE
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348static void
3350{
3352 int buf_id;
3353 int num_to_scan;
3354 int num_spaces;
3355 int num_processed;
3356 int num_written;
3358 Oid last_tsid;
3360 int i;
3363
3364
3365
3366
3367
3368
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389 num_to_scan = 0;
3390 for (buf_id = 0; buf_id < NBuffers; buf_id++)
3391 {
3393
3394
3395
3396
3397
3399
3400 if ((buf_state & mask) == mask)
3401 {
3403
3405
3407 item->buf_id = buf_id;
3412 }
3413
3415
3416
3419 }
3420
3421 if (num_to_scan == 0)
3422 return;
3423
3425
3426 TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
3427
3428
3429
3430
3431
3432
3433
3434
3435 sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
3436
3437 num_spaces = 0;
3438
3439
3440
3441
3442
3444 for (i = 0; i < num_to_scan; i++)
3445 {
3447 Oid cur_tsid;
3448
3450
3451
3452
3453
3454
3455 if (last_tsid == InvalidOid || last_tsid != cur_tsid)
3456 {
3458
3459 num_spaces++;
3460
3461
3462
3463
3464
3466
3467 if (per_ts_stat == NULL)
3469 else
3471
3472 s = &per_ts_stat[num_spaces - 1];
3473 memset(s, 0, sizeof(*s));
3474 s->tsId = cur_tsid;
3475
3476
3477
3478
3479
3480
3482
3483
3484
3485
3486
3487
3488 last_tsid = cur_tsid;
3489 }
3490 else
3491 {
3492 s = &per_ts_stat[num_spaces - 1];
3493 }
3494
3496
3497
3500 }
3501
3502 Assert(num_spaces > 0);
3503
3504
3505
3506
3507
3508
3511 NULL);
3512
3513 for (i = 0; i < num_spaces; i++)
3514 {
3516
3518
3520 }
3521
3523
3524
3525
3526
3527
3528
3529
3530 num_processed = 0;
3531 num_written = 0;
3533 {
3537
3539 Assert(buf_id != -1);
3540
3542
3543 num_processed++;
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3558 {
3560 {
3561 TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
3563 num_written++;
3564 }
3565 }
3566
3567
3568
3569
3570
3573 ts_stat->index++;
3574
3575
3577 {
3579 }
3580 else
3581 {
3582
3584 }
3585
3586
3587
3588
3589
3590
3592 }
3593
3594
3595
3596
3597
3599
3600 pfree(per_ts_stat);
3601 per_ts_stat = NULL;
3603
3604
3605
3606
3607
3609
3610 TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
3611}
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
/*
 * Background-writer buffer scan.  The debug messages below identify the
 * caller as "bgwriter"; the function name and parameter list are on a line
 * missing from this listing, and wb_context (used in the SyncOneBuffer()
 * call) is presumably one of the parameters.
 *
 * Each call:
 *   1. reads the current strategy position with StrategySyncStart();
 *   2. if state from an earlier call was saved, works out how far that
 *      position advanced (strategy_delta) and how this function's own scan
 *      position (next_passes / next_to_clean) relates to it;
 *   3. updates the moving averages smoothed_density and smoothed_alloc;
 *   4. raises upcoming_alloc_est to at least
 *      min_scan_buffers + reusable_buffers_est;
 *   5. calls SyncOneBuffer() on successive buffers until num_to_scan is used
 *      up or reusable_buffers reaches upcoming_alloc_est.
 *
 * Returns true from the early exit near the top; otherwise returns true only
 * when both bufs_to_lap and recent_alloc are zero.
 *
 * NOTE(review): several statements of the body are absent from this listing
 * (the early-exit condition, the bufs_to_lap assignments in two branches,
 * the computation of upcoming_alloc_est, and the tests on sync_state inside
 * the loop).  The comments below describe only what is visible.
 */
3624bool
3626{
3627
3628 int strategy_buf_id;
3629 uint32 strategy_passes;
3630 uint32 recent_alloc;
3631
3632
3633
3634
3635
     /*
      * Function-local statics: these keep their values from one call to the
      * next.  saved_info_valid says whether the other saved values may be
      * used.
      */
3636 static bool saved_info_valid = false;
3637 static int prev_strategy_buf_id;
3638 static uint32 prev_strategy_passes;
3639 static int next_to_clean;
3640 static uint32 next_passes;
3641
3642
     /* Moving averages, also kept across calls. */
3643 static float smoothed_alloc = 0;
3644 static float smoothed_density = 10.0;
3645
3646
     /* Divisor applied when folding a new sample into the moving averages. */
3647 float smoothing_samples = 16;
     /* Used below, divided by BgWriterDelay, to derive min_scan_buffers. */
3648 float scan_whole_pool_milliseconds = 120000.0;
3649
3650
3651 long strategy_delta;
3652 int bufs_to_lap;
3653 int bufs_ahead;
3654 float scans_per_alloc;
3655 int reusable_buffers_est;
3656 int upcoming_alloc_est;
3657 int min_scan_buffers;
3658
3659
3660 int num_to_scan;
3661 int num_written;
3662 int reusable_buffers;
3663
3664
3665 long new_strategy_delta;
3666 uint32 new_recent_alloc;
3667
3668
3669
3670
3671
     /*
      * Returns the current strategy buffer id and fills in strategy_passes
      * and recent_alloc through the two pointers.
      */
3672 strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
3673
3674
3676
3677
3678
3679
3680
3681
     /*
      * Early exit: forget the saved state and return true.  The condition
      * guarding this block is on a line missing from this listing.
      */
3683 {
3684 saved_info_valid = false;
3685 return true;
3686 }
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696 if (saved_info_valid)
3697 {
          /* Unsigned difference stored into a signed 32-bit value. */
3698 int32 passes_delta = strategy_passes - prev_strategy_passes;
3699
          /* Buffers advanced since the previous call: id delta plus whole passes. */
3700 strategy_delta = strategy_buf_id - prev_strategy_buf_id;
3701 strategy_delta += (long) passes_delta * NBuffers;
3702
3703 Assert(strategy_delta >= 0);
3704
          /* next_passes is ahead of strategy_passes (signed comparison of the difference). */
3705 if ((int32) (next_passes - strategy_passes) > 0)
3706 {
3707
3708 bufs_to_lap = strategy_buf_id - next_to_clean;
3709#ifdef BGW_DEBUG
3710 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3711 next_passes, next_to_clean,
3712 strategy_passes, strategy_buf_id,
3713 strategy_delta, bufs_to_lap);
3714#endif
3715 }
          /* Same pass count, and next_to_clean is at or beyond the strategy buffer id. */
3716 else if (next_passes == strategy_passes &&
3717 next_to_clean >= strategy_buf_id)
3718 {
3719
3720 bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
3721#ifdef BGW_DEBUG
3722 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3723 next_passes, next_to_clean,
3724 strategy_passes, strategy_buf_id,
3725 strategy_delta, bufs_to_lap);
3726#endif
3727 }
3728 else
3729 {
3730
3731
3732
3733
3734#ifdef BGW_DEBUG
3735 elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3736 next_passes, next_to_clean,
3737 strategy_passes, strategy_buf_id,
3738 strategy_delta);
3739#endif
               /*
                * Restart the scan position at the strategy position.
                * NOTE(review): no assignment to bufs_to_lap is visible in
                * this branch; it is presumably on the line missing from
                * this listing.
                */
3740 next_to_clean = strategy_buf_id;
3741 next_passes = strategy_passes;
3743 }
3744 }
3745 else
3746 {
3747
3748
3749
3750
3751#ifdef BGW_DEBUG
3752 elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3753 strategy_passes, strategy_buf_id);
3754#endif
          /*
           * No usable saved state: start from the strategy position with a
           * zero delta.  NOTE(review): as above, no assignment to
           * bufs_to_lap is visible here.
           */
3755 strategy_delta = 0;
3756 next_to_clean = strategy_buf_id;
3757 next_passes = strategy_passes;
3759 }
3760
3761
     /* Remember the strategy position for the next call. */
3762 prev_strategy_buf_id = strategy_buf_id;
3763 prev_strategy_passes = strategy_passes;
3764 saved_info_valid = true;
3765
3766
3767
3768
3769
3770
3771
     /*
      * Fold strategy_delta / recent_alloc into smoothed_density, but only
      * when both are positive (this also avoids dividing by zero).
      */
3772 if (strategy_delta > 0 && recent_alloc > 0)
3773 {
3774 scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
3775 smoothed_density += (scans_per_alloc - smoothed_density) /
3776 smoothing_samples;
3777 }
3778
3779
3780
3781
3782
3783
     /* The float quotient is converted to int on assignment. */
3784 bufs_ahead = NBuffers - bufs_to_lap;
3785 reusable_buffers_est = (float) bufs_ahead / smoothed_density;
3786
3787
3788
3789
3790
3791
     /*
      * smoothed_alloc jumps straight up to recent_alloc when that is at
      * least as large; otherwise it moves toward recent_alloc by
      * 1/smoothing_samples of the gap.
      */
3792 if (smoothed_alloc <= (float) recent_alloc)
3793 smoothed_alloc = recent_alloc;
3794 else
3795 smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3796 smoothing_samples;
3797
3798
3800
3801
3802
3803
3804
3805
3806
3807
3808
     /*
      * NOTE(review): the statement that computes upcoming_alloc_est is on a
      * line missing from this listing.
      */
3809 if (upcoming_alloc_est == 0)
3810 smoothed_alloc = 0;
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
     /* NBuffers divided by (scan_whole_pool_milliseconds / BgWriterDelay). */
3822 min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
3823
     /* Raise the estimate to at least min_scan_buffers + reusable_buffers_est. */
3824 if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3825 {
3826#ifdef BGW_DEBUG
3827 elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3828 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3829#endif
3830 upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3831 }
3832
3833
3834
3835
3836
3837
3838
3839
3840 num_to_scan = bufs_to_lap;
3841 num_written = 0;
3842 reusable_buffers = reusable_buffers_est;
3843
3844
     /* Scan until the budget is used up or enough reusable buffers are counted. */
3845 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3846 {
3847 int sync_state = SyncOneBuffer(next_to_clean, true,
3848 wb_context);
3849
          /* Advance the scan position, wrapping to 0 and counting a new pass. */
3850 if (++next_to_clean >= NBuffers)
3851 {
3852 next_to_clean = 0;
3853 next_passes++;
3854 }
3855 num_to_scan--;
3856
          /*
           * NOTE(review): the tests on sync_state that guard the block and
           * the statement below are on lines missing from this listing, as
           * is the condition for the inner break.
           */
3858 {
3859 reusable_buffers++;
3861 {
3863 break;
3864 }
3865 }
3867 reusable_buffers++;
3868 }
3869
3871
3872#ifdef BGW_DEBUG
3873 elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3874 recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3875 smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3876 bufs_to_lap - num_to_scan,
3877 num_written,
3878 reusable_buffers - reusable_buffers_est);
3879#endif
3880
3881
3882
3883
3884
3885
3886
3887
3888
     /*
      * Second density sample, taken from this call's own scan: buffers
      * scanned (bufs_to_lap - num_to_scan) per reusable buffer counted in
      * the loop.  Used only when both are positive.
      */
3889 new_strategy_delta = bufs_to_lap - num_to_scan;
3890 new_recent_alloc = reusable_buffers - reusable_buffers_est;
3891 if (new_strategy_delta > 0 && new_recent_alloc > 0)
3892 {
3893 scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
3894 smoothed_density += (scans_per_alloc - smoothed_density) /
3895 smoothing_samples;
3896
3897#ifdef BGW_DEBUG
3898 elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3899 new_recent_alloc, new_strategy_delta,
3900 scans_per_alloc, smoothed_density);
3901#endif
3902 }
3903
3904
     /* True only when there was nothing left to lap and no recent allocations. */
3905 return (bufs_to_lap == 0 && recent_alloc == 0);
3906}
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922static int
3924{
3926 int result = 0;
3929
3930
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3944
3947 {
3949 }
3950 else if (skip_recently_used)
3951 {
3952
3954 return result;
3955 }
3956
3958 {
3959
3961 return result;
3962 }
3963
3964
3965
3966
3967
3970
3972
3974
3975 tag = bufHdr->tag;
3976
3978
3979
3980
3981
3982
3984
3986}
3987
3988
3989
3990
3991
3992
3993
3994
3995void
3997{
3999
4001
4003}
4004
4005
4006
4007
4008
4009
4010
4011
4012void
4014{
4016
4017
4018
4019
4020
4021
4022
4023
4025
4027
4030
4033
4034
4035
4036
4037
4040}
4041
4042
4043
4044
4045
4046static void
4048{
4050
4052
4053
4055}
4056
4057
4058
4059
4060
4061
4062
4063
4064static void
4066{
4067#ifdef USE_ASSERT_CHECKING
4068 int RefCountErrors = 0;
4070 int i;
4071 char *s;
4072
4073
4075 {
4077
4079 {
4081 elog(WARNING, "buffer refcount leak: %s", s);
4083
4084 RefCountErrors++;
4085 }
4086 }
4087
4088
4090 {
4092
4095 {
4097 elog(WARNING, "buffer refcount leak: %s", s);
4099 RefCountErrors++;
4100 }
4101 }
4102
4103 Assert(RefCountErrors == 0);
4104#endif
4105}
4106
4107#ifdef USE_ASSERT_CHECKING
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125void
4126AssertBufferLocksPermitCatalogRead(void)
4127{
4129}
4130
4131static void
4133 void *unused_context)
4134{
4137 Oid relid;
4138
4140 return;
4141
4144 return;
4145
4147 ((char *) lock - offsetof(BufferDesc, content_lock));
4148 tag = bufHdr->tag;
4149
4150
4151
4152
4153
4154
4155
4156
4157
4159
4161 return;
4162
4164}
4165#endif
4166
4167
4168
4169
4170
4171char *
4173{
4176 char *result;
4179
4182 {
4186 }
4187 else
4188 {
4192 }
4193
4194
4196
4197 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
4203 return result;
4204}
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214void
4216{
4218}
4219
4220
4221
4222
4223
4224
4225
4226
4227
4230{
4232
4234
4237 else
4239
4240
4242}
4243
4244
4245
4246
4247
4248
4249void
4252{
4254
4255
4257
4260 else
4262
4263
4267}
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288static void
4291{
4296 char *bufToWrite;
4298
4299
4300
4301
4302
4303
4305 return;
4306
4307
4312
4313
4314 if (reln == NULL)
4316
4318 buf->tag.blockNum,
4322
4324
4325
4326
4327
4328
4330
4331
4332 buf_state &= ~BM_JUST_DIRTIED;
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4354
4355
4356
4357
4358
4359
4360
4362
4363
4364
4365
4366
4367
4369
4371
4372
4373
4374
4377 buf->tag.blockNum,
4378 bufToWrite,
4379 false);
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4401
4403
4404
4405
4406
4407
4409
4411 buf->tag.blockNum,
4415
4416
4418}
4419
4420
4421
4422
4423
4424
4425
4426
4427
4430{
4431 if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
4432 {
4433
4434
4435
4436
4437
4438
4440
4442
4443 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
4444 }
4445 else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
4446 {
4448 }
4449 else
4451
4452 return 0;
4453}
4454
4455
4456
4457
4458
4459
4460bool
4462{
4464
4465
4467 return false;
4468
4469
4472
4473
4474
4475
4476
4477
4478
4479
4482}
4483
4484
4485
4486
4487
4488
4489
4492{
4497
4498
4499
4500
4503
4504
4507
4512
4513 return lsn;
4514}
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537void
4540{
4541 int i;
4542 int j;
4545 uint64 nBlocksToInvalidate = 0;
4546
4548
4549
4551 {
4553 {
4554 for (j = 0; j < nforks; j++)
4556 firstDelBlock[j]);
4557 }
4558 return;
4559 }
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583 for (i = 0; i < nforks; i++)
4584 {
4585
4587
4589 {
4591 break;
4592 }
4593
4594
4595 nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4596 }
4597
4598
4599
4600
4601
4604 {
4605 for (j = 0; j < nforks; j++)
4607 nForkBlock[j], firstDelBlock[j]);
4608 return;
4609 }
4610
4612 {
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4633 continue;
4634
4636
4637 for (j = 0; j < nforks; j++)
4638 {
4642 {
4644 break;
4645 }
4646 }
4647 if (j >= nforks)
4649 }
4650}
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660void
4662{
4663 int i;
4664 int n = 0;
4667 uint64 nBlocksToInvalidate = 0;
4669 bool cached = true;
4670 bool use_bsearch;
4671
4672 if (nlocators == 0)
4673 return;
4674
4675 rels = palloc(sizeof(SMgrRelation) * nlocators);
4676
4677
4678 for (i = 0; i < nlocators; i++)
4679 {
4681 {
4682 if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4684 }
4685 else
4686 rels[n++] = smgr_reln[i];
4687 }
4688
4689
4690
4691
4692
4693 if (n == 0)
4694 {
4696 return;
4697 }
4698
4699
4700
4701
4702
4705
4706
4707
4708
4709
4710 for (i = 0; i < n && cached; i++)
4711 {
4713 {
4714
4716
4717
4719 {
4721 continue;
4722 cached = false;
4723 break;
4724 }
4725
4726
4727 nBlocksToInvalidate += block[i][j];
4728 }
4729 }
4730
4731
4732
4733
4734
4736 {
4738 {
4740 {
4741
4743 continue;
4744
4745
4748 }
4749 }
4750
4753 return;
4754 }
4755
4759 locators[i] = rels[i]->smgr_rlocator.locator;
4760
4761
4762
4763
4764
4765
4766
4768
4769
4770 if (use_bsearch)
4772
4774 {
4778
4779
4780
4781
4782
4783
4784 if (!use_bsearch)
4785 {
4786 int j;
4787
4789 {
4791 {
4792 rlocator = &locators[j];
4793 break;
4794 }
4795 }
4796 }
4797 else
4798 {
4800
4802 rlocator = bsearch(&locator,
4805 }
4806
4807
4808 if (rlocator == NULL)
4809 continue;
4810
4814 else
4816 }
4817
4818 pfree(locators);
4820}
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831static void
4835{
4837
4838 for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4839 {
4840 uint32 bufHash;
4841 BufferTag bufTag;
4842 LWLock *bufPartitionLock;
4843 int buf_id;
4846
4847
4848 InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
4849
4850
4853
4854
4858
4859 if (buf_id < 0)
4860 continue;
4861
4863
4864
4865
4866
4867
4868
4869
4871
4876 else
4878 }
4879}
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892void
4894{
4895 int i;
4896
4897
4898
4899
4900
4901
4903 {
4906
4907
4908
4909
4910
4911 if (bufHdr->tag.dbOid != dbid)
4912 continue;
4913
4915 if (bufHdr->tag.dbOid == dbid)
4917 else
4919 }
4920}
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940void
4942{
4943 int i;
4946
4948 {
4950 {
4952
4957 {
4959
4960
4962 errcallback.arg = bufHdr;
4965
4966
4969
4970
4971
4972
4973
4975
4976
4978
4980
4981
4983 }
4984 }
4985
4986 return;
4987 }
4988
4990 {
4992
4994
4995
4996
4997
4998
5000 continue;
5001
5002
5005
5009 {
5015 }
5016 else
5018 }
5019}
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030void
5032{
5033 int i;
5035 bool use_bsearch;
5036
5037 if (nrels == 0)
5038 return;
5039
5040
5042
5043 for (i = 0; i < nrels; i++)
5044 {
5046
5048 srels[i].srel = smgrs[i];
5049 }
5050
5051
5052
5053
5054
5056
5057
5058 if (use_bsearch)
5060
5062 {
5066
5067
5068
5069
5070
5071
5072 if (!use_bsearch)
5073 {
5074 int j;
5075
5076 for (j = 0; j < nrels; j++)
5077 {
5079 {
5080 srelent = &srels[j];
5081 break;
5082 }
5083 }
5084 }
5085 else
5086 {
5088
5090 srelent = bsearch(&rlocator,
5093 }
5094
5095
5096 if (srelent == NULL)
5097 continue;
5098
5099
5102
5106 {
5112 }
5113 else
5115 }
5116
5118}
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130static void
5134{
5137 Page srcPage;
5138 Page dstPage;
5139 bool use_wal;
5148
5149
5150
5151
5152
5153
5155
5156
5158 forkNum);
5159
5160
5161 if (nblocks == 0)
5162 return;
5163
5164
5165
5166
5167
5168 memset(buf.data, 0, BLCKSZ);
5170 buf.data, true);
5171
5172
5175
5176
5180
5181
5182
5183
5184
5187 bstrategy_src,
5188 src_smgr,
5189 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
5190 forkNum,
5192 &p,
5193 0);
5194
5195
5196 for (blkno = 0; blkno < nblocks; blkno++)
5197 {
5199
5200
5204
5208 permanent);
5210
5212
5213
5214 memcpy(dstPage, srcPage, BLCKSZ);
5216
5217
5218 if (use_wal)
5220
5222
5225 }
5228
5231}
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244void
5247{
5248 char relpersistence;
5251
5252
5253 relpersistence = permanent ?
5254 RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
5255
5258
5259
5260
5261
5262
5263
5264
5266
5267
5269 permanent);
5270
5271
5274 {
5276 {
5277 smgrcreate(dst_rel, forkNum, false);
5278
5279
5280
5281
5282
5285
5286
5288 permanent);
5289 }
5290 }
5291}
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308void
5310{
5311 int i;
5313
5315 {
5317
5319
5320
5321
5322
5323
5324 if (bufHdr->tag.dbOid != dbid)
5325 continue;
5326
5327
5330
5332 if (bufHdr->tag.dbOid == dbid &&
5334 {
5340 }
5341 else
5343 }
5344}
5345
5346
5347
5348
5349
5350void
5352{
5354
5355
5357
5359
5361
5363
5365}
5366
5367
5368
5369
5370void
5372{
5375
5378 else
5380}
5381
5382
5383
5384
5385
5386
5387void
5389{
5392}
5393
5394
5395
5396
5397
5398
5399
5400
5401
5402void
5404{
5409 else
5410 {
5412
5414 Assert(ref != NULL);
5416 }
5418}
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434void
5436{
5439
5442
5444 {
5446 return;
5447 }
5448
5450
5452
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
5468 {
5470 bool dirtied = false;
5471 bool delayChkptFlags = false;
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5485 {
5486
5487
5488
5489
5490
5491
5492
5493
5496 return;
5497
5498
5499
5500
5501
5502
5503
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513
5514
5515
5516
5517
5518
5519
5520
5523 delayChkptFlags = true;
5525 }
5526
5528
5530
5531 if (!(buf_state & BM_DIRTY))
5532 {
5533 dirtied = true;
5534
5535
5536
5537
5538
5539
5540
5541
5542
5543
5544
5545
5546
5547
5550 }
5551
5554
5555 if (delayChkptFlags)
5557
5558 if (dirtied)
5559 {
5563 }
5564 }
5565}
5566
5567
5568
5569
5570
5571
5572
5573
5574
5575
5576void
5578{
5580
5581 if (buf)
5582 {
5584
5586
5587
5588
5589
5590
5593 buf_state &= ~BM_PIN_COUNT_WAITER;
5594
5596
5598 }
5599}
5600
5601
5602
5603
5604void
5606{
5608
5611 return;
5612
5614
5621 else
5622 elog(ERROR, "unrecognized buffer lock mode: %d", mode);
5623}
5624
5625
5626
5627
5628
5629
5630bool
5632{
5634
5637 return true;
5638
5640
5643}
5644
5645
5646
5647
5648
5649
5650
5651void
5653{
5655 {
5657 elog(ERROR, "incorrect local pin count: %d",
5659 }
5660 else
5661 {
5663 elog(ERROR, "incorrect local pin count: %d",
5665 }
5666}
5667
5668
5669
5670
5671
5672
5673
5674
5675
5676
5677
5678
5679
5680
5681
5682
5683
5684void
5686{
5690 bool logged_recovery_conflict = false;
5691
5694
5696
5697
5698
5699
5700
5701
5702
5703
5704
5706 return;
5707
5709
5710 for (;;)
5711 {
5713
5714
5717
5720 {
5721
5723
5724
5725
5726
5727
5728
5729 if (logged_recovery_conflict)
5732 NULL, false);
5733
5735 {
5736
5739 }
5740 return;
5741 }
5742
5744 {
5747 elog(ERROR, "multiple backends attempting to wait for pincount 1");
5748 }
5754
5755
5757 {
5759 {
5760
5763 }
5764
5765
5766
5767
5768
5769
5770
5771
5772
5773 if (waitStart != 0 && !logged_recovery_conflict)
5774 {
5776
5779 {
5781 waitStart, now, NULL, true);
5782 logged_recovery_conflict = true;
5783 }
5784 }
5785
5786
5787
5788
5789
5792
5793
5795
5797
5799 }
5800 else
5802
5803
5804
5805
5806
5807
5808
5809
5810
5814 buf_state &= ~BM_PIN_COUNT_WAITER;
5816
5818
5819 }
5820}
5821
5822
5823
5824
5825
5826bool
5828{
5830
5831
5832
5833
5834
5835
5836
5837 if (bufid < 0)
5838 return false;
5839
5841 return true;
5842
5843 return false;
5844}
5845
5846
5847
5848
5849
5850
5851
5852bool
5854{
5858
5860
5861
5862
5864 {
5866
5869 return false;
5870
5871 return true;
5872 }
5873
5874
5878 return false;
5879
5880
5882 return false;
5883
5887
5890 {
5891
5893 return true;
5894 }
5895
5896
5899 return false;
5900}
5901
5902
5903
5904
5905
5906
5907
5908
5909
5910bool
5912{
5915
5917
5918
5919
5921 {
5922
5924 return false;
5925
5926 return true;
5927 }
5928
5929
5931 return false;
5932
5934
5935
5938
5940
5943 {
5944
5946 return true;
5947 }
5948
5950 return false;
5951}
5952
5953
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963static void
5965{
5967
5969 for (;;)
5970 {
5973
5974
5975
5976
5977
5978
5980
5981
5982
5983
5984
5985
5986 iow = buf->io_wref;
5988
5989
5991 break;
5992
5993
5994
5995
5996
5998 {
6000
6001
6002
6003
6004
6005
6006
6007
6008
6010 continue;
6011 }
6012
6013
6015 }
6017}
6018
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042bool
6044{
6046
6048
6049 for (;;)
6050 {
6052
6054 break;
6056 if (nowait)
6057 return false;
6059 }
6060
6061
6062
6063
6064 if (forInput ? (buf_state & BM_VALID) : !(buf_state & BM_DIRTY))
6065 {
6067 return false;
6068 }
6069
6072
6075
6076 return true;
6077}
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099void
6101 bool forget_owner, bool release_aio)
6102{
6104
6106
6108 buf_state &= ~BM_IO_IN_PROGRESS;
6109
6110
6111 buf_state &= ~BM_IO_ERROR;
6112
6115
6116 if (release_aio)
6117 {
6118
6122 }
6123
6124 buf_state |= set_flag_bits;
6126
6127 if (forget_owner)
6130
6132
6133
6134
6135
6136
6137
6138
6139
6140
6143}
6144
6145
6146
6147
6148
6149
6150
6151
6152
6153
6154
6155
6156
6157
6158static void
6160{
6163
6166
6167 if (!(buf_state & BM_VALID))
6168 {
6171 }
6172 else
6173 {
6176
6177
6179 {
6180
6182 (errcode(ERRCODE_IO_ERROR),
6183 errmsg("could not write block %u of %s",
6187 errdetail("Multiple failures --- write error might be permanent.")));
6188 }
6189 }
6190
6192}
6193
6194
6195
6196
6197static void
6199{
6201
6202
6203 if (bufHdr != NULL)
6204 errcontext("writing block %u of relation %s",
6208}
6209
6210
6211
6212
6213static void
6215{
6217
6218 if (bufHdr != NULL)
6219 errcontext("writing block %u of relation %s",
6224}
6225
6226
6227
6228
6229static int
6231{
6234
6236 return -1;
6238 return 1;
6239
6241 return -1;
6243 return 1;
6244
6246 return -1;
6248 return 1;
6249 else
6250 return 0;
6251}
6252
6253
6254
6255
6258{
6260 uint32 old_buf_state;
6261
6263
6265
6266 while (true)
6267 {
6268
6270
6271 if (!(old_buf_state & BM_LOCKED))
6272 break;
6274 }
6276 return old_buf_state | BM_LOCKED;
6277}
6278
6279
6280
6281
6282
6283
6284
6285
6288{
6291
6293
6295
6297 {
6300 }
6301
6303
6304 return buf_state;
6305}
6306
6307
6308
6309
6310static inline int
6312{
6313 int ret;
6316
6319
6321
6322 if (ret != 0)
6323 return ret;
6324
6326 return -1;
6328 return 1;
6329
6331 return -1;
6333 return 1;
6334
6335 return 0;
6336}
6337
6338
6339
6340
6341
6342
6343
6344static inline int
6346{
6347
6349 return -1;
6350 else if (a->tsId > b->tsId)
6351 return 1;
6352
6353 if (a->relNumber < b->relNumber)
6354 return -1;
6355 else if (a->relNumber > b->relNumber)
6356 return 1;
6357
6358 else if (a->forkNum < b->forkNum)
6359 return -1;
6360 else if (a->forkNum > b->forkNum)
6361 return 1;
6362
6363 else if (a->blockNum < b->blockNum)
6364 return -1;
6365 else if (a->blockNum > b->blockNum)
6366 return 1;
6367
6368 return 0;
6369}
6370
6371
6372
6373
6374
6375static int
6377{
6380
6381
6383 return 1;
6384 else if (sa->progress == sb->progress)
6385 return 0;
6386 else
6387 return -1;
6388}
6389
6390
6391
6392
6393
6394
6395
6396
6397
6398void
6400{
6402
6405}
6406
6407
6408
6409
6410void
6413{
6415
6416
6417
6418
6419
6422 return;
6423
6424
6425
6426
6427
6429 {
6431
6433
6434 pending->tag = *tag;
6435 }
6436
6437
6438
6439
6440
6441
6444}
6445
6446#define ST_SORT sort_pending_writebacks
6447#define ST_ELEMENT_TYPE PendingWriteback
6448#define ST_COMPARE(a, b) buffertag_comparator(&a->tag, &b->tag)
6449#define ST_SCOPE static
6450#define ST_DEFINE
6452
6453
6454
6455
6456
6457
6458
6459
6460void
6462{
6464 int i;
6465
6467 return;
6468
6469
6470
6471
6472
6475
6477
6478
6479
6480
6481
6482
6484 {
6488 int ahead;
6491 Size nblocks = 1;
6492
6494 tag = cur->tag;
6496
6497
6498
6499
6500
6501 for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
6502 {
6503
6505
6506
6510 break;
6511
6512
6513 if (cur->tag.blockNum == next->tag.blockNum)
6514 continue;
6515
6516
6517 if (cur->tag.blockNum + 1 != next->tag.blockNum)
6518 break;
6519
6520 nblocks++;
6522 }
6523
6524 i += ahead;
6525
6526
6529 }
6530
6531
6532
6533
6534
6537
6539}
6540
6541
6542
6543static void
6545{
6547
6549}
6550
6551static char *
6553{
6555
6556 return psprintf("lost track of buffer IO on buffer %d", buffer);
6557}
6558
6559static void
6561{
6563
6564
6567
6570 else
6572}
6573
6574static char *
6576{
6578}
6579
6580
6581
6582
6583
6584static bool
6586{
6588 bool result;
6589
6590 *buffer_flushed = false;
6591
6594
6595 if ((buf_state & BM_VALID) == 0)
6596 {
6598 return false;
6599 }
6600
6601
6603 {
6605 return false;
6606 }
6607
6609
6610
6612 {
6615 *buffer_flushed = true;
6617 }
6618
6619
6621
6623
6624 return result;
6625}
6626
6627
6628
6629
6630
6631
6632
6633
6634
6635
6636
6637
6638
6639
6640
6641
6642
6643
6644
6645
6646
6647
6648bool
6650{
6652
6654
6655
6658
6661
6663}
6664
6665
6666
6667
6668
6669
6670
6671
6672
6673
6674
6675
6676
6677void
6679 int32 *buffers_skipped)
6680{
6681 *buffers_evicted = 0;
6682 *buffers_skipped = 0;
6683 *buffers_flushed = 0;
6684
6686 {
6689 bool buffer_flushed;
6690
6692 if (!(buf_state & BM_VALID))
6693 continue;
6694
6697
6699
6701 (*buffers_evicted)++;
6702 else
6703 (*buffers_skipped)++;
6704
6705 if (buffer_flushed)
6706 (*buffers_flushed)++;
6707 }
6708}
6709
6710
6711
6712
6713
6714
6715
6716
6717
6718
6719
6720
6721
6722
6723
6724
6725void
6727 int32 *buffers_flushed, int32 *buffers_skipped)
6728{
6730
6731 *buffers_skipped = 0;
6732 *buffers_evicted = 0;
6733 *buffers_flushed = 0;
6734
6736 {
6739 bool buffer_flushed;
6740
6741
6742 if ((buf_state & BM_VALID) == 0 ||
6744 continue;
6745
6746
6749
6751
6752
6753 if ((buf_state & BM_VALID) == 0 ||
6755 {
6757 continue;
6758 }
6759
6761 (*buffers_evicted)++;
6762 else
6763 (*buffers_skipped)++;
6764
6765 if (buffer_flushed)
6766 (*buffers_flushed)++;
6767 }
6768}
6769
6770
6771
6772
6773
6774
6775
6776
6777
6778
6779
6780
6781
6782
6783
6786{
6788 uint8 handle_data_len;
6791
6793
6795
6796
6797 for (int i = 0; i < handle_data_len; i++)
6798 {
6804
6805
6806
6807
6808
6809
6810
6811
6812 if (i == 0)
6813 first = buf_hdr->tag;
6814 else
6815 {
6818 }
6819
6820 if (is_temp)
6822 else
6824
6825
6827 if (is_write)
6828 {
6831 }
6832 else
6833 {
6836 }
6837
6838
6839 if (!is_temp)
6841
6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
6853
6855 buf_hdr->io_wref = io_ref;
6856
6857 if (is_temp)
6859 else
6861
6862
6863
6864
6865
6866
6867 if (is_write && !is_temp)
6868 {
6869 LWLock *content_lock;
6870
6872
6874
6875
6876
6877
6879 }
6880
6881
6882
6883
6884
6885 if (!is_temp)
6887 }
6888}
6889
6890
6891
6892
6893static inline void
6895 bool *zeroed_any,
6896 bool *ignored_any,
6897 uint8 *zeroed_or_error_count,
6898 uint8 *checkfail_count,
6899 uint8 *first_off)
6900{
6902
6903
6904#define READV_COUNT_BITS 7
6905#define READV_COUNT_MASK ((1 << READV_COUNT_BITS) - 1)
6906
6907 *zeroed_any = rem_error & 1;
6908 rem_error >>= 1;
6909
6910 *ignored_any = rem_error & 1;
6911 rem_error >>= 1;
6912
6915
6918
6921}
6922
6923
6924
6925
6926
6927
6928
6929
6930
6931
6932
6933
6934
6935static inline void
6937 bool is_temp,
6938 bool zeroed_any,
6939 bool ignored_any,
6940 uint8 error_count,
6941 uint8 zeroed_count,
6942 uint8 checkfail_count,
6943 uint8 first_error_off,
6944 uint8 first_zeroed_off,
6945 uint8 first_ignored_off)
6946{
6947
6948 uint8 shift = 0;
6949 uint8 zeroed_or_error_count =
6950 error_count > 0 ? error_count : zeroed_count;
6951 uint8 first_off;
6952
6954 "PG_IOV_MAX is bigger than reserved space for error data");
6956 "PGAIO_RESULT_ERROR_BITS is insufficient for buffer_readv");
6957
6958
6959
6960
6961
6962
6963 if (error_count > 0)
6964 first_off = first_error_off;
6965 else if (zeroed_count > 0)
6966 first_off = first_zeroed_off;
6967 else
6968 first_off = first_ignored_off;
6969
6970 Assert(!zeroed_any || error_count == 0);
6971
6973
6974 result->error_data |= zeroed_any << shift;
6975 shift += 1;
6976
6977 result->error_data |= ignored_any << shift;
6978 shift += 1;
6979
6980 result->error_data |= ((uint32) zeroed_or_error_count) << shift;
6982
6985
6988
6991
6992 if (error_count > 0)
6994 else
6996
6997
6998
6999
7000
7001#ifdef USE_ASSERT_CHECKING
7002 {
7003 bool zeroed_any_2,
7004 ignored_any_2;
7005 uint8 zeroed_or_error_count_2,
7006 checkfail_count_2,
7007 first_off_2;
7008
7010 &zeroed_any_2, &ignored_any_2,
7011 &zeroed_or_error_count_2,
7012 &checkfail_count_2,
7013 &first_off_2);
7014 Assert(zeroed_any == zeroed_any_2);
7015 Assert(ignored_any == ignored_any_2);
7016 Assert(zeroed_or_error_count == zeroed_or_error_count_2);
7017 Assert(checkfail_count == checkfail_count_2);
7018 Assert(first_off == first_off_2);
7019 }
7020#endif
7021
7022#undef READV_COUNT_BITS
7023#undef READV_COUNT_MASK
7024}
7025
7026
7027
7028
7029
7032 uint8 flags, bool failed, bool is_temp,
7033 bool *buffer_invalid,
7034 bool *failed_checksum,
7035 bool *ignored_checksum,
7036 bool *zeroed_buffer)
7037{
7043 uint32 set_flag_bits;
7044 int piv_flags;
7045
7046
7047#ifdef USE_ASSERT_CHECKING
7048 {
7050
7053
7054 if (!is_temp)
7057 }
7058#endif
7059
7060 *buffer_invalid = false;
7061 *failed_checksum = false;
7062 *ignored_checksum = false;
7063 *zeroed_buffer = false;
7064
7065
7066
7067
7068
7069
7071
7072
7075
7076
7077 if (!failed)
7078 {
7079
7080
7081
7082
7083
7084
7085
7086
7087#ifdef USE_VALGRIND
7090#endif
7091
7093 failed_checksum))
7094 {
7096 {
7097 memset(bufdata, 0, BLCKSZ);
7098 *zeroed_buffer = true;
7099 }
7100 else
7101 {
7102 *buffer_invalid = true;
7103
7104 failed = true;
7105 }
7106 }
7107 else if (*failed_checksum)
7108 *ignored_checksum = true;
7109
7110
7111#ifdef USE_VALGRIND
7114#endif
7115
7116
7117
7118
7119
7120
7121
7122
7123
7124
7125
7126
7127
7128
7129
7130 if (*buffer_invalid || *failed_checksum || *zeroed_buffer)
7131 {
7133
7135 *zeroed_buffer,
7136 *ignored_checksum,
7137 *buffer_invalid,
7138 *zeroed_buffer ? 1 : 0,
7139 *failed_checksum ? 1 : 0,
7140 buf_off, buf_off, buf_off);
7142 }
7143 }
7144
7145
7147 if (is_temp)
7149 else
7151
7152
7153
7154
7155
7156
7157
7158
7159 TRACE_POSTGRESQL_BUFFER_READ_DONE(tag.forkNum,
7165 false);
7166}
7167
7168
7169
7170
7171
7172
7173
7176 uint8 cb_data, bool is_temp)
7177{
7180 uint8 first_error_off = 0;
7181 uint8 first_zeroed_off = 0;
7182 uint8 first_ignored_off = 0;
7183 uint8 error_count = 0;
7184 uint8 zeroed_count = 0;
7185 uint8 ignored_count = 0;
7186 uint8 checkfail_count = 0;
7188 uint8 handle_data_len;
7189
7190 if (is_temp)
7191 {
7194 }
7195 else
7197
7198
7199
7200
7201
7203 for (uint8 buf_off = 0; buf_off < handle_data_len; buf_off++)
7204 {
7206 bool failed;
7207 bool failed_verification = false;
7208 bool failed_checksum = false;
7209 bool zeroed_buffer = false;
7210 bool ignored_checksum = false;
7211
7213
7214
7215
7216
7217
7218
7219 failed =
7221 || prior_result.result <= buf_off;
7222
7224 &failed_verification,
7225 &failed_checksum,
7226 &ignored_checksum,
7227 &zeroed_buffer);
7228
7229
7230
7231
7232
7233
7234 if (failed_verification && !zeroed_buffer && error_count++ == 0)
7235 first_error_off = buf_off;
7236 if (zeroed_buffer && zeroed_count++ == 0)
7237 first_zeroed_off = buf_off;
7238 if (ignored_checksum && ignored_count++ == 0)
7239 first_ignored_off = buf_off;
7240 if (failed_checksum)
7241 checkfail_count++;
7242 }
7243
7244
7245
7246
7247
7249 (error_count > 0 || ignored_count > 0 || zeroed_count > 0))
7250 {
7252 zeroed_count > 0, ignored_count > 0,
7253 error_count, zeroed_count, checkfail_count,
7254 first_error_off, first_zeroed_off,
7255 first_ignored_off);
7257 }
7258
7259
7260
7261
7262
7263 if (is_temp && checkfail_count > 0)
7265 checkfail_count);
7266
7267 return result;
7268}
7269
7270
7271
7272
7273
7274
7275
7276
7277static void
7279 int elevel)
7280{
7288 bool zeroed_any,
7289 ignored_any;
7290 uint8 zeroed_or_error_count,
7291 checkfail_count,
7292 first_off;
7293 uint8 affected_count;
7294 const char *msg_one,
7295 *msg_mult,
7296 *det_mult,
7297 *hint_mult;
7298
7300 &zeroed_or_error_count,
7301 &checkfail_count,
7302 &first_off);
7303
7304
7305
7306
7307
7308
7309 if (zeroed_any && ignored_any)
7310 {
7311 Assert(zeroed_any && ignored_any);
7312 Assert(nblocks > 1);
7314 affected_count = zeroed_or_error_count;
7315
7318 errmsg("zeroing %u page(s) and ignoring %u checksum failure(s) among blocks %u..%u of relation %s",
7319 affected_count, checkfail_count, first, last, rpath.str),
7320 affected_count > 1 ?
7321 errdetail("Block %u held first zeroed page.",
7322 first + first_off) : 0,
7323 errhint("See server log for details about the other %u invalid block(s).",
7324 affected_count + checkfail_count - 1));
7325 return;
7326 }
7327
7328
7329
7330
7331
7332
7334 {
7335 Assert(!zeroed_any);
7336 affected_count = zeroed_or_error_count;
7337 msg_one = _("invalid page in block %u of relation %s");
7338 msg_mult = _("%u invalid pages among blocks %u..%u of relation %s");
7339 det_mult = _("Block %u held first invalid page.");
7340 hint_mult = _("See server log for the other %u invalid block(s).");
7341 }
7342 else if (zeroed_any && !ignored_any)
7343 {
7344 affected_count = zeroed_or_error_count;
7345 msg_one = _("invalid page in block %u of relation %s; zeroing out page");
7346 msg_mult = _("zeroing out %u invalid pages among blocks %u..%u of relation %s");
7347 det_mult = _("Block %u held first zeroed page.");
7348 hint_mult = _("See server log for the other %u zeroed block(s).");
7349 }
7350 else if (!zeroed_any && ignored_any)
7351 {
7352 affected_count = checkfail_count;
7353 msg_one = _("ignoring checksum failure in block %u of relation %s");
7354 msg_mult = _("ignoring %u checksum failures among blocks %u..%u of relation %s");
7355 det_mult = _("Block %u held first ignored page.");
7356 hint_mult = _("See server log for the other %u ignored block(s).");
7357 }
7358 else
7360
7363 affected_count == 1 ?
7366 affected_count > 1 ? errdetail_internal(det_mult, first + first_off) : 0,
7367 affected_count > 1 ? errhint_internal(hint_mult, affected_count - 1) : 0);
7368}
7369
7370static void
7372{
7374}
7375
7379{
7381}
7382
7383
7384
7385
7386
7387
7388
7389
7393{
7394 bool zeroed_any,
7395 ignored_any;
7396 uint8 zeroed_or_error_count,
7397 checkfail_count,
7398 first_off;
7399
7401 return prior_result;
7402
7404 &zeroed_any,
7405 &ignored_any,
7406 &zeroed_or_error_count,
7407 &checkfail_count,
7408 &first_off);
7409
7410 if (checkfail_count)
7411 {
7413
7415 checkfail_count);
7416 }
7417
7418 return prior_result;
7419}
7420
7421static void
7423{
7425}
7426
7430{
7432}
7433
7434
7438
7441};
7442
7443
7446
7447
7448
7449
7450
7451
7452
7455};
bool pgaio_wref_valid(PgAioWaitRef *iow)
PgAioHandle * pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
void pgaio_wref_clear(PgAioWaitRef *iow)
void pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
bool pgaio_have_staged(void)
bool pgaio_wref_check_done(PgAioWaitRef *iow)
ProcNumber pgaio_io_get_owner(PgAioHandle *ioh)
void pgaio_submit_staged(void)
void pgaio_wref_wait(PgAioWaitRef *iow)
void pgaio_io_release(PgAioHandle *ioh)
PgAioHandle * pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
@ PGAIO_HCB_LOCAL_BUFFER_READV
@ PGAIO_HCB_SHARED_BUFFER_READV
@ PGAIO_HF_REFERENCES_LOCAL
void pgaio_io_set_handle_data_32(PgAioHandle *ioh, uint32 *data, uint8 len)
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
uint64 * pgaio_io_get_handle_data(PgAioHandle *ioh, uint8 *len)
void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
PgAioTargetData * pgaio_io_get_target_data(PgAioHandle *ioh)
#define PGAIO_RESULT_ERROR_BITS
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
TimestampTz GetCurrentTimestamp(void)
Datum now(PG_FUNCTION_ARGS)
void binaryheap_build(binaryheap *heap)
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
bh_node_type binaryheap_first(binaryheap *heap)
bh_node_type binaryheap_remove_first(binaryheap *heap)
void binaryheap_free(binaryheap *heap)
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
#define binaryheap_empty(h)
#define InvalidBlockNumber
static bool BlockNumberIsValid(BlockNumber blockNumber)
#define BufferIsLocal(buffer)
CkptSortItem * CkptBufferIds
WritebackContext BackendWritebackContext
BufferDescPadded * BufferDescriptors
#define BM_MAX_USAGE_COUNT
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BUF_USAGECOUNT_MASK
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
static void UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
static LWLock * BufferDescriptorGetContentLock(const BufferDesc *bdesc)
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BM_PIN_COUNT_WAITER
static void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)
#define BUF_STATE_GET_USAGECOUNT(state)
static void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
#define BM_IO_IN_PROGRESS
static void ClearBufferTag(BufferTag *tag)
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
static void ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
#define BUF_USAGECOUNT_ONE
#define BUF_STATE_GET_REFCOUNT(state)
static LWLock * BufMappingPartitionLock(uint32 hashcode)
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
static BufferDesc * GetBufferDescriptor(uint32 id)
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
#define BM_CHECKPOINT_NEEDED
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
uint32 BufTableHashCode(BufferTag *tagPtr)
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
void CheckBufferIsPinnedOnce(Buffer buffer)
void FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
void IncrBufferRefCount(Buffer buffer)
void DropDatabaseBuffers(Oid dbid)
static int ckpt_buforder_comparator(const CkptSortItem *a, const CkptSortItem *b)
static pg_attribute_always_inline PgAioResult buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
bool BufferIsExclusiveLocked(Buffer buffer)
const ResourceOwnerDesc buffer_pin_resowner_desc
BlockNumber BufferGetBlockNumber(Buffer buffer)
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
static bool ReadBuffersCanStartIO(Buffer buffer, bool nowait)
void DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
static PgAioResult shared_buffer_readv_complete_local(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete)
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
static uint32 PrivateRefCountClock
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
static void ResOwnerReleaseBufferIO(Datum res)
static PgAioResult local_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
bool StartReadBuffers(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
void EvictAllUnpinnedBuffers(int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
const ResourceOwnerDesc buffer_io_resowner_desc
#define BUF_DROP_FULL_SCAN_THRESHOLD
static void PinBuffer_Locked(BufferDesc *buf)
void EvictRelUnpinnedBuffers(Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
static pg_attribute_always_inline void buffer_readv_complete_one(PgAioTargetData *td, uint8 buf_off, Buffer buffer, uint8 flags, bool failed, bool is_temp, bool *buffer_invalid, bool *failed_checksum, bool *ignored_checksum, bool *zeroed_buffer)
static uint32 WaitBufHdrUnlocked(BufferDesc *buf)
static int buffertag_comparator(const BufferTag *ba, const BufferTag *bb)
bool IsBufferCleanupOK(Buffer buffer)
#define BufferGetLSN(bufHdr)
static char * ResOwnerPrintBufferIO(Datum res)
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
void AtEOXact_Buffers(bool isCommit)
static void AbortBufferIO(Buffer buffer)
const PgAioHandleCallbacks aio_shared_buffer_readv_cb
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
static void ProcessReadBuffersResult(ReadBuffersOperation *operation)
static void ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
static void CheckForBufferLeaks(void)
static bool ReadBuffersCanStartIOOnce(Buffer buffer, bool nowait)
void CreateAndCopyRelationData(RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
void DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
static int rlocator_comparator(const void *p1, const void *p2)
Buffer ExtendBufferedRelTo(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
struct SMgrSortArray SMgrSortArray
const PgAioHandleCallbacks aio_local_buffer_readv_cb
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
static void AtProcExit_Buffers(int code, Datum arg)
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
#define BufHdrGetBlock(bufHdr)
static pg_attribute_always_inline void buffer_stage_common(PgAioHandle *ioh, bool is_write, bool is_temp)
static void local_buffer_write_error_callback(void *arg)
static void BufferSync(int flags)
static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
static void local_buffer_readv_stage(PgAioHandle *ioh, uint8 cb_data)
char * DebugPrintBufferRefcount(Buffer buffer)
static char * ResOwnerPrintBufferPin(Datum res)
void CheckPointBuffers(int flags)
bool BufferIsDirty(Buffer buffer)
static uint32 MaxProportionalPins
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
bool BgBufferSync(WritebackContext *wb_context)
static void WakePinCountWaiter(BufferDesc *buf)
bool BufferIsPermanent(Buffer buffer)
#define REFCOUNT_ARRAY_ENTRIES
static void shared_buffer_readv_stage(PgAioHandle *ioh, uint8 cb_data)
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
static PgAioResult shared_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
bool ConditionalLockBuffer(Buffer buffer)
BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
void ReleaseBuffer(Buffer buffer)
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
XLogRecPtr BufferGetLSNAtomic(Buffer buffer)
bool HoldingBufferPinThatDelaysRecovery(void)
int checkpoint_flush_after
void UnlockReleaseBuffer(Buffer buffer)
static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner, bool release_aio)
static void UnpinBufferNoOwner(BufferDesc *buf)
static void shared_buffer_write_error_callback(void *arg)
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
void WaitReadBuffers(ReadBuffersOperation *operation)
void WritebackContextInit(WritebackContext *context, int *max_pending)
void MarkBufferDirty(Buffer buffer)
#define BufferIsPinned(bufnum)
double bgwriter_lru_multiplier
static bool EvictUnpinnedBufferInternal(BufferDesc *desc, bool *buffer_flushed)
void LimitAdditionalPins(uint32 *additional_pins)
static void buffer_readv_report(PgAioResult result, const PgAioTargetData *td, int elevel)
static void ReservePrivateRefCountEntry(void)
static BufferDesc * PinCountWaitBuf
static int32 GetPrivateRefCount(Buffer buffer)
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
void LockBufferForCleanup(Buffer buffer)
void LockBuffer(Buffer buffer, int mode)
static PrivateRefCountEntry * ReservedRefCountEntry
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
void FlushRelationBuffers(Relation rel)
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
bool EvictUnpinnedBuffer(Buffer buf, bool *buffer_flushed)
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
bool ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
#define RELS_BSEARCH_THRESHOLD
int maintenance_io_concurrency
static void UnpinBuffer(BufferDesc *buf)
void FlushDatabaseBuffers(Oid dbid)
static void InvalidateBuffer(BufferDesc *buf)
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
int effective_io_concurrency
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait)
struct PrivateRefCountEntry PrivateRefCountEntry
struct CkptTsStatus CkptTsStatus
bool StartReadBuffer(ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
uint32 LockBufHdr(BufferDesc *desc)
static void ResOwnerReleaseBufferPin(Datum res)
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
static void buffer_readv_decode_error(PgAioResult result, bool *zeroed_any, bool *ignored_any, uint8 *zeroed_or_error_count, uint8 *checkfail_count, uint8 *first_off)
void InitBufferManagerAccess(void)
static void buffer_readv_encode_error(PgAioResult *result, bool is_temp, bool zeroed_any, bool ignored_any, uint8 error_count, uint8 zeroed_count, uint8 checkfail_count, uint8 first_error_off, uint8 first_zeroed_off, uint8 first_ignored_off)
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
uint32 GetAdditionalPinLimit(void)
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
static HTAB * PrivateRefCountHash
static int32 PrivateRefCountOverflowed
bool ConditionalLockBufferForCleanup(Buffer buffer)
int bgwriter_lru_maxpages
static void WaitIO(BufferDesc *buf)
void FlushOneBuffer(Buffer buffer)
#define BUFFER_LOCK_UNLOCK
#define BUFFER_LOCK_SHARE
#define READ_BUFFERS_ZERO_ON_ERROR
static Page BufferGetPage(Buffer buffer)
#define DEFAULT_IO_COMBINE_LIMIT
static Block BufferGetBlock(Buffer buffer)
#define READ_BUFFERS_ISSUE_ADVICE
#define MAX_IO_COMBINE_LIMIT
#define DEFAULT_EFFECTIVE_IO_CONCURRENCY
#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES
#define DEFAULT_MAINTENANCE_IO_CONCURRENCY
@ EB_CREATE_FORK_IF_NEEDED
#define READ_BUFFERS_SYNCHRONOUSLY
#define BUFFER_LOCK_EXCLUSIVE
@ RBM_ZERO_AND_CLEANUP_LOCK
static bool BufferIsValid(Buffer bufnum)
bool ignore_checksum_failure
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
bool PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
static bool PageIsNew(const PageData *page)
static void PageSetLSN(Page page, XLogRecPtr lsn)
static XLogRecPtr PageGetLSN(const PageData *page)
#define PIV_IGNORE_CHECKSUM_FAILURE
#define PG_USED_FOR_ASSERTS_ONLY
#define pg_attribute_always_inline
#define MemSet(start, val, len)
#define StaticAssertStmt(condition, errmessage)
bool IsCatalogRelationOid(Oid relid)
bool IsCatalogTextUniqueIndexOid(Oid relid)
void CheckpointWriteDelay(int flags, double progress)
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
void * hash_seq_search(HASH_SEQ_STATUS *status)
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
int errmsg_internal(const char *fmt,...)
int errdetail_internal(const char *fmt,...)
int errdetail(const char *fmt,...)
ErrorContextCallback * error_context_stack
int errhint_internal(const char *fmt,...)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
void FreeAccessStrategy(BufferAccessStrategy strategy)
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
void StrategyFreeBuffer(BufferDesc *buf)
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
volatile sig_atomic_t ProcSignalBarrierPending
Assert(PointerIsAligned(start, uint64))
BufferUsage pgBufferUsage
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
if(TABLE==NULL||TABLE_index==NULL)
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln)
void UnpinLocalBuffer(Buffer buffer)
bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput, bool nowait)
void AtEOXact_LocalBuffers(bool isCommit)
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber firstDelBlock)
void AtProcExit_LocalBuffers(void)
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
void MarkLocalBufferDirty(Buffer buffer)
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
void TerminateLocalBufferIO(BufferDesc *bufHdr, bool clear_dirty, uint32 set_flag_bits, bool release_aio)
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
void UnpinLocalBufferNoOwner(Buffer buffer)
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
bool LWLockHeldByMe(LWLock *lock)
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
void LWLockDisown(LWLock *lock)
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
void LWLockRelease(LWLock *lock)
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
void ForEachLWLockHeldByMe(void(*callback)(LWLock *, LWLockMode, void *), void *context)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
#define START_CRIT_SECTION()
#define CHECK_FOR_INTERRUPTS()
#define END_CRIT_SECTION()
#define ERRCODE_DATA_CORRUPTED
static PgChecksumMode mode
static int64 current_size
#define WRITEBACK_MAX_PENDING_FLUSHES
#define DEFAULT_BACKEND_FLUSH_AFTER
#define DEFAULT_CHECKPOINT_FLUSH_AFTER
#define DEFAULT_BGWRITER_FLUSH_AFTER
#define pgstat_count_buffer_read(rel)
#define pgstat_count_buffer_hit(rel)
PgStat_BgWriterStats PendingBgWriterStats
PgStat_CheckpointerStats PendingCheckpointerStats
void pgstat_prepare_report_checksum_failure(Oid dboid)
void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
instr_time pgstat_prepare_io_time(bool track_io_guc)
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
#define qsort(a, b, c, d)
static Datum PointerGetDatum(const void *X)
static Pointer DatumGetPointer(Datum X)
static int32 DatumGetInt32(Datum X)
#define NUM_AUXILIARY_PROCS
#define DELAY_CHKPT_START
#define INVALID_PROC_NUMBER
void ProcessProcSignalBarrier(void)
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
void set_ps_display_remove_suffix(void)
void set_ps_display_suffix(const char *suffix)
char * psprintf(const char *fmt,...)
ReadStream * read_stream_begin_smgr_relation(int flags, BufferAccessStrategy strategy, SMgrRelation smgr, char smgr_persistence, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
void read_stream_end(ReadStream *stream)
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define READ_STREAM_USE_BATCHING
static unsigned hash(unsigned *uv, int n)
static SMgrRelation RelationGetSmgr(Relation rel)
#define RelationUsesLocalBuffers(relation)
#define RELATION_IS_OTHER_TEMP(relation)
#define RelationIsValid(relation)
#define RelFileLocatorBackendIsTemp(rlocator)
#define RelFileLocatorEquals(locator1, locator2)
#define relpath(rlocator, forknum)
#define relpathbackend(rlocator, backend, forknum)
#define relpathperm(rlocator, forknum)
ResourceOwner CurrentResourceOwner
void ResourceOwnerEnlarge(ResourceOwner owner)
#define RELEASE_PRIO_BUFFER_IOS
@ RESOURCE_RELEASE_BEFORE_LOCKS
#define RELEASE_PRIO_BUFFER_PINS
void perform_spin_delay(SpinDelayStatus *status)
void finish_spin_delay(SpinDelayStatus *status)
#define init_local_spin_delay(status)
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
void smgrstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
void ProcSendSignal(ProcNumber procNumber)
int GetStartupBufferPinWaitBufId(void)
void SetStartupBufferPinWaitBufId(int bufid)
void ProcWaitForSignal(uint32 wait_event_info)
void ResolveRecoveryConflictWithBufferPin(void)
bool log_recovery_conflict_waits
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
BlockNumber last_exclusive
BlockNumber current_blocknum
int wait_backend_pgprocno
struct SMgrRelationData * smgr
int64 shared_blks_dirtied
int64 shared_blks_written
struct ErrorContextCallback * previous
void(* callback)(void *arg)
PgAioHandleCallbackStage stage
PgAioTargetData target_data
PgStat_Counter buf_written_clean
PgStat_Counter maxwritten_clean
PgStat_Counter buffers_written
BufferAccessStrategy strategy
struct SMgrRelationData * smgr
char str[REL_PATH_STR_MAXLEN+1]
RelFileLocator rd_locator
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
RelFileLocatorBackend smgr_rlocator
PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
struct PgAioTargetData::@124 smgr
static volatile sig_atomic_t waiting
bool RecoveryInProgress(void)
bool XLogNeedsFlush(XLogRecPtr record)
CheckpointStatsData CheckpointStats
void XLogFlush(XLogRecPtr record)
#define CHECKPOINT_END_OF_RECOVERY
#define CHECKPOINT_FLUSH_ALL
#define CHECKPOINT_IS_SHUTDOWN
#define XLogHintBitIsNeeded()
#define XLogRecPtrIsInvalid(r)
#define InvalidXLogRecPtr
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)