PostgreSQL Source Code: src/backend/regex/regexec.c Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
35
36
37
38
40{
43};
44
46{
49#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
50#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
53#define STARTER 01
54#define POSTSTATE 02
55#define LOCKED 04
56#define NOPROGRESS 010
61};
62
64{
83 bool ismalloced;
84 bool arraysmalloced;
85};
86
87#define WORK 1
88
89
90#define FEWSTATES 20
91#define FEWCOLORS 15
93{
99};
100
101#define DOMALLOC ((struct smalldfa *)NULL)
102
103
104
105
107{
117 int err;
124};
125
126#define VISERR(vv) ((vv)->err != 0)
127#define ISERR() VISERR(v)
128#define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e)))
129#define ERR(e) VERR(v, e)
130#define NOERR() {if (ISERR()) return v->err;}
131#define OFF(p) ((p) - v->start)
132#define LOFF(p) ((long)OFF(p))
133
134
135
136
137
138
139
145 struct dfa *d, struct dfa *s, chr **coldp);
156
157
161 chr *max, chr **coldp, int *hitstopp);
163 struct sset **lastcss, chr **lastcp);
170static unsigned hash(unsigned *uv, int n);
179
180
181
182
183
184int
186 const chr *string,
187 size_t len,
188 size_t search_start,
190 size_t nmatch,
193{
194 struct vars var;
195 struct vars *v = &var;
196 int st;
197 size_t n;
198 size_t i;
199 int backref;
200
201#define LOCALMAT 20
203
204#define LOCALDFAS 40
206
207
208 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
210 if (re->re_csize != sizeof(chr))
212 if (search_start > len)
214
215
217
218
219 v->re = re;
220 v->g = (struct guts *) re->re_guts;
227 if (backref && nmatch <= v->g->nsub)
228 {
229
233 else
235 if (v->pmatch == NULL)
238 }
239 else
240 {
241
243
244 if (nmatch > 0)
246
247 if (nmatch > v->g->nsub + 1)
250 }
255 v->err = 0;
260
261
266 else
267 {
270 {
273 }
274 }
277
280 if (n > 0)
281 {
283 if (v->ladfas == NULL)
284 {
287 }
293 {
296 }
298 {
301 }
302 }
303
304
306 if (backref)
308 else
310
311
312 if (st == REG_OKAY && nmatch > 0)
313 {
314 if (v->pmatch != pmatch)
315 {
316
317 assert(nmatch <= v->nmatch);
319 }
321 {
322
324 }
325 }
326
327
332 {
335 {
338 }
339 if (v->subdfas != subdfas)
341 }
342 if (v->ladfas != NULL)
343 {
346 {
349 }
351 }
356
357#ifdef REG_DEBUG
360#endif
361
362 return st;
363}
364
365
366
367
368
369
370
371static struct dfa *
374{
376
377 if (d == NULL)
378 {
380 if (d == NULL)
381 return NULL;
382
383 if (t->op == 'b')
384 {
388 }
390 }
391 return d;
392}
393
394
395
396
397
398
399static struct dfa *
401 int n)
402{
403 assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
404
405 if (v->ladfas[n] == NULL)
406 {
408
410
411 }
413}
414
415
416
417
418static int
422{
423 struct dfa *s;
424 struct dfa *d;
425 chr *begin;
426 chr *end = NULL;
427 chr *cold;
428 chr *open;
430 int hitend;
432
433
435 if (s == NULL)
436 return v->err;
438 cold = NULL;
440 &cold, (int *) NULL);
444 {
446 if (cold != NULL)
448 else
451 }
452 if (close == NULL)
454 if (v->nmatch == 0)
456
457
458 assert(cold != NULL);
459 open = cold;
460 cold = NULL;
463 if (d == NULL)
464 return v->err;
465 for (begin = open; begin <= close; begin++)
466 {
467 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
468 if (shorter)
470 (chr **) NULL, &hitend);
471 else
472 end = longest(v, d, begin, v->stop, &hitend);
474 {
476 return v->err;
477 }
478 if (hitend && cold == NULL)
479 cold = begin;
480 if (end != NULL)
481 break;
482 }
483 assert(end != NULL);
485
486
491 {
492 if (cold != NULL)
494 else
497 }
498 if (v->nmatch == 1)
500
501
503}
504
505
506
507
508static int
512{
513 struct dfa *s;
514 struct dfa *d;
515 chr *cold;
516 int ret;
517
519 if (s == NULL)
520 return v->err;
522 if (d == NULL)
523 {
525 return v->err;
526 }
527
529
534 {
536 if (cold != NULL)
538 else
541 }
542 return ret;
543}
544
545
546
547
548static int
552 struct dfa *d,
553 struct dfa *s,
554 chr **coldp)
555{
556 chr *begin;
557 chr *end;
558 chr *cold;
559 chr *open;
561 chr *estart;
562 chr *estop;
563 int er;
565 int hitend;
566
567 assert(d != NULL && s != NULL);
568 cold = NULL;
570 do
571 {
572
576 {
577 *coldp = cold;
578 return v->err;
579 }
580 if (close == NULL)
581 break;
582 assert(cold != NULL);
583 open = cold;
584 cold = NULL;
585
587 for (begin = open; begin <= close; begin++)
588 {
589 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
590 estart = begin;
591 estop = v->stop;
592 for (;;)
593 {
594
595 if (shorter)
596 end = shortest(v, d, begin, estart,
597 estop, (chr **) NULL, &hitend);
598 else
599 end = longest(v, d, begin, estop,
600 &hitend);
602 {
603 *coldp = cold;
604 return v->err;
605 }
606 if (hitend && cold == NULL)
607 cold = begin;
608 if (end == NULL)
609 break;
610 MDEBUG(("tentative end %ld\n", LOFF(end)));
611
614 {
616 {
619 }
620 *coldp = cold;
622 }
624 {
626 *coldp = cold;
627 return er;
628 }
629
630 if (shorter)
631 {
632 if (end == estop)
633 break;
634 estart = end + 1;
635 }
636 else
637 {
638 if (end == begin)
639 break;
640 estop = end - 1;
641 }
642 }
643 }
644
645
646
647
648
649
651 } while (close < v->stop);
652
653 *coldp = cold;
655}
656
657
658
659
660
661
662static void
664 size_t n)
665{
666 size_t i;
667
668 for (i = n - 1; i > 0; i--)
669 {
670 p[i].rm_so = -1;
671 p[i].rm_eo = -1;
672 }
673}
674
675
676
677
678static void
681{
682 int n = t->capno;
683 struct subre *t2;
684
685 if (n > 0)
686 {
687 if ((size_t) n < v->nmatch)
688 {
689 v->pmatch[n].rm_so = -1;
690 v->pmatch[n].rm_eo = -1;
691 }
692 }
693
694 for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
696}
697
698
699
700
701static void
703 struct subre *sub,
706{
707 int n = sub->capno;
708
710 if ((size_t) n >= v->nmatch)
711 return;
712
716}
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755static int
760{
761 int er;
762
765
766
768
771
772 switch (t->op)
773 {
774 case '=':
776 er = REG_OKAY;
777 break;
778 case 'b':
781 break;
782 case '.':
786 else
788 break;
789 case '|':
792 break;
793 case '*':
797 else
799 break;
800 case '(':
803 break;
804 default:
806 break;
807 }
808
809
810
811
812
813
815
816
817
818
821
822 return er;
823}
824
825
826
827
828static int
833{
836 struct dfa *d;
837 struct dfa *d2;
838 chr *mid;
839 int er;
840
846
851 MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
852
853
854 mid = longest(v, d, begin, end, (int *) NULL);
856 if (mid == NULL)
858 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
859
860
861 for (;;)
862 {
863
864 if (longest(v, d2, mid, end, (int *) NULL) == end)
865 {
866 er = cdissect(v, left, begin, mid);
868 {
869 er = cdissect(v, right, mid, end);
871 {
872
873 MDEBUG(("%d: successful\n", t->id));
875 }
876
878 }
880 return er;
881 }
883
884
885 if (mid == begin)
886 {
887
888 MDEBUG(("%d: no midpoint\n", t->id));
890 }
891 mid = longest(v, d, begin, mid - 1, (int *) NULL);
893 if (mid == NULL)
894 {
895
896 MDEBUG(("%d: failed midpoint\n", t->id));
898 }
899 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
900 }
901
902
904}
905
906
907
908
909static int
912 chr *begin,
913 chr *end)
914{
917 struct dfa *d;
918 struct dfa *d2;
919 chr *mid;
920 int er;
921
927
932 MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
933
934
935 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
937 if (mid == NULL)
939 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
940
941
942 for (;;)
943 {
944
945 if (longest(v, d2, mid, end, (int *) NULL) == end)
946 {
947 er = cdissect(v, left, begin, mid);
949 {
950 er = cdissect(v, right, mid, end);
952 {
953
954 MDEBUG(("%d: successful\n", t->id));
956 }
957
959 }
961 return er;
962 }
964
965
966 if (mid == end)
967 {
968
969 MDEBUG(("%d: no midpoint\n", t->id));
971 }
972 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
974 if (mid == NULL)
975 {
976
977 MDEBUG(("%d: failed midpoint\n", t->id));
979 }
980 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
981 }
982
983
985}
986
987
988
989
990
991
992
993static int
996 chr *begin,
997 chr *end)
998{
1000 size_t numreps;
1001 size_t tlen;
1002 size_t brlen;
1003 chr *brstring;
1005 int min = t->min;
1006 int max = t->max;
1007
1011 assert((size_t) n < v->nmatch);
1012
1013 MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
1015
1016
1017 if (v->pmatch[n].rm_so == -1)
1019 brstring = v->start + v->pmatch[n].rm_so;
1020 brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1021
1022
1023 if (brlen == 0)
1024 {
1025
1026
1027
1028
1029 if (begin == end && min <= max)
1030 {
1031 MDEBUG(("%d: backref matched trivially\n", t->id));
1033 }
1035 }
1036 if (begin == end)
1037 {
1038
1039 if (min == 0)
1040 {
1041 MDEBUG(("%d: backref matched trivially\n", t->id));
1043 }
1045 }
1046
1047
1048
1049
1050
1051 assert(end > begin);
1052 tlen = end - begin;
1053 if (tlen % brlen != 0)
1055 numreps = tlen / brlen;
1056 if (numreps < min || (numreps > max && max != DUPINF))
1058
1059
1060 p = begin;
1061 while (numreps-- > 0)
1062 {
1063 if ((*v->g->compare) (brstring, p, brlen) != 0)
1065 p += brlen;
1066 }
1067
1068 MDEBUG(("%d: backref matched\n", t->id));
1070}
1071
1072
1073
1074
1075static int
1077 struct subre *t,
1078 chr *begin,
1079 chr *end)
1080{
1081 struct dfa *d;
1082 int er;
1083
1085
1087
1089
1090 while (t != NULL)
1091 {
1093
1094 MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1095
1098 if (longest(v, d, begin, end, (int *) NULL) == end)
1099 {
1100 MDEBUG(("%d: caltdissect matched\n", t->id));
1101 er = cdissect(v, t, begin, end);
1103 return er;
1104 }
1106
1108 }
1109
1111}
1112
1113
1114
1115
1116static int
1118 struct subre *t,
1119 chr *begin,
1120 chr *end)
1121{
1122 struct dfa *d;
1123 chr **endpts;
1124 chr *limit;
1125 int min_matches;
1126 size_t max_matches;
1127 int nverified;
1128 int k;
1129 int i;
1130 int er;
1131
1135 assert(begin <= end);
1136
1137 MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148 min_matches = t->min;
1149 if (min_matches <= 0)
1150 min_matches = 1;
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161 max_matches = end - begin;
1162 if (max_matches > t->max && t->max != DUPINF)
1163 max_matches = t->max;
1164 if (max_matches < min_matches)
1165 max_matches = min_matches;
1166 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1167 if (endpts == NULL)
1169 endpts[0] = begin;
1170
1173 {
1174 FREE(endpts);
1175 return v->err;
1176 }
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189 nverified = 0;
1190 k = 1;
1191 limit = end;
1192
1193
1194 while (k > 0)
1195 {
1196
1197 endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1199 {
1200 FREE(endpts);
1201 return v->err;
1202 }
1203 if (endpts[k] == NULL)
1204 {
1205
1206 k--;
1207 goto backtrack;
1208 }
1209 MDEBUG(("%d: working endpoint %d: %ld\n",
1210 t->id, k, LOFF(endpts[k])));
1211
1212
1213 if (nverified >= k)
1214 nverified = k - 1;
1215
1216 if (endpts[k] != end)
1217 {
1218
1219 if (k >= max_matches)
1220 {
1221
1222 k--;
1223 goto backtrack;
1224 }
1225
1226
1227 if (endpts[k] == endpts[k - 1] &&
1228 (k >= min_matches || min_matches - k < end - endpts[k]))
1229 goto backtrack;
1230
1231 k++;
1232 limit = end;
1233 continue;
1234 }
1235
1236
1237
1238
1239
1240
1241
1242 if (k < min_matches)
1243 goto backtrack;
1244
1245 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1246
1247 for (i = nverified + 1; i <= k; i++)
1248 {
1249
1253 {
1254 nverified = i;
1255 continue;
1256 }
1258 break;
1259
1260 FREE(endpts);
1261 return er;
1262 }
1263
1264 if (i > k)
1265 {
1266
1267 MDEBUG(("%d: successful\n", t->id));
1268 FREE(endpts);
1270 }
1271
1272
1273 k = i;
1274
1275backtrack:
1276
1277
1278
1279
1280
1281 while (k > 0)
1282 {
1283 chr *prev_end = endpts[k - 1];
1284
1285 if (endpts[k] > prev_end)
1286 {
1287 limit = endpts[k] - 1;
1288 if (limit > prev_end ||
1289 (k < min_matches && min_matches - k >= end - prev_end))
1290 {
1291
1292 break;
1293 }
1294 }
1295
1296 k--;
1297 }
1298 }
1299
1300
1301 FREE(endpts);
1302
1303
1304
1305
1306
1307 if (t->min == 0 && begin == end)
1308 {
1309 MDEBUG(("%d: allowing zero matches\n", t->id));
1311 }
1312
1313 MDEBUG(("%d: failed\n", t->id));
1315}
1316
1317
1318
1319
1320static int
1322 struct subre *t,
1323 chr *begin,
1324 chr *end)
1325{
1326 struct dfa *d;
1327 chr **endpts;
1328 chr *limit;
1329 int min_matches;
1330 size_t max_matches;
1331 int nverified;
1332 int k;
1333 int i;
1334 int er;
1335
1339 assert(begin <= end);
1340
1341 MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1342
1343
1344
1345
1346
1347
1348 min_matches = t->min;
1349 if (min_matches <= 0)
1350 {
1351 if (begin == end)
1352 {
1353 MDEBUG(("%d: allowing zero matches\n", t->id));
1355 }
1356 min_matches = 1;
1357 }
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368 max_matches = end - begin;
1369 if (max_matches > t->max && t->max != DUPINF)
1370 max_matches = t->max;
1371 if (max_matches < min_matches)
1372 max_matches = min_matches;
1373 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1374 if (endpts == NULL)
1376 endpts[0] = begin;
1377
1380 {
1381 FREE(endpts);
1382 return v->err;
1383 }
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396 nverified = 0;
1397 k = 1;
1398 limit = begin;
1399
1400
1401 while (k > 0)
1402 {
1403
1404 if (limit == endpts[k - 1] &&
1405 limit != end &&
1406 (k >= min_matches || min_matches - k < end - limit))
1407 limit++;
1408
1409
1410 if (k >= max_matches)
1411 limit = end;
1412
1413
1414 endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1415 (chr **) NULL, (int *) NULL);
1417 {
1418 FREE(endpts);
1419 return v->err;
1420 }
1421 if (endpts[k] == NULL)
1422 {
1423
1424 k--;
1425 goto backtrack;
1426 }
1427 MDEBUG(("%d: working endpoint %d: %ld\n",
1428 t->id, k, LOFF(endpts[k])));
1429
1430
1431 if (nverified >= k)
1432 nverified = k - 1;
1433
1434 if (endpts[k] != end)
1435 {
1436
1437 if (k >= max_matches)
1438 {
1439
1440 k--;
1441 goto backtrack;
1442 }
1443
1444 k++;
1445 limit = endpts[k - 1];
1446 continue;
1447 }
1448
1449
1450
1451
1452
1453
1454
1455 if (k < min_matches)
1456 goto backtrack;
1457
1458 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1459
1460 for (i = nverified + 1; i <= k; i++)
1461 {
1462
1466 {
1467 nverified = i;
1468 continue;
1469 }
1471 break;
1472
1473 FREE(endpts);
1474 return er;
1475 }
1476
1477 if (i > k)
1478 {
1479
1480 MDEBUG(("%d: successful\n", t->id));
1481 FREE(endpts);
1483 }
1484
1485
1486 k = i;
1487
1488backtrack:
1489
1490
1491
1492
1493 while (k > 0)
1494 {
1495 if (endpts[k] < end)
1496 {
1497 limit = endpts[k] + 1;
1498
1499 break;
1500 }
1501
1502 k--;
1503 }
1504 }
1505
1506
1507 MDEBUG(("%d: failed\n", t->id));
1508 FREE(endpts);
1510}
1511
1512
1513
static void cleanup(void)
if(TABLE==NULL||TABLE_index==NULL)
void pg_set_regex_collation(Oid collation)
static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
static void freedfa(struct dfa *d)
static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)
static void zapallsubs(regmatch_t *p, size_t n)
static chr * dfa_backref(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)
static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co)
static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm)
static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)
static unsigned hash(unsigned *uv, int n)
static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
static struct dfa * getladfa(struct vars *v, int n)
static chr * lastcold(struct vars *v, struct dfa *d)
static struct dfa * getsubdfa(struct vars *v, struct subre *t)
static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)
static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end)
static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)
static struct dfa * newdfa(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)
static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)
static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
static void zaptreesubs(struct vars *v, struct subre *t)
static struct sset * pickss(struct vars *v, struct dfa *d, chr *cp, chr *start)
static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end)
static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end)
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
static int matchuntil(struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)
#define STACK_TOO_DEEP(re)
unsigned statesarea[FEWSTATES *2+WORK]
struct arcp incarea[FEWSTATES *2 *FEWCOLORS]
struct sset * outsarea[FEWSTATES *2 *FEWCOLORS]
struct sset ssets[FEWSTATES *2]