PostgreSQL Source Code: src/backend/regex/regexec.c Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

35

36

37

38

40{

43};

44

46{

47 unsigned *states;

48 unsigned hash;

49#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))

50#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \

51 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))

53#define STARTER 01

54#define POSTSTATE 02

55#define LOCKED 04

56#define NOPROGRESS 010

57 struct arcp ins;

59 struct sset **outs;

60 struct arcp *inchain;

61};

62

64{

65 int nssets;

66 int nssused;

67 int nstates;

68 int ncolors;

69 int wordsper;

72 unsigned *work;

77 chr *lastpost;

78 chr *lastnopr;

79 struct sset *search;

80 int backno;

81 short backmin;

82 short backmax;

83 bool ismalloced;

84 bool arraysmalloced;

85};

86

87#define WORK 1

88

89

90#define FEWSTATES 20

91#define FEWCOLORS 15

93{

94 struct dfa dfa;

99};

100

101#define DOMALLOC ((struct smalldfa *)NULL)

102

103

104

105

107{

110 int eflags;

116 chr *stop;

117 int err;

119 struct dfa **ladfas;

120 struct sset **lblastcss;

121 chr **lblastcp;

124};

125

126#define VISERR(vv) ((vv)->err != 0)

127#define ISERR() VISERR(v)

128#define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e)))

129#define ERR(e) VERR(v, e)

130#define NOERR() {if (ISERR()) return v->err;}

131#define OFF(p) ((p) - v->start)

132#define LOFF(p) ((long)OFF(p))

133

134

135

136

137

138

139

145 struct dfa *d, struct dfa *s, chr **coldp);

156

157

161 chr *max, chr **coldp, int *hitstopp);

163 struct sset **lastcss, chr **lastcp);

170static unsigned hash(unsigned *uv, int n);

179

180

181

182

183

184int

186 const chr *string,

187 size_t len,

188 size_t search_start,

190 size_t nmatch,

193{

194 struct vars var;

195 struct vars *v = &var;

196 int st;

197 size_t n;

198 size_t i;

199 int backref;

200

201#define LOCALMAT 20

203

204#define LOCALDFAS 40

206

207

208 if (re == NULL || string == NULL || re->re_magic != REMAGIC)

210 if (re->re_csize != sizeof(chr))

212 if (search_start > len)

214

215

217

218

219 v->re = re;

220 v->g = (struct guts *) re->re_guts;

227 if (backref && nmatch <= v->g->nsub)

228 {

229

233 else

235 if (v->pmatch == NULL)

238 }

239 else

240 {

241

243

244 if (nmatch > 0)

246

247 if (nmatch > v->g->nsub + 1)

248 nmatch = v->g->nsub + 1;

250 }

255 v->err = 0;

260

261

263 n = (size_t) v->g->ntree;

266 else

267 {

270 {

273 }

274 }

275 for (i = 0; i < n; i++)

277

280 if (n > 0)

281 {

283 if (v->ladfas == NULL)

284 {

287 }

288 for (i = 0; i < n; i++)

293 {

296 }

297 for (i = 0; i < n; i++)

298 {

301 }

302 }

303

304

306 if (backref)

308 else

310

311

312 if (st == REG_OKAY && nmatch > 0)

313 {

314 if (v->pmatch != pmatch)

315 {

316

317 assert(nmatch <= v->nmatch);

319 }

321 {

322

324 }

325 }

326

327

332 {

333 n = (size_t) v->g->ntree;

334 for (i = 0; i < n; i++)

335 {

338 }

339 if (v->subdfas != subdfas)

341 }

342 if (v->ladfas != NULL)

343 {

345 for (i = 0; i < n; i++)

346 {

347 if (v->ladfas[i] != NULL)

349 }

351 }

356

357#ifdef REG_DEBUG

360#endif

361

362 return st;

363}

364

365

366

367

368

369

370

371static struct dfa *

374{

376

377 if (d == NULL)

378 {

380 if (d == NULL)

381 return NULL;

382

383 if (t->op == 'b')

384 {

388 }

390 }

391 return d;

392}

393

394

395

396

397

398

399static struct dfa *

401 int n)

402{

403 assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);

404

405 if (v->ladfas[n] == NULL)

406 {

408

410

411 }

413}

414

415

416

417

418static int

422{

423 struct dfa *s;

424 struct dfa *d;

425 chr *begin;

426 chr *end = NULL;

427 chr *cold;

428 chr *open;

430 int hitend;

432

433

435 if (s == NULL)

436 return v->err;

438 cold = NULL;

440 &cold, (int *) NULL);

444 {

446 if (cold != NULL)

448 else

451 }

452 if (close == NULL)

454 if (v->nmatch == 0)

456

457

458 assert(cold != NULL);

459 open = cold;

460 cold = NULL;

463 if (d == NULL)

464 return v->err;

465 for (begin = open; begin <= close; begin++)

466 {

467 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));

468 if (shorter)

470 (chr **) NULL, &hitend);

471 else

472 end = longest(v, d, begin, v->stop, &hitend);

474 {

476 return v->err;

477 }

478 if (hitend && cold == NULL)

479 cold = begin;

480 if (end != NULL)

481 break;

482 }

483 assert(end != NULL);

485

486

491 {

492 if (cold != NULL)

494 else

497 }

498 if (v->nmatch == 1)

500

501

503}

504

505

506

507

508static int

512{

513 struct dfa *s;

514 struct dfa *d;

515 chr *cold;

516 int ret;

517

519 if (s == NULL)

520 return v->err;

522 if (d == NULL)

523 {

525 return v->err;

526 }

527

529

534 {

536 if (cold != NULL)

538 else

541 }

542 return ret;

543}

544

545

546

547

548static int

552 struct dfa *d,

553 struct dfa *s,

554 chr **coldp)

555{

556 chr *begin;

557 chr *end;

558 chr *cold;

559 chr *open;

561 chr *estart;

562 chr *estop;

563 int er;

565 int hitend;

566

567 assert(d != NULL && s != NULL);

568 cold = NULL;

570 do

571 {

572

576 {

577 *coldp = cold;

578 return v->err;

579 }

580 if (close == NULL)

581 break;

582 assert(cold != NULL);

583 open = cold;

584 cold = NULL;

585

587 for (begin = open; begin <= close; begin++)

588 {

589 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));

590 estart = begin;

591 estop = v->stop;

592 for (;;)

593 {

594

595 if (shorter)

596 end = shortest(v, d, begin, estart,

597 estop, (chr **) NULL, &hitend);

598 else

599 end = longest(v, d, begin, estop,

600 &hitend);

602 {

603 *coldp = cold;

604 return v->err;

605 }

606 if (hitend && cold == NULL)

607 cold = begin;

608 if (end == NULL)

609 break;

610 MDEBUG(("tentative end %ld\n", LOFF(end)));

611

614 {

616 {

619 }

620 *coldp = cold;

622 }

624 {

626 *coldp = cold;

627 return er;

628 }

629

630 if (shorter)

631 {

632 if (end == estop)

633 break;

634 estart = end + 1;

635 }

636 else

637 {

638 if (end == begin)

639 break;

640 estop = end - 1;

641 }

642 }

643 }

644

645

646

647

648

649

651 } while (close < v->stop);

652

653 *coldp = cold;

655}

656

657

658

659

660

661

662static void

664 size_t n)

665{

666 size_t i;

667

668 for (i = n - 1; i > 0; i--)

669 {

670 p[i].rm_so = -1;

671 p[i].rm_eo = -1;

672 }

673}

674

675

676

677

678static void

681{

682 int n = t->capno;

683 struct subre *t2;

684

685 if (n > 0)

686 {

687 if ((size_t) n < v->nmatch)

688 {

689 v->pmatch[n].rm_so = -1;

690 v->pmatch[n].rm_eo = -1;

691 }

692 }

693

694 for (t2 = t->child; t2 != NULL; t2 = t2->sibling)

696}

697

698

699

700

701static void

703 struct subre *sub,

706{

707 int n = sub->capno;

708

710 if ((size_t) n >= v->nmatch)

711 return;

712

716}

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755static int

758 chr *begin,

759 chr *end)

760{

761 int er;

762

765

766

768

771

772 switch (t->op)

773 {

774 case '=':

776 er = REG_OKAY;

777 break;

778 case 'b':

781 break;

782 case '.':

786 else

788 break;

789 case '|':

792 break;

793 case '*':

797 else

799 break;

800 case '(':

803 break;

804 default:

806 break;

807 }

808

809

810

811

812

813

815

816

817

818

821

822 return er;

823}

824

825

826

827

828static int

831 chr *begin,

832 chr *end)

833{

836 struct dfa *d;

837 struct dfa *d2;

838 chr *mid;

839 int er;

840

846

851 MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));

852

853

854 mid = longest(v, d, begin, end, (int *) NULL);

856 if (mid == NULL)

858 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));

859

860

861 for (;;)

862 {

863

864 if (longest(v, d2, mid, end, (int *) NULL) == end)

865 {

866 er = cdissect(v, left, begin, mid);

868 {

869 er = cdissect(v, right, mid, end);

871 {

872

873 MDEBUG(("%d: successful\n", t->id));

875 }

876

878 }

880 return er;

881 }

883

884

885 if (mid == begin)

886 {

887

888 MDEBUG(("%d: no midpoint\n", t->id));

890 }

891 mid = longest(v, d, begin, mid - 1, (int *) NULL);

893 if (mid == NULL)

894 {

895

896 MDEBUG(("%d: failed midpoint\n", t->id));

898 }

899 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));

900 }

901

902

904}

905

906

907

908

909static int

912 chr *begin,

913 chr *end)

914{

917 struct dfa *d;

918 struct dfa *d2;

919 chr *mid;

920 int er;

921

927

932 MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));

933

934

935 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);

937 if (mid == NULL)

939 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));

940

941

942 for (;;)

943 {

944

945 if (longest(v, d2, mid, end, (int *) NULL) == end)

946 {

947 er = cdissect(v, left, begin, mid);

949 {

950 er = cdissect(v, right, mid, end);

952 {

953

954 MDEBUG(("%d: successful\n", t->id));

956 }

957

959 }

961 return er;

962 }

964

965

966 if (mid == end)

967 {

968

969 MDEBUG(("%d: no midpoint\n", t->id));

971 }

972 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);

974 if (mid == NULL)

975 {

976

977 MDEBUG(("%d: failed midpoint\n", t->id));

979 }

980 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));

981 }

982

983

985}

986

987

988

989

990

991

992

993static int

996 chr *begin,

997 chr *end)

998{

1000 size_t numreps;

1001 size_t tlen;

1002 size_t brlen;

1003 chr *brstring;

1005 int min = t->min;

1006 int max = t->max;

1007

1011 assert((size_t) n < v->nmatch);

1012

1013 MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,

1015

1016

1017 if (v->pmatch[n].rm_so == -1)

1019 brstring = v->start + v->pmatch[n].rm_so;

1020 brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;

1021

1022

1023 if (brlen == 0)

1024 {

1025

1026

1027

1028

1029 if (begin == end && min <= max)

1030 {

1031 MDEBUG(("%d: backref matched trivially\n", t->id));

1033 }

1035 }

1036 if (begin == end)

1037 {

1038

1039 if (min == 0)

1040 {

1041 MDEBUG(("%d: backref matched trivially\n", t->id));

1043 }

1045 }

1046

1047

1048

1049

1050

1051 assert(end > begin);

1052 tlen = end - begin;

1053 if (tlen % brlen != 0)

1055 numreps = tlen / brlen;

1056 if (numreps < min || (numreps > max && max != DUPINF))

1058

1059

1060 p = begin;

1061 while (numreps-- > 0)

1062 {

1063 if ((*v->g->compare) (brstring, p, brlen) != 0)

1065 p += brlen;

1066 }

1067

1068 MDEBUG(("%d: backref matched\n", t->id));

1070}

1071

1072

1073

1074

1075static int

1077 struct subre *t,

1078 chr *begin,

1079 chr *end)

1080{

1081 struct dfa *d;

1082 int er;

1083

1085

1087

1089

1090 while (t != NULL)

1091 {

1093

1094 MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));

1095

1098 if (longest(v, d, begin, end, (int *) NULL) == end)

1099 {

1100 MDEBUG(("%d: caltdissect matched\n", t->id));

1101 er = cdissect(v, t, begin, end);

1103 return er;

1104 }

1106

1108 }

1109

1111}

1112

1113

1114

1115

1116static int

1118 struct subre *t,

1119 chr *begin,

1120 chr *end)

1121{

1122 struct dfa *d;

1123 chr **endpts;

1124 chr *limit;

1125 int min_matches;

1126 size_t max_matches;

1127 int nverified;

1128 int k;

1129 int i;

1130 int er;

1131

1135 assert(begin <= end);

1136

1137 MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));

1138

1139

1140

1141

1142

1143

1144

1145

1146

1147

1148 min_matches = t->min;

1149 if (min_matches <= 0)

1150 min_matches = 1;

1151

1152

1153

1154

1155

1156

1157

1158

1159

1160

1161 max_matches = end - begin;

1162 if (max_matches > t->max && t->max != DUPINF)

1163 max_matches = t->max;

1164 if (max_matches < min_matches)

1165 max_matches = min_matches;

1166 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));

1167 if (endpts == NULL)

1169 endpts[0] = begin;

1170

1173 {

1174 FREE(endpts);

1175 return v->err;

1176 }

1177

1178

1179

1180

1181

1182

1183

1184

1185

1186

1187

1188

1189 nverified = 0;

1190 k = 1;

1191 limit = end;

1192

1193

1194 while (k > 0)

1195 {

1196

1197 endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);

1199 {

1200 FREE(endpts);

1201 return v->err;

1202 }

1203 if (endpts[k] == NULL)

1204 {

1205

1206 k--;

1207 goto backtrack;

1208 }

1209 MDEBUG(("%d: working endpoint %d: %ld\n",

1210 t->id, k, LOFF(endpts[k])));

1211

1212

1213 if (nverified >= k)

1214 nverified = k - 1;

1215

1216 if (endpts[k] != end)

1217 {

1218

1219 if (k >= max_matches)

1220 {

1221

1222 k--;

1223 goto backtrack;

1224 }

1225

1226

1227 if (endpts[k] == endpts[k - 1] &&

1228 (k >= min_matches || min_matches - k < end - endpts[k]))

1229 goto backtrack;

1230

1231 k++;

1232 limit = end;

1233 continue;

1234 }

1235

1236

1237

1238

1239

1240

1241

1242 if (k < min_matches)

1243 goto backtrack;

1244

1245 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));

1246

1247 for (i = nverified + 1; i <= k; i++)

1248 {

1249

1253 {

1254 nverified = i;

1255 continue;

1256 }

1258 break;

1259

1260 FREE(endpts);

1261 return er;

1262 }

1263

1264 if (i > k)

1265 {

1266

1267 MDEBUG(("%d: successful\n", t->id));

1268 FREE(endpts);

1270 }

1271

1272

1273 k = i;

1274

1275backtrack:

1276

1277

1278

1279

1280

1281 while (k > 0)

1282 {

1283 chr *prev_end = endpts[k - 1];

1284

1285 if (endpts[k] > prev_end)

1286 {

1287 limit = endpts[k] - 1;

1288 if (limit > prev_end ||

1289 (k < min_matches && min_matches - k >= end - prev_end))

1290 {

1291

1292 break;

1293 }

1294 }

1295

1296 k--;

1297 }

1298 }

1299

1300

1301 FREE(endpts);

1302

1303

1304

1305

1306

1307 if (t->min == 0 && begin == end)

1308 {

1309 MDEBUG(("%d: allowing zero matches\n", t->id));

1311 }

1312

1313 MDEBUG(("%d: failed\n", t->id));

1315}

1316

1317

1318

1319

1320static int

1322 struct subre *t,

1323 chr *begin,

1324 chr *end)

1325{

1326 struct dfa *d;

1327 chr **endpts;

1328 chr *limit;

1329 int min_matches;

1330 size_t max_matches;

1331 int nverified;

1332 int k;

1333 int i;

1334 int er;

1335

1339 assert(begin <= end);

1340

1341 MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));

1342

1343

1344

1345

1346

1347

1348 min_matches = t->min;

1349 if (min_matches <= 0)

1350 {

1351 if (begin == end)

1352 {

1353 MDEBUG(("%d: allowing zero matches\n", t->id));

1355 }

1356 min_matches = 1;

1357 }

1358

1359

1360

1361

1362

1363

1364

1365

1366

1367

1368 max_matches = end - begin;

1369 if (max_matches > t->max && t->max != DUPINF)

1370 max_matches = t->max;

1371 if (max_matches < min_matches)

1372 max_matches = min_matches;

1373 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));

1374 if (endpts == NULL)

1376 endpts[0] = begin;

1377

1380 {

1381 FREE(endpts);

1382 return v->err;

1383 }

1384

1385

1386

1387

1388

1389

1390

1391

1392

1393

1394

1395

1396 nverified = 0;

1397 k = 1;

1398 limit = begin;

1399

1400

1401 while (k > 0)

1402 {

1403

1404 if (limit == endpts[k - 1] &&

1405 limit != end &&

1406 (k >= min_matches || min_matches - k < end - limit))

1407 limit++;

1408

1409

1410 if (k >= max_matches)

1411 limit = end;

1412

1413

1414 endpts[k] = shortest(v, d, endpts[k - 1], limit, end,

1415 (chr **) NULL, (int *) NULL);

1417 {

1418 FREE(endpts);

1419 return v->err;

1420 }

1421 if (endpts[k] == NULL)

1422 {

1423

1424 k--;

1425 goto backtrack;

1426 }

1427 MDEBUG(("%d: working endpoint %d: %ld\n",

1428 t->id, k, LOFF(endpts[k])));

1429

1430

1431 if (nverified >= k)

1432 nverified = k - 1;

1433

1434 if (endpts[k] != end)

1435 {

1436

1437 if (k >= max_matches)

1438 {

1439

1440 k--;

1441 goto backtrack;

1442 }

1443

1444 k++;

1445 limit = endpts[k - 1];

1446 continue;

1447 }

1448

1449

1450

1451

1452

1453

1454

1455 if (k < min_matches)

1456 goto backtrack;

1457

1458 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));

1459

1460 for (i = nverified + 1; i <= k; i++)

1461 {

1462

1466 {

1467 nverified = i;

1468 continue;

1469 }

1471 break;

1472

1473 FREE(endpts);

1474 return er;

1475 }

1476

1477 if (i > k)

1478 {

1479

1480 MDEBUG(("%d: successful\n", t->id));

1481 FREE(endpts);

1483 }

1484

1485

1486 k = i;

1487

1488backtrack:

1489

1490

1491

1492

1493 while (k > 0)

1494 {

1495 if (endpts[k] < end)

1496 {

1497 limit = endpts[k] + 1;

1498

1499 break;

1500 }

1501

1502 k--;

1503 }

1504 }

1505

1506

1507 MDEBUG(("%d: failed\n", t->id));

1508 FREE(endpts);

1510}

1511

1512

1513

static void cleanup(void)

if(TABLE==NULL||TABLE_index==NULL)

void pg_set_regex_collation(Oid collation)

static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm)

static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end)

static void freedfa(struct dfa *d)

static chr * shortest(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, chr **coldp, int *hitstopp)

static void zapallsubs(regmatch_t *p, size_t n)

static chr * dfa_backref(struct vars *v, struct dfa *d, chr *start, chr *min, chr *max, bool shortest)

static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co)

static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end)

static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm)

static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end)

static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct dfa *d, struct dfa *s, chr **coldp)

static unsigned hash(unsigned *uv, int n)

static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end)

static struct dfa * getladfa(struct vars *v, int n)

static chr * lastcold(struct vars *v, struct dfa *d)

static struct dfa * getsubdfa(struct vars *v, struct subre *t)

static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)

static struct sset * getvacant(struct vars *v, struct dfa *d, chr *cp, chr *start)

static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end)

static struct sset * miss(struct vars *v, struct dfa *d, struct sset *css, color co, chr *cp, chr *start)

static struct dfa * newdfa(struct vars *v, struct cnfa *cnfa, struct colormap *cm, struct smalldfa *sml)

static struct sset * initialize(struct vars *v, struct dfa *d, chr *start)

static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end)

static void zaptreesubs(struct vars *v, struct subre *t)

static struct sset * pickss(struct vars *v, struct dfa *d, chr *cp, chr *start)

static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end)

static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end)

int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)

static int matchuntil(struct vars *v, struct dfa *d, chr *probe, struct sset **lastcss, chr **lastcp)

#define STACK_TOO_DEEP(re)

unsigned statesarea[FEWSTATES *2+WORK]

struct arcp incarea[FEWSTATES *2 *FEWCOLORS]

struct sset * outsarea[FEWSTATES *2 *FEWCOLORS]

struct sset ssets[FEWSTATES *2]