PostgreSQL Source Code: contrib/pg_trgm/trgm_gist.c Source File (original) (raw)

1

2

3

5

12

13

14typedef struct

15{

16 int32 vl_len_;

17 int siglen;

19

20#define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \

21 ((TrgmGistOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \

22 SIGLEN_DEFAULT)

23

24typedef struct

25{

26

29

31

33

34

35

36

37

39

40#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))

41

42

54

55

58{

60 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

61 errmsg("cannot accept a value of type %s", "gtrgm")));

62

64}

65

68{

70 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

71 errmsg("cannot display a value of type %s", "gtrgm")));

72

74}

75

78{

82

85

86 if (!isalltrue)

87 {

90 else

91 memset(GETSIGN(res), 0, siglen);

92 }

93

94 return res;

95}

96

97static void

99{

104

107 for (k = 0; k < len; k++)

108 {

109 CPTRGM(&tmp, ptr + k);

111 }

112}

113

116{

120

122 {

125

130 entry->offset, false);

131 }

134 {

138

140 {

141 if ((sign[i] & 0xff) != 0xff)

143 }

144

149 entry->offset, false);

150 }

152}

153

156{

160

162

164 {

165

170 }

171 else

172 {

173

175 }

176}

177

180{

186

187 for (k = 0; k < len; k++)

188 {

189 CPTRGM(&tmp, ptr + k);

191 }

192

193 return count;

194}

195

198{

202

203

208 bool res;

211 double nlimit;

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

228 if (cache == NULL ||

229 cache->strategy != strategy ||

231 memcmp(cache->query, query, querysize) != 0)

232 {

235 Size qtrgsize;

236

237 switch (strategy)

238 {

245 break;

247#ifndef IGNORECASE

248 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");

249#endif

250

254 break;

256#ifndef IGNORECASE

257 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");

258#endif

259

262 &graph, fcinfo->flinfo->fn_mcxt);

263

264 if (qtrg && ARRNELEM(qtrg) <= 0)

265 {

267 qtrg = NULL;

268 }

269 break;

270 default:

271 elog(ERROR, "unrecognized strategy number: %d", strategy);

272 qtrg = NULL;

273 break;

274 }

275

276 qtrgsize = qtrg ? VARSIZE(qtrg) : 0;

277

282 qtrgsize);

283

284 newcache->strategy = strategy;

287 memcpy(newcache->query, query, querysize);

288 if (qtrg)

289 {

291 ((char *) newcache->query + MAXALIGN(querysize));

292 memcpy((char *) newcache->trigrams, qtrg, qtrgsize);

293

295 }

296 else

298 newcache->graph = graph;

299

300 if (cache)

302 fcinfo->flinfo->fn_extra = newcache;

303 cache = newcache;

304 }

305

307

308 switch (strategy)

309 {

313

314

315

316

317

319

321

323 {

324 double tmpsml = cnt_sml(qtrg, key, *recheck);

325

326 res = (tmpsml >= nlimit);

327 }

329 {

330 res = true;

331 }

332 else

333 {

336

337 if (len == 0)

338 res = false;

339 else

341 }

342 break;

344#ifndef IGNORECASE

345 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");

346#endif

347

350

351 *recheck = true;

352

353

354

355

356

358 {

360 }

362 {

363 res = true;

364 }

365 else

366 {

368 tmp = 0,

372

373 res = true;

374 for (k = 0; k < len; k++)

375 {

376 CPTRGM(&tmp, ptr + k);

378 {

379 res = false;

380 break;

381 }

382 }

383 }

384 break;

386#ifndef IGNORECASE

387 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");

388#endif

389

391

392 *recheck = true;

393

394

395 if (qtrg)

396 {

398 {

399 bool *check;

400

404 }

406 {

407 res = true;

408 }

409 else

410 {

412 tmp = 0,

416 bool *check;

417

418

419

420

421

422

423

424

425

426 check = (bool *) palloc(len * sizeof(bool));

427 for (k = 0; k < len; k++)

428 {

429 CPTRGM(&tmp, ptr + k);

431 }

434 }

435 }

436 else

437 {

438

439 res = true;

440 }

441 break;

442 default:

443 elog(ERROR, "unrecognized strategy number: %d", strategy);

444 res = false;

445 break;

446 }

447

449}

450

453{

457

458

465 char *cache = (char *) fcinfo->flinfo->fn_extra;

466

467

468

469

470 if (cache == NULL ||

471 VARSIZE(cache) != querysize ||

472 memcmp(cache, query, querysize) != 0)

473 {

474 char *newcache;

475

477

481

482 memcpy(newcache, query, querysize);

483 memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));

484

485 if (cache)

487 fcinfo->flinfo->fn_extra = newcache;

488 cache = newcache;

489 }

490

491 qtrg = (TRGM *) (cache + MAXALIGN(querysize));

492

493 switch (strategy)

494 {

498

501 {

502

503

504

505

506

507

509

510 res = 1.0 - sml;

511 }

513 {

514 res = 0.0;

515 }

516 else

517 {

520

522 }

523 break;

524 default:

525 elog(ERROR, "unrecognized strategy number: %d", strategy);

526 res = 0;

527 break;

528 }

529

531}

532

535{

537

539 {

541

543 return 1;

544

546 sbase[i] |= sadd[i];

547 }

548 else

549 {

552

554 {

556 HASH(sbase, tmp, siglen);

557 }

558 }

559 return 0;

560}

561

562

565{

573

574 for (i = 0; i < len; i++)

575 {

577 {

580 break;

581 }

582 }

583

585

587}

588

591{

596

598 {

600 *result = true;

602 *result = false;

604 *result = false;

605 else

606 {

610

611 *result = true;

613 {

614 if (sa[i] != sb[i])

615 {

616 *result = false;

617 break;

618 }

619 }

620 }

621 }

622 else

623 {

626

627 if (lena != lenb)

628 *result = false;

629 else

630 {

634

635 *result = true;

636 for (i = 0; i < lena; i++)

638 {

639 *result = false;

640 break;

641 }

642 }

643 }

644

646}

647

650{

652}

653

654static int

656{

657 int i,

658 diff,

659 dist = 0;

660

662 {

663 diff = (unsigned char) (a[i] ^ b[i]);

664

666 }

667 return dist;

668}

669

670static int

672{

674 {

676 return 0;

677 else

679 }

682

684}

685

688{

696

697 *penalty = 0.0;

698

700 {

701 char *cache = (char *) fcinfo->flinfo->fn_extra;

705

706

707

708

709 if (cache == NULL ||

710 VARSIZE(cachedVal) != newvalsize ||

711 memcmp(cachedVal, newval, newvalsize) != 0)

712 {

713 char *newcache;

714

717 newvalsize);

718

720

721 cachedVal = (TRGM *) (newcache + MAXALIGN(siglen));

722 memcpy(cachedVal, newval, newvalsize);

723

724 if (cache)

726 fcinfo->flinfo->fn_extra = newcache;

727 cache = newcache;

728 }

729

731

734 else

736 }

737 else

740}

741

742typedef struct

743{

747

748static void

750{

757 else

759}

760

761#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )

762typedef struct

763{

767

768static int

770{

772 return 0;

773 else

774 return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;

775}

776

777

778static int

780{

781 if (a->allistrue)

782 {

783 if (b->allistrue)

784 return 0;

785 else

787 }

788 else if (b->allistrue)

790

792}

793

796{

802 j;

803 TRGM *datum_l,

804 *datum_r;

806 union_r;

807 int32 size_alpha,

808 size_beta;

809 int32 size_waste,

810 waste = -1;

813 seed_2 = 0;

815 *right;

817 int i;

819 char *cache_sign;

821

822

824 cache_sign = palloc(siglen * (maxoff + 1));

825

827 fillcache(&cache[k], GETENTRY(entryvec, k), &cache_sign[siglen * k],

828 siglen);

829

830

832 {

834 {

835 size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen);

836 if (size_waste > waste)

837 {

838 waste = size_waste;

839 seed_1 = k;

840 seed_2 = j;

841 }

842 }

843 }

844

845

846 if (seed_1 == 0 || seed_2 == 0)

847 {

848 seed_1 = 1;

849 seed_2 = 2;

850 }

851

852

858

859

860 datum_l = gtrgm_alloc(cache[seed_1].allistrue, siglen, cache[seed_1].sign);

861 datum_r = gtrgm_alloc(cache[seed_2].allistrue, siglen, cache[seed_2].sign);

862

863 union_l = GETSIGN(datum_l);

864 union_r = GETSIGN(datum_r);

865

866

869 {

870 costvector[j - 1].pos = j;

871 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen);

872 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen);

873 costvector[j - 1].cost = abs(size_alpha - size_beta);

874 }

876

877 for (k = 0; k < maxoff; k++)

878 {

879 j = costvector[k].pos;

880 if (j == seed_1)

881 {

882 *left++ = j;

884 continue;

885 }

886 else if (j == seed_2)

887 {

888 *right++ = j;

890 continue;

891 }

892

893 if (ISALLTRUE(datum_l) || cache[j].allistrue)

894 {

895 if (ISALLTRUE(datum_l) && cache[j].allistrue)

896 size_alpha = 0;

897 else

901 siglen);

902 }

903 else

905

906 if (ISALLTRUE(datum_r) || cache[j].allistrue)

907 {

908 if (ISALLTRUE(datum_r) && cache[j].allistrue)

909 size_beta = 0;

910 else

914 siglen);

915 }

916 else

918

920 {

921 if (ISALLTRUE(datum_l) || cache[j].allistrue)

922 {

924 memset(GETSIGN(datum_l), 0xff, siglen);

925 }

926 else

927 {

928 ptr = cache[j].sign;

930 union_l[i] |= ptr[i];

931 }

932 *left++ = j;

934 }

935 else

936 {

937 if (ISALLTRUE(datum_r) || cache[j].allistrue)

938 {

940 memset(GETSIGN(datum_r), 0xff, siglen);

941 }

942 else

943 {

944 ptr = cache[j].sign;

946 union_r[i] |= ptr[i];

947 }

948 *right++ = j;

950 }

951 }

952

955

957}

958

961{

963

966 "signature length in bytes",

969

971}

#define MemSet(start, val, len)

int errcode(int sqlerrcode)

int errmsg(const char *fmt,...)

#define ereport(elevel,...)

#define PG_RETURN_FLOAT8(x)

#define DatumGetTextPP(X)

#define PG_GETARG_POINTER(n)

#define PG_GETARG_UINT16(n)

#define PG_RETURN_POINTER(x)

#define PG_GET_COLLATION()

#define PG_RETURN_BOOL(x)

#define PG_GETARG_TEXT_P(n)

#define gistentryinit(e, k, r, pg, o, l)

#define HASHVAL(val, siglen)

#define CALCGTSIZE(flag, siglen)

#define SIGLENBIT(siglen)

#define HASH(sign, val, siglen)

if(TABLE==NULL||TABLE_index==NULL)

void * MemoryContextAlloc(MemoryContext context, Size size)

void pfree(void *pointer)

#define OffsetNumberNext(offsetNumber)

#define FirstOffsetNumber

PGDLLIMPORT const uint8 pg_number_of_ones[256]

static uint64 pg_popcount(const char *buf, int bytes)

#define qsort(a, b, c, d)

static Datum PointerGetDatum(const void *X)

static Pointer DatumGetPointer(Datum X)

void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)

void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)

#define RegExpICaseStrategyNumber

#define WordSimilarityStrategyNumber

TRGM * generate_trgm(char *str, int slen)

#define DistanceStrategyNumber

#define StrictWordSimilarityStrategyNumber

#define StrictWordDistanceStrategyNumber

int(* CMPTRGM)(const void *a, const void *b)

bool * trgm_presence_map(TRGM *query, TRGM *key)

double index_strategy_get_limit(StrategyNumber strategy)

TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)

#define SimilarityStrategyNumber

bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)

bool trgm_contained_by(TRGM *trg1, TRGM *trg2)

#define ILikeStrategyNumber

TRGM * generate_wildcard_trgm(const char *str, int slen)

float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)

#define LikeStrategyNumber

#define EqualStrategyNumber

#define RegExpStrategyNumber

#define WordDistanceStrategyNumber

Datum gtrgm_out(PG_FUNCTION_ARGS)

static int hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen)

static void fillcache(CACHESIGN *item, TRGM *key, BITVECP sign, int siglen)

Datum gtrgm_same(PG_FUNCTION_ARGS)

static int32 unionkey(BITVECP sbase, TRGM *add, int siglen)

PG_FUNCTION_INFO_V1(gtrgm_in)

Datum gtrgm_distance(PG_FUNCTION_ARGS)

static int32 sizebitvec(BITVECP sign, int siglen)

Datum gtrgm_in(PG_FUNCTION_ARGS)

Datum gtrgm_options(PG_FUNCTION_ARGS)

static int hemdistsign(BITVECP a, BITVECP b, int siglen)

#define GETENTRY(vec, pos)

Datum gtrgm_compress(PG_FUNCTION_ARGS)

Datum gtrgm_picksplit(PG_FUNCTION_ARGS)

Datum gtrgm_decompress(PG_FUNCTION_ARGS)

static TRGM * gtrgm_alloc(bool isalltrue, int siglen, BITVECP sign)

Datum gtrgm_union(PG_FUNCTION_ARGS)

Datum gtrgm_penalty(PG_FUNCTION_ARGS)

static void makesign(BITVECP sign, TRGM *a, int siglen)

Datum gtrgm_consistent(PG_FUNCTION_ARGS)

static int32 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign, int siglen)

static int hemdist(TRGM *a, TRGM *b, int siglen)

static int comparecost(const void *a, const void *b)

#define SET_VARSIZE(PTR, len)

#define VARSIZE_ANY_EXHDR(PTR)