PostgreSQL Source Code: contrib/pg_trgm/trgm_gist.c Source File (original) (raw)
1
2
3
5
12
13
14typedef struct
15{
19
20#define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \
21 ((TrgmGistOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
22 SIGLEN_DEFAULT)
23
24typedef struct
25{
26
29
31
33
34
35
36
37
39
40#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
41
42
54
55
58{
60 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
61 errmsg("cannot accept a value of type %s", "gtrgm")));
62
64}
65
68{
70 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
71 errmsg("cannot display a value of type %s", "gtrgm")));
72
74}
75
78{
82
85
86 if (!isalltrue)
87 {
90 else
91 memset(GETSIGN(res), 0, siglen);
92 }
93
94 return res;
95}
96
97static void
99{
104
107 for (k = 0; k < len; k++)
108 {
109 CPTRGM(&tmp, ptr + k);
111 }
112}
113
116{
120
122 {
125
130 entry->offset, false);
131 }
134 {
138
140 {
141 if ((sign[i] & 0xff) != 0xff)
143 }
144
149 entry->offset, false);
150 }
152}
153
156{
160
162
164 {
165
170 }
171 else
172 {
173
175 }
176}
177
180{
186
187 for (k = 0; k < len; k++)
188 {
189 CPTRGM(&tmp, ptr + k);
191 }
192
193 return count;
194}
195
198{
202
203
208 bool res;
211 double nlimit;
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
228 if (cache == NULL ||
229 cache->strategy != strategy ||
231 memcmp(cache->query, query, querysize) != 0)
232 {
235 Size qtrgsize;
236
237 switch (strategy)
238 {
245 break;
247#ifndef IGNORECASE
248 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
249#endif
250
254 break;
256#ifndef IGNORECASE
257 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
258#endif
259
262 &graph, fcinfo->flinfo->fn_mcxt);
263
264 if (qtrg && ARRNELEM(qtrg) <= 0)
265 {
267 qtrg = NULL;
268 }
269 break;
270 default:
271 elog(ERROR, "unrecognized strategy number: %d", strategy);
272 qtrg = NULL;
273 break;
274 }
275
276 qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
277
282 qtrgsize);
283
284 newcache->strategy = strategy;
287 memcpy(newcache->query, query, querysize);
288 if (qtrg)
289 {
291 ((char *) newcache->query + MAXALIGN(querysize));
292 memcpy((char *) newcache->trigrams, qtrg, qtrgsize);
293
295 }
296 else
298 newcache->graph = graph;
299
300 if (cache)
302 fcinfo->flinfo->fn_extra = newcache;
303 cache = newcache;
304 }
305
307
308 switch (strategy)
309 {
313
314
315
316
317
319
321
323 {
324 double tmpsml = cnt_sml(qtrg, key, *recheck);
325
326 res = (tmpsml >= nlimit);
327 }
329 {
330 res = true;
331 }
332 else
333 {
336
337 if (len == 0)
338 res = false;
339 else
341 }
342 break;
344#ifndef IGNORECASE
345 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
346#endif
347
350
351 *recheck = true;
352
353
354
355
356
358 {
360 }
362 {
363 res = true;
364 }
365 else
366 {
368 tmp = 0,
372
373 res = true;
374 for (k = 0; k < len; k++)
375 {
376 CPTRGM(&tmp, ptr + k);
378 {
379 res = false;
380 break;
381 }
382 }
383 }
384 break;
386#ifndef IGNORECASE
387 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
388#endif
389
391
392 *recheck = true;
393
394
395 if (qtrg)
396 {
398 {
399 bool *check;
400
404 }
406 {
407 res = true;
408 }
409 else
410 {
412 tmp = 0,
416 bool *check;
417
418
419
420
421
422
423
424
425
426 check = (bool *) palloc(len * sizeof(bool));
427 for (k = 0; k < len; k++)
428 {
429 CPTRGM(&tmp, ptr + k);
431 }
434 }
435 }
436 else
437 {
438
439 res = true;
440 }
441 break;
442 default:
443 elog(ERROR, "unrecognized strategy number: %d", strategy);
444 res = false;
445 break;
446 }
447
449}
450
453{
457
458
465 char *cache = (char *) fcinfo->flinfo->fn_extra;
466
467
468
469
470 if (cache == NULL ||
471 VARSIZE(cache) != querysize ||
472 memcmp(cache, query, querysize) != 0)
473 {
474 char *newcache;
475
477
481
482 memcpy(newcache, query, querysize);
483 memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
484
485 if (cache)
487 fcinfo->flinfo->fn_extra = newcache;
488 cache = newcache;
489 }
490
491 qtrg = (TRGM *) (cache + MAXALIGN(querysize));
492
493 switch (strategy)
494 {
498
501 {
502
503
504
505
506
507
509
510 res = 1.0 - sml;
511 }
513 {
514 res = 0.0;
515 }
516 else
517 {
520
522 }
523 break;
524 default:
525 elog(ERROR, "unrecognized strategy number: %d", strategy);
526 res = 0;
527 break;
528 }
529
531}
532
535{
537
539 {
541
543 return 1;
544
547 }
548 else
549 {
552
554 {
556 HASH(sbase, tmp, siglen);
557 }
558 }
559 return 0;
560}
561
562
565{
573
575 {
577 {
580 break;
581 }
582 }
583
585
587}
588
591{
596
598 {
600 *result = true;
602 *result = false;
604 *result = false;
605 else
606 {
610
611 *result = true;
613 {
615 {
616 *result = false;
617 break;
618 }
619 }
620 }
621 }
622 else
623 {
626
627 if (lena != lenb)
628 *result = false;
629 else
630 {
634
635 *result = true;
636 for (i = 0; i < lena; i++)
638 {
639 *result = false;
640 break;
641 }
642 }
643 }
644
646}
647
650{
652}
653
654static int
656{
657 int i,
658 diff,
659 dist = 0;
660
662 {
663 diff = (unsigned char) (a[i] ^ b[i]);
664
666 }
667 return dist;
668}
669
670static int
672{
674 {
676 return 0;
677 else
679 }
682
684}
685
688{
696
697 *penalty = 0.0;
698
700 {
701 char *cache = (char *) fcinfo->flinfo->fn_extra;
705
706
707
708
709 if (cache == NULL ||
710 VARSIZE(cachedVal) != newvalsize ||
711 memcmp(cachedVal, newval, newvalsize) != 0)
712 {
713 char *newcache;
714
717 newvalsize);
718
720
721 cachedVal = (TRGM *) (newcache + MAXALIGN(siglen));
722 memcpy(cachedVal, newval, newvalsize);
723
724 if (cache)
726 fcinfo->flinfo->fn_extra = newcache;
727 cache = newcache;
728 }
729
731
734 else
736 }
737 else
740}
741
742typedef struct
743{
747
748static void
750{
757 else
759}
760
761#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
762typedef struct
763{
767
768static int
770{
772 return 0;
773 else
774 return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
775}
776
777
778static int
780{
781 if (a->allistrue)
782 {
783 if (b->allistrue)
784 return 0;
785 else
787 }
788 else if (b->allistrue)
790
792}
793
796{
802 j;
803 TRGM *datum_l,
804 *datum_r;
806 union_r;
807 int32 size_alpha,
808 size_beta;
809 int32 size_waste,
810 waste = -1;
813 seed_2 = 0;
815 *right;
817 int i;
819 char *cache_sign;
821
822
824 cache_sign = palloc(siglen * (maxoff + 1));
825
827 fillcache(&cache[k], GETENTRY(entryvec, k), &cache_sign[siglen * k],
828 siglen);
829
830
832 {
834 {
835 size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen);
836 if (size_waste > waste)
837 {
838 waste = size_waste;
839 seed_1 = k;
840 seed_2 = j;
841 }
842 }
843 }
844
845
846 if (seed_1 == 0 || seed_2 == 0)
847 {
848 seed_1 = 1;
849 seed_2 = 2;
850 }
851
852
858
859
860 datum_l = gtrgm_alloc(cache[seed_1].allistrue, siglen, cache[seed_1].sign);
861 datum_r = gtrgm_alloc(cache[seed_2].allistrue, siglen, cache[seed_2].sign);
862
863 union_l = GETSIGN(datum_l);
864 union_r = GETSIGN(datum_r);
865
866
869 {
870 costvector[j - 1].pos = j;
871 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen);
872 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen);
873 costvector[j - 1].cost = abs(size_alpha - size_beta);
874 }
876
877 for (k = 0; k < maxoff; k++)
878 {
880 if (j == seed_1)
881 {
882 *left++ = j;
884 continue;
885 }
886 else if (j == seed_2)
887 {
888 *right++ = j;
890 continue;
891 }
892
893 if (ISALLTRUE(datum_l) || cache[j].allistrue)
894 {
895 if (ISALLTRUE(datum_l) && cache[j].allistrue)
896 size_alpha = 0;
897 else
901 siglen);
902 }
903 else
905
906 if (ISALLTRUE(datum_r) || cache[j].allistrue)
907 {
908 if (ISALLTRUE(datum_r) && cache[j].allistrue)
909 size_beta = 0;
910 else
914 siglen);
915 }
916 else
918
920 {
921 if (ISALLTRUE(datum_l) || cache[j].allistrue)
922 {
924 memset(GETSIGN(datum_l), 0xff, siglen);
925 }
926 else
927 {
931 }
932 *left++ = j;
934 }
935 else
936 {
937 if (ISALLTRUE(datum_r) || cache[j].allistrue)
938 {
940 memset(GETSIGN(datum_r), 0xff, siglen);
941 }
942 else
943 {
947 }
948 *right++ = j;
950 }
951 }
952
955
957}
958
961{
963
966 "signature length in bytes",
969
971}
#define MemSet(start, val, len)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define PG_RETURN_FLOAT8(x)
#define DatumGetTextPP(X)
#define PG_GETARG_POINTER(n)
#define PG_GETARG_UINT16(n)
#define PG_RETURN_POINTER(x)
#define PG_GET_COLLATION()
#define PG_RETURN_BOOL(x)
#define PG_GETARG_TEXT_P(n)
#define gistentryinit(e, k, r, pg, o, l)
#define HASHVAL(val, siglen)
#define CALCGTSIZE(flag, siglen)
#define SIGLENBIT(siglen)
#define HASH(sign, val, siglen)
if(TABLE==NULL||TABLE_index==NULL)
void * MemoryContextAlloc(MemoryContext context, Size size)
void pfree(void *pointer)
#define OffsetNumberNext(offsetNumber)
#define FirstOffsetNumber
PGDLLIMPORT const uint8 pg_number_of_ones[256]
static uint64 pg_popcount(const char *buf, int bytes)
#define qsort(a, b, c, d)
static Datum PointerGetDatum(const void *X)
static Pointer DatumGetPointer(Datum X)
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
#define RegExpICaseStrategyNumber
#define WordSimilarityStrategyNumber
TRGM * generate_trgm(char *str, int slen)
#define DistanceStrategyNumber
#define StrictWordSimilarityStrategyNumber
#define StrictWordDistanceStrategyNumber
int(* CMPTRGM)(const void *a, const void *b)
bool * trgm_presence_map(TRGM *query, TRGM *key)
double index_strategy_get_limit(StrategyNumber strategy)
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
#define SimilarityStrategyNumber
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
#define ILikeStrategyNumber
TRGM * generate_wildcard_trgm(const char *str, int slen)
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
#define LikeStrategyNumber
#define EqualStrategyNumber
#define RegExpStrategyNumber
#define WordDistanceStrategyNumber
Datum gtrgm_out(PG_FUNCTION_ARGS)
static int hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen)
static void fillcache(CACHESIGN *item, TRGM *key, BITVECP sign, int siglen)
Datum gtrgm_same(PG_FUNCTION_ARGS)
static int32 unionkey(BITVECP sbase, TRGM *add, int siglen)
PG_FUNCTION_INFO_V1(gtrgm_in)
Datum gtrgm_distance(PG_FUNCTION_ARGS)
static int32 sizebitvec(BITVECP sign, int siglen)
Datum gtrgm_in(PG_FUNCTION_ARGS)
Datum gtrgm_options(PG_FUNCTION_ARGS)
static int hemdistsign(BITVECP a, BITVECP b, int siglen)
#define GETENTRY(vec, pos)
Datum gtrgm_compress(PG_FUNCTION_ARGS)
Datum gtrgm_picksplit(PG_FUNCTION_ARGS)
Datum gtrgm_decompress(PG_FUNCTION_ARGS)
static TRGM * gtrgm_alloc(bool isalltrue, int siglen, BITVECP sign)
Datum gtrgm_union(PG_FUNCTION_ARGS)
Datum gtrgm_penalty(PG_FUNCTION_ARGS)
static void makesign(BITVECP sign, TRGM *a, int siglen)
Datum gtrgm_consistent(PG_FUNCTION_ARGS)
static int32 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign, int siglen)
static int hemdist(TRGM *a, TRGM *b, int siglen)
static int comparecost(const void *a, const void *b)
#define SET_VARSIZE(PTR, len)
#define VARSIZE_ANY_EXHDR(PTR)