PostgreSQL Source Code: contrib/fuzzystrmatch/fuzzystrmatch.c File Reference (original) (raw)
#include "[postgres.h](postgres%5F8h%5Fsource.html)"
#include <ctype.h>
#include "[utils/builtins.h](builtins%5F8h%5Fsource.html)"
#include "[utils/varlena.h](varlena%5F8h%5Fsource.html)"
#include "[varatt.h](varatt%5F8h%5Fsource.html)"
Go to the source code of this file.
Macros | |
---|---|
#define | SOUNDEX_LEN 4 |
#define | MAX_METAPHONE_STRLEN 255 |
#define | SH 'X' |
#define | TH '0' |
#define | isvowel(c) (getcode(c) & 1) /* AEIOU */ |
#define | NOCHANGE(c) (getcode(c) & 2) /* FJMNR */ |
#define | AFFECTH(c) (getcode(c) & 4) /* CGPST */ |
#define | MAKESOFT(c) (getcode(c) & 8) /* EIY */ |
#define | NOGHTOF(c) (getcode(c) & 16) /* BDH */ |
#define | Next_Letter (toupper((unsigned char) word[w_idx+1])) |
#define | Curr_Letter (toupper((unsigned char) word[w_idx])) |
#define | Look_Back_Letter(n) (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0') |
#define | Prev_Letter (Look_Back_Letter(1)) |
#define | After_Next_Letter (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0') |
#define | Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n)) |
#define | Phonize(c) do {(*phoned_word)[p_idx++] = c;} while (0) |
#define | End_Phoned_Word do {(*phoned_word)[p_idx] = '\0';} while (0) |
#define | Phone_Len (p_idx) |
#define | Isbreak(c) (!isalpha((unsigned char) (c))) |
Functions | |
---|---|
PG_MODULE_MAGIC_EXT (.name="fuzzystrmatch",.version=PG_VERSION) | |
static void | _soundex (const char *instr, char *outstr) |
static char | soundex_code (char letter) |
static char | Lookahead (char *word, int how_far) |
static void | _metaphone (char *word, int max_phonemes, char **phoned_word) |
static int | getcode (char c) |
PG_FUNCTION_INFO_V1 (levenshtein_with_costs) | |
Datum | levenshtein_with_costs (PG_FUNCTION_ARGS) |
PG_FUNCTION_INFO_V1 (levenshtein) | |
Datum | levenshtein (PG_FUNCTION_ARGS) |
PG_FUNCTION_INFO_V1 (levenshtein_less_equal_with_costs) | |
Datum | levenshtein_less_equal_with_costs (PG_FUNCTION_ARGS) |
PG_FUNCTION_INFO_V1 (levenshtein_less_equal) | |
Datum | levenshtein_less_equal (PG_FUNCTION_ARGS) |
PG_FUNCTION_INFO_V1 (metaphone) | |
Datum | metaphone (PG_FUNCTION_ARGS) |
PG_FUNCTION_INFO_V1 (soundex) | |
Datum | soundex (PG_FUNCTION_ARGS) |
PG_FUNCTION_INFO_V1 (difference) | |
Datum | difference (PG_FUNCTION_ARGS) |
Variables | |
---|---|
static const char *const | soundex_table = "01230120022455012623010202" |
static const char | _codes [26] |
◆ AFFECTH
#define AFFECTH(c) (getcode(c) & 4) /* CGPST */
◆ After_Next_Letter
#define After_Next_Letter (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
◆ Curr_Letter
#define Curr_Letter (toupper((unsigned char) word[w_idx]))
◆ End_Phoned_Word
#define End_Phoned_Word do {(*phoned_word)[p_idx] = '\0';} while (0)
◆ Isbreak
#define Isbreak(c) (!isalpha((unsigned char) (c)))
◆ isvowel
#define isvowel(c) (getcode(c) & 1) /* AEIOU */
◆ Look_Ahead_Letter
#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))
◆ Look_Back_Letter
#define Look_Back_Letter(n) (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
◆ MAKESOFT
#define MAKESOFT(c) (getcode(c) & 8) /* EIY */
◆ MAX_METAPHONE_STRLEN
#define MAX_METAPHONE_STRLEN 255
◆ Next_Letter
#define Next_Letter (toupper((unsigned char) word[w_idx+1]))
◆ NOCHANGE
#define NOCHANGE(c) (getcode(c) & 2) /* FJMNR */
◆ NOGHTOF
#define NOGHTOF(c) (getcode(c) & 16) /* BDH */
◆ Phone_Len
#define Phone_Len (p_idx)
◆ Phonize
#define Phonize(c) do {(*phoned_word)[p_idx++] = c;} while (0)
◆ Prev_Letter
#define Prev_Letter (Look_Back_Letter(1))
◆ SH
#define SH 'X'
◆ SOUNDEX_LEN
#define SOUNDEX_LEN 4
◆ TH
#define TH '0'
◆ _metaphone()
static void _metaphone ( char * word, int max_phonemes, char ** phoned_word ) | static |
---|
Definition at line 347 of file fuzzystrmatch.c.
350{
351 int w_idx = 0;
352 int p_idx = 0;
353
354
355
356
357
358
359
360
361 if (!(max_phonemes > 0))
362
363 elog(ERROR, "metaphone: Requested output length must be > 0");
364
365
366 if ((word == NULL) || !(strlen(word) > 0))
367
368 elog(ERROR, "metaphone: Input string length must be > 0");
369
370
371 if (max_phonemes == 0)
372 {
373 *phoned_word = palloc(sizeof(char) * strlen(word) + 1);
374 }
375 else
376 {
377 *phoned_word = palloc(sizeof(char) * max_phonemes + 1);
378 }
379
380
381
382 for (; !isalpha((unsigned char) (Curr_Letter)); w_idx++)
383 {
384
386 {
388 return;
389 }
390 }
391
393 {
394
395 case 'A':
397 {
399 w_idx += 2;
400 }
401
402 else
403 {
405 w_idx++;
406 }
407 break;
408
409 case 'G':
410 case 'K':
411 case 'P':
413 {
415 w_idx += 2;
416 }
417 break;
418
419
420
421
422 case 'W':
425 {
427 w_idx += 2;
428 }
430 {
432 w_idx += 2;
433 }
434
435 break;
436
437 case 'X':
439 w_idx++;
440 break;
441
442
443
444
445
446 case 'E':
447 case 'I':
448 case 'O':
449 case 'U':
451 w_idx++;
452 break;
453 default:
454
455 break;
456 }
457
458
459
460
462 (max_phonemes == 0 || Phone_Len < max_phonemes);
463 w_idx++)
464 {
465
466
467
468
469 unsigned short int skip_letter = 0;
470
471
472
473
474
475
476
477
478
479
480
481 if (!isalpha((unsigned char) (Curr_Letter)))
482 continue;
483
484
487 continue;
488
490 {
491
492 case 'B':
495 break;
496
497
498
499
500
501
502 case 'C':
504 {
507 {
509 }
510
512 {
513
514 }
515 else
517 }
519 {
520#ifndef USE_TRADITIONAL_METAPHONE
523 {
525 }
526 else
528#else
530#endif
531 skip_letter++;
532 }
533 else
535 break;
536
537
538
539
540 case 'D':
543 {
545 skip_letter++;
546 }
547 else
549 break;
550
551
552
553
554
555
556
557 case 'G':
559 {
562 {
564 skip_letter++;
565 }
566 else
567 {
568
569 }
570 }
572 {
576 {
577
578 }
579 else
581 }
585 else
587 break;
588
589 case 'H':
593 break;
594
595
596
597
598 case 'K':
601 break;
602
603
604
605
606 case 'P':
609 else
611 break;
612
613
614
615
616 case 'Q':
618 break;
619
620
621
622
623 case 'S':
629 {
631 skip_letter++;
632 }
633#ifndef USE_TRADITIONAL_METAPHONE
637 {
639 skip_letter += 2;
640 }
641#endif
642 else
644 break;
645
646
647
648
649 case 'T':
655 {
657 skip_letter++;
658 }
659 else
661 break;
662
663 case 'V':
665 break;
666
667 case 'W':
670 break;
671
672 case 'X':
674 if (max_phonemes == 0 || Phone_Len < max_phonemes)
676 break;
677
678 case 'Y':
681 break;
682
683 case 'Z':
685 break;
686
687 case 'F':
688 case 'J':
689 case 'L':
690 case 'M':
691 case 'N':
692 case 'R':
694 break;
695 default:
696
697 break;
698 }
699
700 w_idx += skip_letter;
701 }
702
704}
#define After_Next_Letter
#define Look_Back_Letter(n)
#define Look_Ahead_Letter(n)
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
References AFFECTH, After_Next_Letter, Curr_Letter, elog, End_Phoned_Word, ERROR, Isbreak, isvowel, Look_Ahead_Letter, Look_Back_Letter, MAKESOFT, Next_Letter, NOGHTOF, palloc(), Phone_Len, Phonize, Prev_Letter, SH, TH, and word().
Referenced by metaphone().
◆ _soundex()
static void _soundex ( const char * instr, char * outstr ) | static |
---|
Definition at line 726 of file fuzzystrmatch.c.
727{
728 int count;
729
732
733
734 while (*instr && !isalpha((unsigned char) *instr))
735 ++instr;
736
737
738 if (!*instr)
739 {
741 return;
742 }
743
744
745 *outstr++ = (char) toupper((unsigned char) *instr++);
746
747 count = 1;
749 {
750 if (isalpha((unsigned char) *instr) &&
752 {
754 if (*outstr != '0')
755 {
756 ++outstr;
757 ++count;
758 }
759 }
760 ++instr;
761 }
762
763
765 {
766 *outstr = '0';
767 ++outstr;
768 ++count;
769 }
770
771
772 *outstr = '\0';
773}
static char soundex_code(char letter)
Assert(PointerIsAligned(start, uint64))
References Assert(), soundex_code(), and SOUNDEX_LEN.
Referenced by difference(), and soundex().
◆ difference()
Definition at line 778 of file fuzzystrmatch.c.
779{
782 int i,
783 result;
784
787
788 result = 0;
790 {
792 result++;
793 }
794
796}
#define PG_GETARG_TEXT_PP(n)
#define PG_RETURN_INT32(x)
static void _soundex(const char *instr, char *outstr)
char * text_to_cstring(const text *t)
References _soundex(), i, PG_GETARG_TEXT_PP, PG_RETURN_INT32, SOUNDEX_LEN, and text_to_cstring().
Referenced by checkcondition_str(), ExtendMultiXactMember(), find_wordentry(), getKeyJsonValueFromContainer(), hstore_concat(), hstore_delete_array(), hstore_delete_hstore(), hstore_subscript_assign(), and hstoreFindKey().
◆ getcode()
static int getcode ( char c) | static |
---|
Definition at line 123 of file fuzzystrmatch.c.
124{
125 if (isalpha((unsigned char) c))
126 {
127 c = toupper((unsigned char) c);
128
131 }
132 return 0;
133}
static const char _codes[26]
References _codes.
◆ levenshtein()
Definition at line 177 of file fuzzystrmatch.c.
178{
181 const char *s_data;
182 const char *t_data;
183 int s_bytes,
184 t_bytes;
185
186
189
192
194 1, 1, 1, false));
195}
int varstr_levenshtein(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, bool trusted)
#define VARSIZE_ANY_EXHDR(PTR)
References PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein().
◆ levenshtein_less_equal()
Definition at line 229 of file fuzzystrmatch.c.
230{
234 const char *s_data;
235 const char *t_data;
236 int s_bytes,
237 t_bytes;
238
239
242
245
247 t_data, t_bytes,
248 1, 1, 1,
249 max_d, false));
250}
#define PG_GETARG_INT32(n)
int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)
References PG_GETARG_INT32, PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein_less_equal().
◆ levenshtein_less_equal_with_costs()
Definition at line 200 of file fuzzystrmatch.c.
201{
208 const char *s_data;
209 const char *t_data;
210 int s_bytes,
211 t_bytes;
212
213
216
219
221 t_data, t_bytes,
222 ins_c, del_c, sub_c,
223 max_d, false));
224}
References PG_GETARG_INT32, PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein_less_equal().
◆ levenshtein_with_costs()
◆ Lookahead()
static char Lookahead ( char * word, int how_far ) | static |
---|
Definition at line 321 of file fuzzystrmatch.c.
322{
323 char letter_ahead = '\0';
325
327
328
330
331 return letter_ahead;
332}
Datum idx(PG_FUNCTION_ARGS)
◆ metaphone()
Definition at line 260 of file fuzzystrmatch.c.
261{
263 size_t str_i_len = strlen(str_i);
264 int reqlen;
265 char *metaph;
266
267
268 if (!(str_i_len > 0))
270
273 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
274 errmsg("argument exceeds the maximum length of %d bytes",
276
280 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
281 errmsg("output exceeds the maximum length of %d bytes",
283
284 if (!(reqlen > 0))
286 (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
287 errmsg("output cannot be empty string")));
288
291}
#define TextDatumGetCString(d)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define PG_GETARG_DATUM(n)
#define PG_RETURN_TEXT_P(x)
static void _metaphone(char *word, int max_phonemes, char **phoned_word)
#define MAX_METAPHONE_STRLEN
text * cstring_to_text(const char *s)
References _metaphone(), cstring_to_text(), ereport, errcode(), errmsg(), ERROR, MAX_METAPHONE_STRLEN, PG_GETARG_DATUM, PG_GETARG_INT32, PG_RETURN_TEXT_P, and TextDatumGetCString.
◆ PG_FUNCTION_INFO_V1() [1/7]
◆ PG_FUNCTION_INFO_V1() [2/7]
◆ PG_FUNCTION_INFO_V1() [3/7]
◆ PG_FUNCTION_INFO_V1() [4/7]
◆ PG_FUNCTION_INFO_V1() [5/7]
◆ PG_FUNCTION_INFO_V1() [6/7]
◆ PG_FUNCTION_INFO_V1() [7/7]
◆ PG_MODULE_MAGIC_EXT()
PG_MODULE_MAGIC_EXT(.name = "fuzzystrmatch", .version = PG_VERSION)
◆ soundex()
Definition at line 713 of file fuzzystrmatch.c.
714{
716 char *arg;
717
719
721
723}
References _soundex(), arg, cstring_to_text(), PG_GETARG_TEXT_PP, PG_RETURN_TEXT_P, SOUNDEX_LEN, and text_to_cstring().
Referenced by daitch_mokotoff(), daitch_mokotoff_coding(), find_or_create_child_node(), update_leaves(), and update_node().
◆ soundex_code()
static char soundex_code ( char letter) | static |
---|
Definition at line 63 of file fuzzystrmatch.c.
64{
65 letter = toupper((unsigned char) letter);
66
67 if (letter >= 'A' && letter <= 'Z')
69 return letter;
70}
static const char *const soundex_table
References soundex_table.
Referenced by _soundex().
◆ _codes
Initial value:
= {
1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
}
Definition at line 117 of file fuzzystrmatch.c.
Referenced by getcode().
◆ soundex_table
const char* const soundex_table = "01230120022455012623010202" | static |
---|