PostgreSQL Source Code: contrib/fuzzystrmatch/fuzzystrmatch.c Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
40
41#include <ctype.h>
42
46
48 .name = "fuzzystrmatch",
49 .version = PG_VERSION
50);
51
52
53
54
55static void _soundex(const char *instr, char *outstr);
56
/*
 * NOTE(review): presumably the number of characters in a finished Soundex
 * code; the loops in _soundex() that compare against it are not visible in
 * this listing -- confirm before changing.
 */
57#define SOUNDEX_LEN 4
58
59
/*
 * Soundex digit for each letter: 26 characters, one per letter in the order
 * ABCDEFGHIJKLMNOPQRSTUVWXYZ.  soundex_code() upper-cases its argument and
 * range-checks it against 'A'..'Z'; presumably it then indexes this string
 * by (letter - 'A') -- the lookup line is missing from this listing.
 */
60static const char *const soundex_table = "01230120022455012623010202";
61
62static char
64{
65 letter = toupper((unsigned char) letter);
66
67 if (letter >= 'A' && letter <= 'Z')
69 return letter;
70}
71
72
73
74
/*
 * NOTE(review): presumably the byte limit quoted by metaphone()'s
 * "argument/output exceeds the maximum length of %d bytes" errors; the
 * comparison lines and errmsg arguments are not visible here -- confirm.
 */
75#define MAX_METAPHONE_STRLEN 255
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
 * Leave USE_TRADITIONAL_METAPHONE undefined.  _metaphone() tests it with
 * #ifndef in its 'C' and 'S' cases, so those #ifndef sections are compiled
 * in and the #else arm of the 'C' case is compiled out.
 */
103#undef USE_TRADITIONAL_METAPHONE
104
105
/*
 * Single-character output codes.  The names suggest they stand for the
 * "sh" and "th" sounds; the statements that emit them are not visible in
 * this listing -- TODO confirm.  Note TH is the digit zero, not letter O.
 */
106#define SH 'X'
107#define TH '0'
108
110static void _metaphone(char *word, int max_phonemes, char **phoned_word);
111
112
113
114
115
116
118 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
119
120};
121
122static int
124{
125 if (isalpha((unsigned char) c))
126 {
127 c = toupper((unsigned char) c);
128
131 }
132 return 0;
133}
134
/*
 * Letter-class tests.  Each macro masks one bit out of getcode(c), so the
 * result is the bit value itself (nonzero), not a normalized 1.  getcode()
 * returns 0 for non-alphabetic input, so every test is false for those.
 * The letter sets below assume getcode() looks up the 26-entry bit table by
 * (uppercased letter - 'A'); that lookup line is not visible in this
 * listing -- TODO confirm.
 */

/* Bit 1: set for A, E, I, O, U. */
135#define isvowel(c) (getcode(c) & 1)
136
137
/* Bit 2: set for F, J, L, M, N, R (name suggests "emitted unchanged"). */
138#define NOCHANGE(c) (getcode(c) & 2)
139
140
/* Bit 4: set for C, G, P, S, T (name suggests "changed by a following H"). */
141#define AFFECTH(c) (getcode(c) & 4)
142
143
/* Bit 8: set for E, I, Y (name suggests "softens a preceding C or G"). */
144#define MAKESOFT(c) (getcode(c) & 8)
145
146
/* Bit 16: set for B, D, H (name suggests "blocks GH -> F"). */
147#define NOGHTOF(c) (getcode(c) & 16)
148
152{
158 const char *s_data;
159 const char *t_data;
160 int s_bytes,
161 t_bytes;
162
163
166
169
171 ins_c, del_c, sub_c, false));
172}
173
174
178{
181 const char *s_data;
182 const char *t_data;
183 int s_bytes,
184 t_bytes;
185
186
189
192
194 1, 1, 1, false));
195}
196
197
201{
208 const char *s_data;
209 const char *t_data;
210 int s_bytes,
211 t_bytes;
212
213
216
219
221 t_data, t_bytes,
222 ins_c, del_c, sub_c,
223 max_d, false));
224}
225
226
230{
234 const char *s_data;
235 const char *t_data;
236 int s_bytes,
237 t_bytes;
238
239
242
245
247 t_data, t_bytes,
248 1, 1, 1,
249 max_d, false));
250}
251
252
253
254
255
256
257
261{
263 size_t str_i_len = strlen(str_i);
264 int reqlen;
265 char *metaph;
266
267
268 if (!(str_i_len > 0))
270
273 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
274 errmsg("argument exceeds the maximum length of %d bytes",
276
280 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
281 errmsg("output exceeds the maximum length of %d bytes",
283
284 if (!(reqlen > 0))
286 (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
287 errmsg("output cannot be empty string")));
288
291}
292
293
294
295
296
297
298
299
300
301
302
303
/*
 * Letter-access helpers.  They are not self-contained: each expands to an
 * expression over the identifiers `word` and `w_idx`, which must be in
 * scope at the point of use (as they are inside _metaphone()).  Every
 * character is cast to unsigned char before toupper() so that a negative
 * plain-char value is never passed to the ctype function.
 */

/* Upper-cased character one past the current index; no bounds check here. */
304#define Next_Letter (toupper((unsigned char) word[w_idx+1]))
305
/* Upper-cased character at the current index. */
306#define Curr_Letter (toupper((unsigned char) word[w_idx]))
307
/* Upper-cased character n positions back, or '\0' if that is before word[0]. */
308#define Look_Back_Letter(n) \
309 (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
310
/* Upper-cased previous character, or '\0' at the start of the word. */
311#define Prev_Letter (Look_Back_Letter(1))
312
/*
 * Upper-cased character two past the current index.  word[w_idx+2] is only
 * read when Next_Letter is not the terminator; otherwise yields '\0'.
 */
313#define After_Next_Letter \
314 (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
/* Upper-cased result of Lookahead() called on the tail starting at w_idx. */
315#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))
316
317
318
319
320static char
322{
323 char letter_ahead = '\0';
325
327
328
330
331 return letter_ahead;
332}
333
334
335
/*
 * Output-buffer helpers.  They expand over the identifiers `phoned_word`
 * (a char **) and `p_idx`, which must be in scope at the point of use (as
 * they are inside _metaphone()).
 */

/*
 * Append c to *phoned_word and advance p_idx.  Performs no bounds check of
 * its own; the visible callers gate on Phone_Len < max_phonemes, and the
 * buffer is allocated with one extra byte beyond max_phonemes.
 */
336#define Phonize(c) do {(*phoned_word)[p_idx++] = c;} while (0)
337
/* Write the NUL terminator at the current output position (p_idx unchanged). */
338#define End_Phoned_Word do {(*phoned_word)[p_idx] = '\0';} while (0)
339
/* Number of characters appended to the output so far. */
340#define Phone_Len (p_idx)
341
342
/* True for any character that is not alphabetic according to isalpha(). */
343#define Isbreak(c) (!isalpha((unsigned char) (c)))
344
345
346static void
348 int max_phonemes,
349 char **phoned_word)
350{
351 int w_idx = 0;
352 int p_idx = 0;
353
354
355
356
357
358
359
360
361 if (!(max_phonemes > 0))
362
363 elog(ERROR, "metaphone: Requested output length must be > 0");
364
365
366 if ((word == NULL) || !(strlen(word) > 0))
367
368 elog(ERROR, "metaphone: Input string length must be > 0");
369
370
371 if (max_phonemes == 0)
372 {
373 *phoned_word = palloc(sizeof(char) * strlen(word) + 1);
374 }
375 else
376 {
377 *phoned_word = palloc(sizeof(char) * max_phonemes + 1);
378 }
379
380
381
382 for (; !isalpha((unsigned char) (Curr_Letter)); w_idx++)
383 {
384
386 {
388 return;
389 }
390 }
391
393 {
394
395 case 'A':
397 {
399 w_idx += 2;
400 }
401
402 else
403 {
405 w_idx++;
406 }
407 break;
408
409 case 'G':
410 case 'K':
411 case 'P':
413 {
415 w_idx += 2;
416 }
417 break;
418
419
420
421
422 case 'W':
425 {
427 w_idx += 2;
428 }
430 {
432 w_idx += 2;
433 }
434
435 break;
436
437 case 'X':
439 w_idx++;
440 break;
441
442
443
444
445
446 case 'E':
447 case 'I':
448 case 'O':
449 case 'U':
451 w_idx++;
452 break;
453 default:
454
455 break;
456 }
457
458
459
460
462 (max_phonemes == 0 || Phone_Len < max_phonemes);
463 w_idx++)
464 {
465
466
467
468
469 unsigned short int skip_letter = 0;
470
471
472
473
474
475
476
477
478
479
480
481 if (!isalpha((unsigned char) (Curr_Letter)))
482 continue;
483
484
487 continue;
488
490 {
491
492 case 'B':
495 break;
496
497
498
499
500
501
502 case 'C':
504 {
507 {
509 }
510
512 {
513
514 }
515 else
517 }
519 {
520#ifndef USE_TRADITIONAL_METAPHONE
523 {
525 }
526 else
528#else
530#endif
531 skip_letter++;
532 }
533 else
535 break;
536
537
538
539
540 case 'D':
543 {
545 skip_letter++;
546 }
547 else
549 break;
550
551
552
553
554
555
556
557 case 'G':
559 {
562 {
564 skip_letter++;
565 }
566 else
567 {
568
569 }
570 }
572 {
576 {
577
578 }
579 else
581 }
585 else
587 break;
588
589 case 'H':
593 break;
594
595
596
597
598 case 'K':
601 break;
602
603
604
605
606 case 'P':
609 else
611 break;
612
613
614
615
616 case 'Q':
618 break;
619
620
621
622
623 case 'S':
629 {
631 skip_letter++;
632 }
633#ifndef USE_TRADITIONAL_METAPHONE
637 {
639 skip_letter += 2;
640 }
641#endif
642 else
644 break;
645
646
647
648
649 case 'T':
655 {
657 skip_letter++;
658 }
659 else
661 break;
662
663 case 'V':
665 break;
666
667 case 'W':
670 break;
671
672 case 'X':
674 if (max_phonemes == 0 || Phone_Len < max_phonemes)
676 break;
677
678 case 'Y':
681 break;
682
683 case 'Z':
685 break;
686
687 case 'F':
688 case 'J':
689 case 'L':
690 case 'M':
691 case 'N':
692 case 'R':
694 break;
695 default:
696
697 break;
698 }
699
700 w_idx += skip_letter;
701 }
702
704}
705
706
707
708
709
711
714{
716 char *arg;
717
719
721
723}
724
725static void
727{
728 int count;
729
732
733
734 while (*instr && !isalpha((unsigned char) *instr))
735 ++instr;
736
737
738 if (!*instr)
739 {
741 return;
742 }
743
744
745 *outstr++ = (char) toupper((unsigned char) *instr++);
746
747 count = 1;
749 {
750 if (isalpha((unsigned char) *instr) &&
752 {
754 if (*outstr != '0')
755 {
756 ++outstr;
757 ++count;
758 }
759 }
760 ++instr;
761 }
762
763
765 {
766 *outstr = '0';
767 ++outstr;
768 ++count;
769 }
770
771
772 *outstr = '\0';
773}
774
776
779{
782 int i,
783 result;
784
787
788 result = 0;
790 {
792 result++;
793 }
794
796}
Datum idx(PG_FUNCTION_ARGS)
#define TextDatumGetCString(d)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define PG_GETARG_TEXT_PP(n)
#define PG_GETARG_DATUM(n)
#define PG_RETURN_TEXT_P(x)
#define PG_RETURN_INT32(x)
#define PG_GETARG_INT32(n)
Datum metaphone(PG_FUNCTION_ARGS)
static void _metaphone(char *word, int max_phonemes, char **phoned_word)
#define After_Next_Letter
Datum levenshtein_less_equal_with_costs(PG_FUNCTION_ARGS)
#define Look_Back_Letter(n)
static const char *const soundex_table
Datum soundex(PG_FUNCTION_ARGS)
static char soundex_code(char letter)
static const char _codes[26]
Datum levenshtein_with_costs(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC_EXT(.name="fuzzystrmatch",.version=PG_VERSION)
static void _soundex(const char *instr, char *outstr)
static int getcode(char c)
Datum difference(PG_FUNCTION_ARGS)
#define MAX_METAPHONE_STRLEN
static char Lookahead(char *word, int how_far)
Datum levenshtein_less_equal(PG_FUNCTION_ARGS)
#define Look_Ahead_Letter(n)
PG_FUNCTION_INFO_V1(levenshtein_with_costs)
Datum levenshtein(PG_FUNCTION_ARGS)
Assert(PointerIsAligned(start, uint64))
int varstr_levenshtein(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, bool trusted)
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
#define VARSIZE_ANY_EXHDR(PTR)
text * cstring_to_text(const char *s)
char * text_to_cstring(const text *t)
int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)