PostgreSQL Source Code: contrib/fuzzystrmatch/dmetaphone.c Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97#ifndef DMETAPHONE_MAIN
98
100
102
103
104#define NDEBUG
105
106#else
107
108
109#include <stdio.h>
110#include <stdlib.h>
112#include <stdarg.h>
113
114#endif
115
116#include <assert.h>
117#include <ctype.h>
118
119
121
122#ifndef DMETAPHONE_MAIN
123
124
125
126
127
129
132{
134 char *aptr,
135 *codes[2],
136 *code;
137
138#ifdef DMETAPHONE_NOSTRICT
141#endif
144
146 code = codes[0];
147 if (!code)
148 code = "";
149
151}
152
153
154
155
156
158
161{
163 char *aptr,
164 *codes[2],
165 *code;
166
167#ifdef DMETAPHONE_NOSTRICT
170#endif
173
175 code = codes[1];
176 if (!code)
177 code = "";
178
180}
181
182
183
184
185
186
187#define META_MALLOC(v,n,t) \
188 (v = (t*)palloc(((n)*sizeof(t))))
189
190#define META_REALLOC(v,n,t) \
191 (v = (t*)repalloc((v),((n)*sizeof(t))))
192
193
194
195
196
197
198
199
200#define META_FREE(x) ((void)true)
201#else
202
203
204
205#define META_MALLOC(v,n,t) \
206 (v = (t*)malloc(((n)*sizeof(t))))
207
208#define META_REALLOC(v,n,t) \
209 (v = (t*)realloc((v),((n)*sizeof(t))))
210
211#define META_FREE(x) free((x))
212#endif
213
214
215
216
217
218typedef struct
219{
224}
225
227
228
229
230
231
232
233
236{
238 char empty_string[] = "";
239
242
243 if (init_str == NULL)
244 init_str = empty_string;
245 s->length = strlen(init_str);
246
248
251
252 memcpy(s->str, init_str, s->length + 1);
254
255 return s;
256}
257
258
259static void
261{
262 if (s == NULL)
263 return;
264
267
269}
270
271
272static void
274{
278}
279
280
281static void
283{
284 char *i;
285
287 *i = toupper((unsigned char) *i);
288}
289
290
291static int
293{
294 char c;
295
296 if ((pos < 0) || (pos >= s->length))
297 return 0;
298
300 if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
302 return 1;
303
304 return 0;
305}
306
307
308static int
310{
311 if (strstr(s->str, "W"))
312 return 1;
313 else if (strstr(s->str, "K"))
314 return 1;
315 else if (strstr(s->str, "CZ"))
316 return 1;
317 else if (strstr(s->str, "WITZ"))
318 return 1;
319 else
320 return 0;
321}
322
323
324static char
326{
327 if ((pos < 0) || (pos >= s->length))
328 return '\0';
329
330 return ((char) *(s->str + pos));
331}
332
333
334static void
336{
337 if ((pos < 0) || (pos >= s->length))
338 return;
339
341}
342
343
344
345
346
347static int
349{
351 char *pos;
352 va_list ap;
353
355 return 0;
356
358 va_start(ap, length);
359
360 do
361 {
362 test = va_arg(ap, char *);
363 if (*test && (strncmp(pos, test, length) == 0))
364 {
365 va_end(ap);
366 return 1;
367 }
368 }
369 while (strcmp(test, "") != 0);
370
371 va_end(ap);
372
373 return 0;
374}
375
376
377static void
379{
380 int add_length;
381
382 if (new_str == NULL)
383 return;
384
385 add_length = strlen(new_str);
388
389 strcat(s->str, new_str);
390 s->length += add_length;
391}
392
393
394static void
396{
397 int length;
401 int current;
402 int last;
403
404 current = 0;
405
406 length = strlen(str);
407 last = length - 1;
409
411
416
418
419
420 if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
421 current += 1;
422
423
424 if (GetAt(original, 0) == 'X')
425 {
426 MetaphAdd(primary, "S");
428 current += 1;
429 }
430
431
432 while ((primary->length < 4) || (secondary->length < 4))
433 {
434 if (current >= length)
435 break;
436
437 switch (GetAt(original, current))
438 {
439 case 'A':
440 case 'E':
441 case 'I':
442 case 'O':
443 case 'U':
444 case 'Y':
445 if (current == 0)
446 {
447
450 }
451 current += 1;
452 break;
453
454 case 'B':
455
456
459
460 if (GetAt(original, current + 1) == 'B')
461 current += 2;
462 else
463 current += 1;
464 break;
465
466 case '\xc7':
469 current += 1;
470 break;
471
472 case 'C':
473
474 if ((current > 1)
475 && (original, current - 2)
476 && StringAt(original, (current - 1), 3, "ACH", "")
477 && ((GetAt(original, current + 2) != 'I')
478 && ((GetAt(original, current + 2) != 'E')
479 || StringAt(original, (current - 2), 6, "BACHER",
480 "MACHER", ""))))
481 {
484 current += 2;
485 break;
486 }
487
488
489 if ((current == 0)
490 && StringAt(original, current, 6, "CAESAR", ""))
491 {
494 current += 2;
495 break;
496 }
497
498
499 if (StringAt(original, current, 4, "CHIA", ""))
500 {
503 current += 2;
504 break;
505 }
506
507 if (StringAt(original, current, 2, "CH", ""))
508 {
509
510 if ((current > 0)
511 && StringAt(original, current, 4, "CHAE", ""))
512 {
515 current += 2;
516 break;
517 }
518
519
520 if ((current == 0)
521 && (StringAt(original, (current + 1), 5,
522 "HARAC", "HARIS", "")
523 || StringAt(original, (current + 1), 3, "HOR",
524 "HYM", "HIA", "HEM", ""))
525 && (original, 0, 5, "CHORE", ""))
526 {
529 current += 2;
530 break;
531 }
532
533
534 if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
535 || StringAt(original, 0, 3, "SCH", ""))
536
537 || StringAt(original, (current - 2), 6, "ORCHES",
538 "ARCHIT", "ORCHID", "")
539 || StringAt(original, (current + 2), 1, "T", "S",
540 "")
541 || ((StringAt(original, (current - 1), 1,
542 "A", "O", "U", "E", "")
543 || (current == 0))
544
545
546
547
548 && StringAt(original, (current + 2), 1, "L", "R",
549 "N", "M", "B", "H", "F", "V", "W",
550 " ", "")))
551 {
554 }
555 else
556 {
557 if (current > 0)
558 {
559 if (StringAt(original, 0, 2, "MC", ""))
560 {
561
564 }
565 else
566 {
569 }
570 }
571 else
572 {
575 }
576 }
577 current += 2;
578 break;
579 }
580
581 if (StringAt(original, current, 2, "CZ", "")
582 && (original, (current - 2), 4, "WICZ", ""))
583 {
586 current += 2;
587 break;
588 }
589
590
591 if (StringAt(original, (current + 1), 3, "CIA", ""))
592 {
595 current += 3;
596 break;
597 }
598
599
600 if (StringAt(original, current, 2, "CC", "")
601 && !((current == 1) && (GetAt(original, 0) == 'M')))
602 {
603
604 if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
605 && (original, (current + 2), 2, "HU", ""))
606 {
607
608 if (((current == 1)
609 && (GetAt(original, current - 1) == 'A'))
610 || StringAt(original, (current - 1), 5, "UCCEE",
611 "UCCES", ""))
612 {
615
616 }
617 else
618 {
621 }
622 current += 3;
623 break;
624 }
625 else
626 {
629 current += 2;
630 break;
631 }
632 }
633
634 if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
635 {
638 current += 2;
639 break;
640 }
641
642 if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
643 {
644
646 (original, current, 3, "CIO", "CIE", "CIA", ""))
647 {
650 }
651 else
652 {
655 }
656 current += 2;
657 break;
658 }
659
660
663
664
665 if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
666 current += 3;
667 else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
668 && (original, (current + 1), 2,
669 "CE", "CI", ""))
670 current += 2;
671 else
672 current += 1;
673 break;
674
675 case 'D':
676 if (StringAt(original, current, 2, "DG", ""))
677 {
678 if (StringAt(original, (current + 2), 1,
679 "I", "E", "Y", ""))
680 {
681
684 current += 3;
685 break;
686 }
687 else
688 {
689
692 current += 2;
693 break;
694 }
695 }
696
697 if (StringAt(original, current, 2, "DT", "DD", ""))
698 {
701 current += 2;
702 break;
703 }
704
705
708 current += 1;
709 break;
710
711 case 'F':
712 if (GetAt(original, current + 1) == 'F')
713 current += 2;
714 else
715 current += 1;
718 break;
719
720 case 'G':
721 if (GetAt(original, current + 1) == 'H')
722 {
723 if ((current > 0) && (original, current - 1))
724 {
727 current += 2;
728 break;
729 }
730
731 if (current < 3)
732 {
733
734 if (current == 0)
735 {
736 if (GetAt(original, current + 2) == 'I')
737 {
740 }
741 else
742 {
745 }
746 current += 2;
747 break;
748 }
749 }
750
751
752
753
754
755 if (((current > 1)
756 && StringAt(original, (current - 2), 1,
757 "B", "H", "D", ""))
758
759 || ((current > 2)
760 && StringAt(original, (current - 3), 1,
761 "B", "H", "D", ""))
762
763 || ((current > 3)
764 && StringAt(original, (current - 4), 1,
765 "B", "H", "")))
766 {
767 current += 2;
768 break;
769 }
770 else
771 {
772
773
774
775
776 if ((current > 2)
777 && (GetAt(original, current - 1) == 'U')
778 && StringAt(original, (current - 3), 1, "C",
779 "G", "L", "R", "T", ""))
780 {
783 }
784 else if ((current > 0)
785 && GetAt(original, current - 1) != 'I')
786 {
787
788
791 }
792
793 current += 2;
794 break;
795 }
796 }
797
798 if (GetAt(original, current + 1) == 'N')
799 {
800 if ((current == 1) && IsVowel(original, 0)
802 {
805 }
806 else
807
808 if ((original, (current + 2), 2, "EY", "")
809 && (GetAt(original, current + 1) != 'Y')
811 {
814 }
815 else
816 {
819 }
820 current += 2;
821 break;
822 }
823
824
825 if (StringAt(original, (current + 1), 2, "LI", "")
827 {
830 current += 2;
831 break;
832 }
833
834
835 if ((current == 0)
836 && ((GetAt(original, current + 1) == 'Y')
837 || StringAt(original, (current + 1), 2, "ES", "EP",
838 "EB", "EL", "EY", "IB", "IL", "IN", "IE",
839 "EI", "ER", "")))
840 {
843 current += 2;
844 break;
845 }
846
847
848 if ((StringAt(original, (current + 1), 2, "ER", "")
849 || (GetAt(original, current + 1) == 'Y'))
851 "DANGER", "RANGER", "MANGER", "")
852 && (original, (current - 1), 1, "E", "I", "")
853 && (original, (current - 1), 3, "RGY", "OGY", ""))
854 {
857 current += 2;
858 break;
859 }
860
861
862 if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
863 || StringAt(original, (current - 1), 4,
864 "AGGI", "OGGI", ""))
865 {
866
867 if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
868 || StringAt(original, 0, 3, "SCH", ""))
869 || StringAt(original, (current + 1), 2, "ET", ""))
870 {
873 }
874 else
875 {
876
878 (original, (current + 1), 4, "IER ", ""))
879 {
882 }
883 else
884 {
887 }
888 }
889 current += 2;
890 break;
891 }
892
893 if (GetAt(original, current + 1) == 'G')
894 current += 2;
895 else
896 current += 1;
899 break;
900
901 case 'H':
902
903 if (((current == 0) || IsVowel(original, current - 1))
904 && IsVowel(original, current + 1))
905 {
908 current += 2;
909 }
910 else
911
912 current += 1;
913 break;
914
915 case 'J':
916
917 if (StringAt(original, current, 4, "JOSE", "")
918 || StringAt(original, 0, 4, "SAN ", ""))
919 {
920 if (((current == 0)
921 && (GetAt(original, current + 4) == ' '))
922 || StringAt(original, 0, 4, "SAN ", ""))
923 {
926 }
927 else
928 {
931 }
932 current += 1;
933 break;
934 }
935
936 if ((current == 0)
937 && (original, current, 4, "JOSE", ""))
938 {
939 MetaphAdd(primary, "J");
941 }
942 else
943 {
944
945 if (IsVowel(original, current - 1)
947 && ((GetAt(original, current + 1) == 'A')
948 || (GetAt(original, current + 1) == 'O')))
949 {
952 }
953 else
954 {
955 if (current == last)
956 {
959 }
960 else
961 {
962 if ((original, (current + 1), 1, "L", "T",
963 "K", "S", "N", "M", "B", "Z", "")
964 && (original, (current - 1), 1,
965 "S", "K", "L", ""))
966 {
969 }
970 }
971 }
972 }
973
974 if (GetAt(original, current + 1) == 'J')
975 current += 2;
976 else
977 current += 1;
978 break;
979
980 case 'K':
981 if (GetAt(original, current + 1) == 'K')
982 current += 2;
983 else
984 current += 1;
987 break;
988
989 case 'L':
990 if (GetAt(original, current + 1) == 'L')
991 {
992
993 if (((current == (length - 3))
994 && StringAt(original, (current - 1), 4, "ILLO",
995 "ILLA", "ALLE", ""))
996 || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
997 || StringAt(original, last, 1, "A", "O", ""))
998 && StringAt(original, (current - 1), 4,
999 "ALLE", "")))
1000 {
1003 current += 2;
1004 break;
1005 }
1006 current += 2;
1007 }
1008 else
1009 current += 1;
1012 break;
1013
1014 case 'M':
1015 if ((StringAt(original, (current - 1), 3, "UMB", "")
1016 && (((current + 1) == last)
1017 || StringAt(original, (current + 2), 2, "ER", "")))
1018
1019 || (GetAt(original, current + 1) == 'M'))
1020 current += 2;
1021 else
1022 current += 1;
1025 break;
1026
1027 case 'N':
1028 if (GetAt(original, current + 1) == 'N')
1029 current += 2;
1030 else
1031 current += 1;
1034 break;
1035
1036 case '\xd1':
1037 current += 1;
1040 break;
1041
1042 case 'P':
1043 if (GetAt(original, current + 1) == 'H')
1044 {
1047 current += 2;
1048 break;
1049 }
1050
1051
1052 if (StringAt(original, (current + 1), 1, "P", "B", ""))
1053 current += 2;
1054 else
1055 current += 1;
1058 break;
1059
1060 case 'Q':
1061 if (GetAt(original, current + 1) == 'Q')
1062 current += 2;
1063 else
1064 current += 1;
1067 break;
1068
1069 case 'R':
1070
1071 if ((current == last)
1073 && StringAt(original, (current - 2), 2, "IE", "")
1074 && (original, (current - 4), 2, "ME", "MA", ""))
1075 {
1078 }
1079 else
1080 {
1083 }
1084
1085 if (GetAt(original, current + 1) == 'R')
1086 current += 2;
1087 else
1088 current += 1;
1089 break;
1090
1091 case 'S':
1092
1093 if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
1094 {
1095 current += 1;
1096 break;
1097 }
1098
1099
1100 if ((current == 0)
1101 && StringAt(original, current, 5, "SUGAR", ""))
1102 {
1105 current += 1;
1106 break;
1107 }
1108
1109 if (StringAt(original, current, 2, "SH", ""))
1110 {
1111
1113 (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
1114 "HOLZ", ""))
1115 {
1118 }
1119 else
1120 {
1123 }
1124 current += 2;
1125 break;
1126 }
1127
1128
1129 if (StringAt(original, current, 3, "SIO", "SIA", "")
1130 || StringAt(original, current, 4, "SIAN", ""))
1131 {
1133 {
1136 }
1137 else
1138 {
1141 }
1142 current += 3;
1143 break;
1144 }
1145
1146
1147
1148
1149
1150
1151 if (((current == 0)
1152 && StringAt(original, (current + 1), 1,
1153 "M", "N", "L", "W", ""))
1154 || StringAt(original, (current + 1), 1, "Z", ""))
1155 {
1158 if (StringAt(original, (current + 1), 1, "Z", ""))
1159 current += 2;
1160 else
1161 current += 1;
1162 break;
1163 }
1164
1165 if (StringAt(original, current, 2, "SC", ""))
1166 {
1167
1168 if (GetAt(original, current + 2) == 'H')
1169 {
1170
1171 if (StringAt(original, (current + 3), 2,
1172 "OO", "ER", "EN",
1173 "UY", "ED", "EM", ""))
1174 {
1175
1176 if (StringAt(original, (current + 3), 2,
1177 "ER", "EN", ""))
1178 {
1181 }
1182 else
1183 {
1186 }
1187 current += 3;
1188 break;
1189 }
1190 else
1191 {
1192 if ((current == 0) && (original, 3)
1193 && (GetAt(original, 3) != 'W'))
1194 {
1197 }
1198 else
1199 {
1202 }
1203 current += 3;
1204 break;
1205 }
1206 }
1207
1208 if (StringAt(original, (current + 2), 1,
1209 "I", "E", "Y", ""))
1210 {
1213 current += 3;
1214 break;
1215 }
1216
1219 current += 3;
1220 break;
1221 }
1222
1223
1224 if ((current == last)
1225 && StringAt(original, (current - 2), 2, "AI", "OI", ""))
1226 {
1229 }
1230 else
1231 {
1234 }
1235
1236 if (StringAt(original, (current + 1), 1, "S", "Z", ""))
1237 current += 2;
1238 else
1239 current += 1;
1240 break;
1241
1242 case 'T':
1243 if (StringAt(original, current, 4, "TION", ""))
1244 {
1247 current += 3;
1248 break;
1249 }
1250
1251 if (StringAt(original, current, 3, "TIA", "TCH", ""))
1252 {
1255 current += 3;
1256 break;
1257 }
1258
1259 if (StringAt(original, current, 2, "TH", "")
1260 || StringAt(original, current, 3, "TTH", ""))
1261 {
1262
1263 if (StringAt(original, (current + 2), 2, "OM", "AM", "")
1264 || StringAt(original, 0, 4, "VAN ", "VON ", "")
1265 || StringAt(original, 0, 3, "SCH", ""))
1266 {
1269 }
1270 else
1271 {
1274 }
1275 current += 2;
1276 break;
1277 }
1278
1279 if (StringAt(original, (current + 1), 1, "T", "D", ""))
1280 current += 2;
1281 else
1282 current += 1;
1285 break;
1286
1287 case 'V':
1288 if (GetAt(original, current + 1) == 'V')
1289 current += 2;
1290 else
1291 current += 1;
1294 break;
1295
1296 case 'W':
1297
1298 if (StringAt(original, current, 2, "WR", ""))
1299 {
1302 current += 2;
1303 break;
1304 }
1305
1306 if ((current == 0)
1307 && (IsVowel(original, current + 1)
1308 || StringAt(original, current, 2, "WH", "")))
1309 {
1310
1311 if (IsVowel(original, current + 1))
1312 {
1315 }
1316 else
1317 {
1318
1321 }
1322 }
1323
1324
1325 if (((current == last) && IsVowel(original, current - 1))
1326 || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
1327 "OWSKI", "OWSKY", "")
1328 || StringAt(original, 0, 3, "SCH", ""))
1329 {
1332 current += 1;
1333 break;
1334 }
1335
1336
1337 if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
1338 {
1341 current += 4;
1342 break;
1343 }
1344
1345
1346 current += 1;
1347 break;
1348
1349 case 'X':
1350
1351 if (!((current == last)
1352 && (StringAt(original, (current - 3), 3,
1353 "IAU", "EAU", "")
1354 || StringAt(original, (current - 2), 2,
1355 "AU", "OU", ""))))
1356 {
1359 }
1360
1361
1362 if (StringAt(original, (current + 1), 1, "C", "X", ""))
1363 current += 2;
1364 else
1365 current += 1;
1366 break;
1367
1368 case 'Z':
1369
1370 if (GetAt(original, current + 1) == 'H')
1371 {
1374 current += 2;
1375 break;
1376 }
1377 else if (StringAt(original, (current + 1), 2,
1378 "ZO", "ZI", "ZA", "")
1380 && ((current > 0)
1381 && GetAt(original, current - 1) != 'T')))
1382 {
1385 }
1386 else
1387 {
1390 }
1391
1392 if (GetAt(original, current + 1) == 'Z')
1393 current += 2;
1394 else
1395 current += 1;
1396 break;
1397
1398 default:
1399 current += 1;
1400 }
1401
1402
1403
1404
1405
1406 }
1407
1408
1409 if (primary->length > 4)
1410 SetAt(primary, 4, '\0');
1411
1412 if (secondary->length > 4)
1413 SetAt(secondary, 4, '\0');
1414
1415 *codes = primary->str;
1416 *++codes = secondary->str;
1417
1421}
1422
1423#ifdef DMETAPHONE_MAIN
1424
1425
1426
1427main(int argc, char **argv)
1428{
1429 char *codes[2];
1430
1431 if (argc > 1)
1432 {
1434 printf("%s|%s\n", codes[0], codes[1]);
1435 }
1436}
1437
1438#endif
#define PG_GETARG_TEXT_PP(n)
#define PG_RETURN_TEXT_P(x)
int main(int argc, char **argv)
text * cstring_to_text(const char *s)
char * text_to_cstring(const text *t)