PostgreSQL Source Code: contrib/fuzzystrmatch/fuzzystrmatch.c Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

40

41#include <ctype.h>

42

46

48 .name = "fuzzystrmatch",

49 .version = PG_VERSION

50);

51

52

53

54

/*
 * Forward declaration of the soundex worker.  Its definition further down
 * reads characters from instr and finishes by writing a terminating '\0'
 * through outstr.
 */
55static void _soundex(const char *instr, char *outstr);

56

/*
 * Length constant for soundex codes.
 * NOTE(review): no line that uses SOUNDEX_LEN survives in this listing;
 * presumably it is the number of characters _soundex() produces -- confirm.
 */
57#define SOUNDEX_LEN 4

58

59

/*
 * 26 digit characters.  soundex_code() range-checks its argument against
 * 'A'..'Z', so presumably entry i is the code digit for letter 'A' + i;
 * the line that actually indexes this table is missing here -- confirm.
 */
60static const char *const soundex_table = "01230120022455012623010202";

61

/*
 * soundex_code -- per the index at the end of this listing the signature is
 * "static char soundex_code(char letter)"; the name line itself is missing.
 *
 * Upper-cases letter, then tests whether it lies in 'A'..'Z'.
 * NOTE(review): the statement controlled by that test (original line 68) is
 * missing; presumably it returns the soundex_table entry for the letter --
 * confirm.  The visible "return letter" hands back the upper-cased input.
 */
62static char

64{

/* Cast to unsigned char so a negative char value is never passed to toupper() */
65 letter = toupper((unsigned char) letter);

66

67 if (letter >= 'A' && letter <= 'Z')

69 return letter;

70}

71

72

73

74

75#define MAX_METAPHONE_STRLEN 255

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

/*
 * Make sure USE_TRADITIONAL_METAPHONE is not defined, so the
 * "#ifndef USE_TRADITIONAL_METAPHONE" sections inside _metaphone() are the
 * ones that get compiled.
 */
103#undef USE_TRADITIONAL_METAPHONE

104

105

/*
 * Single-character output symbols.
 * NOTE(review): the names suggest the "sh" and "th" sounds; the lines that
 * emit them are missing from this listing -- confirm.
 */
106#define SH 'X'

107#define TH '0'

108

/*
 * Forward declaration of the metaphone worker.  The definition allocates
 * *phoned_word with palloc(), so the result is handed back through that
 * out-parameter.
 */
110static void _metaphone(char *word, int max_phonemes, char **phoned_word);

111

112

113

114

115

116

118 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0

119

120};

121

/*
 * getcode -- per the index at the end of this listing the signature is
 * "static int getcode(char c)"; the name line itself is missing.
 *
 * Returns 0 for any character that is not alphabetic, and for an alphabetic
 * character that does not upper-case into 'A'..'Z'.
 * NOTE(review): the statement controlled by the range test (original line
 * 130) is missing; presumably it returns the _codes[] entry for the letter
 * -- confirm.
 */
122static int

124{

125 if (isalpha((unsigned char) c))

126 {

127 c = toupper((unsigned char) c);

128

/* isalpha() alone is not enough: only 'A'..'Z' passes this second test */
129 if (c >= 'A' && c <= 'Z')

131 }

/* Fallback for everything that did not pass both tests above */
132 return 0;

133}

134

/*
 * Letter-class tests.  Each macro masks one bit out of getcode(c); the
 * result is nonzero when the bit is set (the bit value itself, not 1).
 *
 * The letter lists below assume the 26-entry table above is indexed by
 * letter - 'A' (TODO confirm: the lookup line in getcode() is missing).
 */

/* bit 1: A, E, I, O, U */
135#define isvowel(c) (getcode(c) & 1)

136

137

/* bit 2: F, J, L, M, N, R */
138#define NOCHANGE(c) (getcode(c) & 2)

139

140

/* bit 4: C, G, P, S, T */
141#define AFFECTH(c) (getcode(c) & 4)

142

143

/* bit 8: E, I, Y */
144#define MAKESOFT(c) (getcode(c) & 8)

145

146

/* bit 16: B, D, H */
147#define NOGHTOF(c) (getcode(c) & 16)

148

152{

158 const char *s_data;

159 const char *t_data;

160 int s_bytes,

161 t_bytes;

162

163

166

169

171 ins_c, del_c, sub_c, false));

172}

173

174

178{

181 const char *s_data;

182 const char *t_data;

183 int s_bytes,

184 t_bytes;

185

186

189

192

194 1, 1, 1, false));

195}

196

197

201{

208 const char *s_data;

209 const char *t_data;

210 int s_bytes,

211 t_bytes;

212

213

216

219

221 t_data, t_bytes,

222 ins_c, del_c, sub_c,

223 max_d, false));

224}

225

226

230{

234 const char *s_data;

235 const char *t_data;

236 int s_bytes,

237 t_bytes;

238

239

242

245

247 t_data, t_bytes,

248 1, 1, 1,

249 max_d, false));

250}

251

252

253

254

255

256

257

261{

263 size_t str_i_len = strlen(str_i);

264 int reqlen;

265 char *metaph;

266

267

268 if (!(str_i_len > 0))

270

273 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

274 errmsg("argument exceeds the maximum length of %d bytes",

276

280 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

281 errmsg("output exceeds the maximum length of %d bytes",

283

284 if (!(reqlen > 0))

286 (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),

287 errmsg("output cannot be empty string")));

288

291}

292

293

294

295

296

297

298

299

300

301

302

303

/*
 * Letter-access macros.  All of them expand to expressions that use the
 * identifiers "word" and "w_idx" from the scope where they are expanded,
 * and all of them upper-case the character they fetch.
 */

/* Character one position after the current one */
304#define Next_Letter (toupper((unsigned char) word[w_idx+1]))

305

/* Character at the current position */
306#define Curr_Letter (toupper((unsigned char) word[w_idx]))

307

/* Character n positions back, or '\0' when fewer than n characters precede w_idx */
308#define Look_Back_Letter(n) \

309 (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')

310

/* Character immediately before the current one ('\0' at the start of word) */
311#define Prev_Letter (Look_Back_Letter(1))

312

/*
 * Character two positions ahead.  word[w_idx+2] is read only when
 * Next_Letter is not '\0'; otherwise the macro yields '\0'.
 */
313#define After_Next_Letter \

314 (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')

/* Character n positions ahead, fetched through Lookahead() starting at word+w_idx */
315#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))

316

317

318

319

/*
 * Lookahead -- per the index at the end of this listing the signature is
 * "static char Lookahead(char *word, int how_far)"; the name line itself is
 * missing.
 *
 * Returns word[idx].
 * NOTE(review): the declaration of idx and the code that computes it
 * (original lines 324 and 326) are missing; presumably idx is advanced up
 * to how_far positions and stops early at the end of the string -- confirm.
 */
320static char

322{

323 char letter_ahead = '\0';

325

327

328

329 letter_ahead = word[idx];

331 return letter_ahead;

332}

333

334

335

/*
 * Output helpers.  Phonize, End_Phoned_Word and Phone_Len use the
 * identifiers "phoned_word" (a char **) and "p_idx" from the scope where
 * they are expanded.
 */

/*
 * Store c at the current output position and advance p_idx.  The macro does
 * no bounds checking, and c is expanded without parentheses.
 */
336#define Phonize(c) do {(*phoned_word)[p_idx++] = c;} while (0)

337

/* Write '\0' at the current output position; p_idx is not advanced */
338#define End_Phoned_Word do {(*phoned_word)[p_idx] = '\0';} while (0)

339

/* Number of characters stored so far */
340#define Phone_Len (p_idx)

341

342

/* True for any character that is not alphabetic (this includes '\0') */
343#define Isbreak(c) (!isalpha((unsigned char) (c)))

344

345

/*
 * _metaphone -- per the prototype earlier in the file the signature is
 * "static void _metaphone(char *word, int max_phonemes, char **phoned_word)";
 * the line carrying the name and first parameter is missing from this
 * listing.
 *
 * Visible behaviour: validates the arguments, allocates *phoned_word with
 * palloc(), skips leading non-alphabetic characters, treats the first letter
 * specially, then walks the remaining characters while fewer than
 * max_phonemes output characters have been stored.
 *
 * NOTE(review): every statement that stores an output character, both
 * switch headers, and most if-conditions are missing from this listing.
 * The comments below describe only what the surviving lines show.
 *
 * NOTE(review): the first check raises elog(ERROR) unless max_phonemes > 0.
 * If elog(ERROR) does not return (confirm against the PostgreSQL error
 * reporting docs), every later "max_phonemes == 0" test is unreachable.
 */
346static void

348 int max_phonemes,

349 char **phoned_word)

350{

/* w_idx: read position in word; p_idx: write position in *phoned_word */
351 int w_idx = 0;

352 int p_idx = 0;

353

354

355

356

357

358

359

360

/* Argument check: requested output length must be positive */
361 if (!(max_phonemes > 0))

362

363 elog(ERROR, "metaphone: Requested output length must be > 0");

364

365

/* Argument check: input must be a non-NULL, non-empty string */
366 if ((word == NULL) || !(strlen(word) > 0))

367

368 elog(ERROR, "metaphone: Input string length must be > 0");

369

370

/*
 * Allocate the output buffer: strlen(word) + 1 bytes when max_phonemes is 0,
 * otherwise max_phonemes + 1 bytes.  The extra byte is presumably for the
 * terminating '\0' (the terminating call is not visible here -- confirm).
 */
371 if (max_phonemes == 0)

372 {

373 *phoned_word = palloc(sizeof(char) * strlen(word) + 1);

374 }

375 else

376 {

377 *phoned_word = palloc(sizeof(char) * max_phonemes + 1);

378 }

379

380

381

/*
 * Advance w_idx past leading non-alphabetic characters.  The inner block
 * returns from the function; its condition (original line 385) is missing,
 * presumably an end-of-string test -- confirm.
 */
382 for (; !isalpha((unsigned char) (Curr_Letter)); w_idx++)

383 {

384

386 {

388 return;

389 }

390 }

391

/*
 * Special handling keyed on the first letter (the switch header, original
 * line 392, is missing).  The surviving lines only show w_idx being advanced
 * by one or two characters per case.
 */
393 {

394

395 case 'A':

397 {

399 w_idx += 2;

400 }

401

402 else

403 {

405 w_idx++;

406 }

407 break;

408

409 case 'G':

410 case 'K':

411 case 'P':

413 {

415 w_idx += 2;

416 }

417 break;

418

419

420

421

422 case 'W':

425 {

427 w_idx += 2;

428 }

430 {

432 w_idx += 2;

433 }

434

435 break;

436

437 case 'X':

439 w_idx++;

440 break;

441

442

443

444

445

446 case 'E':

447 case 'I':

448 case 'O':

449 case 'U':

451 w_idx++;

452 break;

453 default:

454

455 break;

456 }

457

458

459

460

/*
 * Main loop.  The first line of the for-header (original line 461) is
 * missing; the surviving part of the condition keeps looping while
 * max_phonemes is 0 or fewer than max_phonemes characters have been stored.
 */
462 (max_phonemes == 0 || Phone_Len < max_phonemes);

463 w_idx++)

464 {

465

466

467

468

/*
 * Extra input characters to skip after this one; added to w_idx at the
 * bottom of the loop body, on top of the loop's own w_idx++.
 */
469 unsigned short int skip_letter = 0;

470

471

472

473

474

475

476

477

478

479

480

/* Non-alphabetic characters are passed over without producing output */
481 if (!isalpha((unsigned char) (Curr_Letter)))

482 continue;

483

484

/* NOTE(review): the condition guarding this continue (original lines 485-486) is missing */
487 continue;

488

/* Per-letter handling (the switch header, original line 489, is missing) */
490 {

491

492 case 'B':

495 break;

496

497

498

499

500

501

502 case 'C':

504 {

507 {

509 }

510

512 {

513

514 }

515 else

517 }

519 {

520#ifndef USE_TRADITIONAL_METAPHONE

523 {

525 }

526 else

528#else

530#endif

531 skip_letter++;

532 }

533 else

535 break;

536

537

538

539

540 case 'D':

543 {

545 skip_letter++;

546 }

547 else

549 break;

550

551

552

553

554

555

556

557 case 'G':

559 {

562 {

564 skip_letter++;

565 }

566 else

567 {

568

569 }

570 }

572 {

576 {

577

578 }

579 else

581 }

585 else

587 break;

588

589 case 'H':

593 break;

594

595

596

597

598 case 'K':

601 break;

602

603

604

605

606 case 'P':

609 else

611 break;

612

613

614

615

616 case 'Q':

618 break;

619

620

621

622

623 case 'S':

629 {

631 skip_letter++;

632 }

633#ifndef USE_TRADITIONAL_METAPHONE

637 {

639 skip_letter += 2;

640 }

641#endif

642 else

644 break;

645

646

647

648

649 case 'T':

655 {

657 skip_letter++;

658 }

659 else

661 break;

662

663 case 'V':

665 break;

666

667 case 'W':

670 break;

671

672 case 'X':

/*
 * The output limit is re-tested here, inside the case.  Presumably this
 * letter stores two output characters and the second needs its own room
 * check (both storing lines are missing) -- confirm.
 */
674 if (max_phonemes == 0 || Phone_Len < max_phonemes)

676 break;

677

678 case 'Y':

681 break;

682

683 case 'Z':

685 break;

686

687 case 'F':

688 case 'J':

689 case 'L':

690 case 'M':

691 case 'N':

692 case 'R':

694 break;

695 default:

696

697 break;

698 }

699

/* Apply any additional skip requested by the case handled above */
700 w_idx += skip_letter;

701 }

702

704}

705

706

707

708

709

711

714{

716 char *arg;

717

719

721

723}

724

/*
 * _soundex -- per the prototype earlier in the file the signature is
 * "static void _soundex(const char *instr, char *outstr)"; the name line
 * itself is missing from this listing.
 *
 * Visible behaviour: skips leading non-alphabetic characters of instr,
 * writes the upper-cased first letter to outstr, appends further code
 * characters while scanning the rest of instr, pads with '0' characters,
 * and terminates outstr with '\0'.
 *
 * NOTE(review): both loop headers (original lines 748 and 764), the line
 * that stores each candidate code character (753), and the statement before
 * the early return (740) are missing; the loop bounds are presumably tied
 * to SOUNDEX_LEN -- confirm.
 */
725static void

727{

/* Number of characters written to outstr so far */
728 int count;

729

732

733

/* Skip leading non-alphabetic characters */
734 while (*instr && !isalpha((unsigned char) *instr))

735 ++instr;

736

737

/* Nothing alphabetic in the input: return early */
738 if (!*instr)

739 {

741 return;

742 }

743

744

/* The first output character is the first letter itself, upper-cased */
745 *outstr++ = (char) toupper((unsigned char) *instr++);

746

747 count = 1;

749 {

750 if (isalpha((unsigned char) *instr) &&

752 {

/*
 * The character currently at *outstr is kept (output pointer and count
 * advance) only when it is not '0'; otherwise the slot is left to be
 * overwritten.
 */
754 if (*outstr != '0')

755 {

756 ++outstr;

757 ++count;

758 }

759 }

760 ++instr;

761 }

762

763

/* Pad the remaining output positions with '0' */
765 {

766 *outstr = '0';

767 ++outstr;

768 ++count;

769 }

770

771

/* Terminate the output string */
772 *outstr = '\0';

773}

774

776

779{

782 int i,

783 result;

784

787

788 result = 0;

790 {

791 if (sndx1[i] == sndx2[i])

792 result++;

793 }

794

796}

Datum idx(PG_FUNCTION_ARGS)

#define TextDatumGetCString(d)

int errcode(int sqlerrcode)

int errmsg(const char *fmt,...)

#define ereport(elevel,...)

#define PG_GETARG_TEXT_PP(n)

#define PG_GETARG_DATUM(n)

#define PG_RETURN_TEXT_P(x)

#define PG_RETURN_INT32(x)

#define PG_GETARG_INT32(n)

Datum metaphone(PG_FUNCTION_ARGS)

static void _metaphone(char *word, int max_phonemes, char **phoned_word)

#define After_Next_Letter

Datum levenshtein_less_equal_with_costs(PG_FUNCTION_ARGS)

#define Look_Back_Letter(n)

static const char *const soundex_table

Datum soundex(PG_FUNCTION_ARGS)

static char soundex_code(char letter)

static const char _codes[26]

Datum levenshtein_with_costs(PG_FUNCTION_ARGS)

PG_MODULE_MAGIC_EXT(.name="fuzzystrmatch",.version=PG_VERSION)

static void _soundex(const char *instr, char *outstr)

static int getcode(char c)

Datum difference(PG_FUNCTION_ARGS)

#define MAX_METAPHONE_STRLEN

static char Lookahead(char *word, int how_far)

Datum levenshtein_less_equal(PG_FUNCTION_ARGS)

#define Look_Ahead_Letter(n)

PG_FUNCTION_INFO_V1(levenshtein_with_costs)

Datum levenshtein(PG_FUNCTION_ARGS)

Assert(PointerIsAligned(start, uint64))

int varstr_levenshtein(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, bool trusted)

static void word(struct vars *v, int dir, struct state *lp, struct state *rp)

#define VARSIZE_ANY_EXHDR(PTR)

text * cstring_to_text(const char *s)

char * text_to_cstring(const text *t)

int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)