#include "utils/builtins.h" #include "utils/varlena.h" #include "varatt.h"

PostgreSQL Source Code: contrib/fuzzystrmatch/fuzzystrmatch.c File Reference (original) (raw)

#include "[postgres.h](postgres%5F8h%5Fsource.html)"
#include <ctype.h>
#include "[utils/builtins.h](builtins%5F8h%5Fsource.html)"
#include "[utils/varlena.h](varlena%5F8h%5Fsource.html)"
#include "[varatt.h](varatt%5F8h%5Fsource.html)"

Go to the source code of this file.

Macros
#define SOUNDEX_LEN 4
#define MAX_METAPHONE_STRLEN 255
#define SH 'X'
#define TH '0'
#define isvowel(c) (getcode(c) & 1) /* AEIOU */
#define NOCHANGE(c) (getcode(c) & 2) /* FJMNR */
#define AFFECTH(c) (getcode(c) & 4) /* CGPST */
#define MAKESOFT(c) (getcode(c) & 8) /* EIY */
#define NOGHTOF(c) (getcode(c) & 16) /* BDH */
#define Next_Letter (toupper((unsigned char) word[w_idx+1]))
#define Curr_Letter (toupper((unsigned char) word[w_idx]))
#define Look_Back_Letter(n) (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
#define Prev_Letter (Look_Back_Letter(1))
#define After_Next_Letter (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))
#define Phonize(c) do {(*phoned_word)[p_idx++] = c;} while (0)
#define End_Phoned_Word do {(*phoned_word)[p_idx] = '\0';} while (0)
#define Phone_Len (p_idx)
#define Isbreak(c) (!isalpha((unsigned char) (c)))
Functions
PG_MODULE_MAGIC_EXT (.name="fuzzystrmatch",.version=PG_VERSION)
static void _soundex (const char *instr, char *outstr)
static char soundex_code (char letter)
static char Lookahead (char *word, int how_far)
static void _metaphone (char *word, int max_phonemes, char **phoned_word)
static int getcode (char c)
PG_FUNCTION_INFO_V1 (levenshtein_with_costs)
Datum levenshtein_with_costs (PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1 (levenshtein)
Datum levenshtein (PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1 (levenshtein_less_equal_with_costs)
Datum levenshtein_less_equal_with_costs (PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1 (levenshtein_less_equal)
Datum levenshtein_less_equal (PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1 (metaphone)
Datum metaphone (PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1 (soundex)
Datum soundex (PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1 (difference)
Datum difference (PG_FUNCTION_ARGS)
Variables
static const char *const soundex_table = "01230120022455012623010202"
static const char _codes [26]

AFFECTH

#define AFFECTH(c) (getcode(c) & 4) /* CGPST */

After_Next_Letter

#define After_Next_Letter (Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')

Curr_Letter

#define Curr_Letter (toupper((unsigned char) word[w_idx]))

End_Phoned_Word

#define End_Phoned_Word do {(*phoned_word)[p_idx] = '\0';} while (0)

Isbreak

#define Isbreak(c) (!isalpha((unsigned char) (c)))

isvowel

#define isvowel(c) (getcode(c) & 1) /* AEIOU */

Look_Ahead_Letter

#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))

Look_Back_Letter

#define Look_Back_Letter(n) (w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')

MAKESOFT

#define MAKESOFT(c) (getcode(c) & 8) /* EIY */

MAX_METAPHONE_STRLEN

#define MAX_METAPHONE_STRLEN 255

Next_Letter

#define Next_Letter (toupper((unsigned char) word[w_idx+1]))

NOCHANGE

#define NOCHANGE(c) (getcode(c) & 2) /* FJMNR */

NOGHTOF

#define NOGHTOF(c) (getcode(c) & 16) /* BDH */

Phone_Len

#define Phone_Len (p_idx)

Phonize

#define Phonize(c) do {(*phoned_word)[p_idx++] = c;} while (0)

Prev_Letter

#define Prev_Letter (Look_Back_Letter(1))

SH

#define SH 'X'

SOUNDEX_LEN

#define SOUNDEX_LEN 4

TH

#define TH '0'

_metaphone()

static void _metaphone(char *word, int max_phonemes, char **phoned_word)

Definition at line 347 of file fuzzystrmatch.c.

350{

351 int w_idx = 0;

352 int p_idx = 0;

353

354

355

356

357

358

359

360

361 if (!(max_phonemes > 0))

362

363 elog(ERROR, "metaphone: Requested output length must be > 0");

364

365

366 if ((word == NULL) || !(strlen(word) > 0))

367

368 elog(ERROR, "metaphone: Input string length must be > 0");

369

370

371 if (max_phonemes == 0)

372 {

373 *phoned_word = palloc(sizeof(char) * strlen(word) + 1);

374 }

375 else

376 {

377 *phoned_word = palloc(sizeof(char) * max_phonemes + 1);

378 }

379

380

381

382 for (; !isalpha((unsigned char) (Curr_Letter)); w_idx++)

383 {

384

386 {

388 return;

389 }

390 }

391

393 {

394

395 case 'A':

397 {

399 w_idx += 2;

400 }

401

402 else

403 {

405 w_idx++;

406 }

407 break;

408

409 case 'G':

410 case 'K':

411 case 'P':

413 {

415 w_idx += 2;

416 }

417 break;

418

419

420

421

422 case 'W':

425 {

427 w_idx += 2;

428 }

430 {

432 w_idx += 2;

433 }

434

435 break;

436

437 case 'X':

439 w_idx++;

440 break;

441

442

443

444

445

446 case 'E':

447 case 'I':

448 case 'O':

449 case 'U':

451 w_idx++;

452 break;

453 default:

454

455 break;

456 }

457

458

459

460

462 (max_phonemes == 0 || Phone_Len < max_phonemes);

463 w_idx++)

464 {

465

466

467

468

469 unsigned short int skip_letter = 0;

470

471

472

473

474

475

476

477

478

479

480

481 if (!isalpha((unsigned char) (Curr_Letter)))

482 continue;

483

484

487 continue;

488

490 {

491

492 case 'B':

495 break;

496

497

498

499

500

501

502 case 'C':

504 {

507 {

509 }

510

512 {

513

514 }

515 else

517 }

519 {

520#ifndef USE_TRADITIONAL_METAPHONE

523 {

525 }

526 else

528#else

530#endif

531 skip_letter++;

532 }

533 else

535 break;

536

537

538

539

540 case 'D':

543 {

545 skip_letter++;

546 }

547 else

549 break;

550

551

552

553

554

555

556

557 case 'G':

559 {

562 {

564 skip_letter++;

565 }

566 else

567 {

568

569 }

570 }

572 {

576 {

577

578 }

579 else

581 }

585 else

587 break;

588

589 case 'H':

593 break;

594

595

596

597

598 case 'K':

601 break;

602

603

604

605

606 case 'P':

609 else

611 break;

612

613

614

615

616 case 'Q':

618 break;

619

620

621

622

623 case 'S':

629 {

631 skip_letter++;

632 }

633#ifndef USE_TRADITIONAL_METAPHONE

637 {

639 skip_letter += 2;

640 }

641#endif

642 else

644 break;

645

646

647

648

649 case 'T':

655 {

657 skip_letter++;

658 }

659 else

661 break;

662

663 case 'V':

665 break;

666

667 case 'W':

670 break;

671

672 case 'X':

674 if (max_phonemes == 0 || Phone_Len < max_phonemes)

676 break;

677

678 case 'Y':

681 break;

682

683 case 'Z':

685 break;

686

687 case 'F':

688 case 'J':

689 case 'L':

690 case 'M':

691 case 'N':

692 case 'R':

694 break;

695 default:

696

697 break;

698 }

699

700 w_idx += skip_letter;

701 }

702

704}

#define After_Next_Letter

#define Look_Back_Letter(n)

#define Look_Ahead_Letter(n)

static void word(struct vars *v, int dir, struct state *lp, struct state *rp)

References AFFECTH, After_Next_Letter, Curr_Letter, elog, End_Phoned_Word, ERROR, Isbreak, isvowel, Look_Ahead_Letter, Look_Back_Letter, MAKESOFT, Next_Letter, NOGHTOF, palloc(), Phone_Len, Phonize, Prev_Letter, SH, TH, and word().

Referenced by metaphone().

_soundex()

static void _soundex(const char *instr, char *outstr)

Definition at line 726 of file fuzzystrmatch.c.

727{

728 int count;

729

732

733

734 while (*instr && !isalpha((unsigned char) *instr))

735 ++instr;

736

737

738 if (!*instr)

739 {

741 return;

742 }

743

744

745 *outstr++ = (char) toupper((unsigned char) *instr++);

746

747 count = 1;

749 {

750 if (isalpha((unsigned char) *instr) &&

752 {

754 if (*outstr != '0')

755 {

756 ++outstr;

757 ++count;

758 }

759 }

760 ++instr;

761 }

762

763

765 {

766 *outstr = '0';

767 ++outstr;

768 ++count;

769 }

770

771

772 *outstr = '\0';

773}

static char soundex_code(char letter)

Assert(PointerIsAligned(start, uint64))

References Assert(), soundex_code(), and SOUNDEX_LEN.

Referenced by difference(), and soundex().

difference()

Definition at line 778 of file fuzzystrmatch.c.

779{

782 int i,

783 result;

784

787

788 result = 0;

790 {

791 if (sndx1[i] == sndx2[i])

792 result++;

793 }

794

796}

#define PG_GETARG_TEXT_PP(n)

#define PG_RETURN_INT32(x)

static void _soundex(const char *instr, char *outstr)

char * text_to_cstring(const text *t)

References _soundex(), i, PG_GETARG_TEXT_PP, PG_RETURN_INT32, SOUNDEX_LEN, and text_to_cstring().

Referenced by checkcondition_str(), ExtendMultiXactMember(), find_wordentry(), getKeyJsonValueFromContainer(), hstore_concat(), hstore_delete_array(), hstore_delete_hstore(), hstore_subscript_assign(), and hstoreFindKey().

getcode()

static int getcode(char c)

Definition at line 123 of file fuzzystrmatch.c.

124{

125 if (isalpha((unsigned char) c))

126 {

127 c = toupper((unsigned char) c);

128

129 if (c >= 'A' && c <= 'Z')

131 }

132 return 0;

133}

static const char _codes[26]

References _codes.

levenshtein()

Definition at line 177 of file fuzzystrmatch.c.

178{

181 const char *s_data;

182 const char *t_data;

183 int s_bytes,

184 t_bytes;

185

186

189

192

194 1, 1, 1, false));

195}

int varstr_levenshtein(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, bool trusted)

#define VARSIZE_ANY_EXHDR(PTR)

References PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein().

levenshtein_less_equal()

Definition at line 229 of file fuzzystrmatch.c.

230{

234 const char *s_data;

235 const char *t_data;

236 int s_bytes,

237 t_bytes;

238

239

242

245

247 t_data, t_bytes,

248 1, 1, 1,

249 max_d, false));

250}

#define PG_GETARG_INT32(n)

int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)

References PG_GETARG_INT32, PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein_less_equal().

levenshtein_less_equal_with_costs()

Definition at line 200 of file fuzzystrmatch.c.

201{

208 const char *s_data;

209 const char *t_data;

210 int s_bytes,

211 t_bytes;

212

213

216

219

221 t_data, t_bytes,

222 ins_c, del_c, sub_c,

223 max_d, false));

224}

References PG_GETARG_INT32, PG_GETARG_TEXT_PP, PG_RETURN_INT32, VARDATA_ANY, VARSIZE_ANY_EXHDR, and varstr_levenshtein_less_equal().

levenshtein_with_costs()

Lookahead()

static char Lookahead(char *word, int how_far)

Definition at line 321 of file fuzzystrmatch.c.

322{

323 char letter_ahead = '\0';

325

327

328

329 letter_ahead = word[idx];

330

331 return letter_ahead;

332}

Datum idx(PG_FUNCTION_ARGS)

References idx(), and word().

metaphone()

Definition at line 260 of file fuzzystrmatch.c.

261{

263 size_t str_i_len = strlen(str_i);

264 int reqlen;

265 char *metaph;

266

267

268 if (!(str_i_len > 0))

270

273 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

274 errmsg("argument exceeds the maximum length of %d bytes",

276

280 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

281 errmsg("output exceeds the maximum length of %d bytes",

283

284 if (!(reqlen > 0))

286 (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),

287 errmsg("output cannot be empty string")));

288

291}

#define TextDatumGetCString(d)

int errcode(int sqlerrcode)

int errmsg(const char *fmt,...)

#define ereport(elevel,...)

#define PG_GETARG_DATUM(n)

#define PG_RETURN_TEXT_P(x)

static void _metaphone(char *word, int max_phonemes, char **phoned_word)

#define MAX_METAPHONE_STRLEN

text * cstring_to_text(const char *s)

References _metaphone(), cstring_to_text(), ereport, errcode(), errmsg(), ERROR, MAX_METAPHONE_STRLEN, PG_GETARG_DATUM, PG_GETARG_INT32, PG_RETURN_TEXT_P, and TextDatumGetCString.

PG_FUNCTION_INFO_V1() [1/7]

PG_FUNCTION_INFO_V1() [2/7]

PG_FUNCTION_INFO_V1() [3/7]

PG_FUNCTION_INFO_V1() [4/7]

PG_FUNCTION_INFO_V1() [5/7]

PG_FUNCTION_INFO_V1() [6/7]

PG_FUNCTION_INFO_V1() [7/7]

PG_MODULE_MAGIC_EXT()

PG_MODULE_MAGIC_EXT(.name = "fuzzystrmatch", .version = PG_VERSION)

soundex()

Definition at line 713 of file fuzzystrmatch.c.

714{

716 char *arg;

717

719

721

723}

References _soundex(), arg, cstring_to_text(), PG_GETARG_TEXT_PP, PG_RETURN_TEXT_P, SOUNDEX_LEN, and text_to_cstring().

Referenced by daitch_mokotoff(), daitch_mokotoff_coding(), find_or_create_child_node(), update_leaves(), and update_node().

soundex_code()

static char soundex_code(char letter)

Definition at line 63 of file fuzzystrmatch.c.

64{

65 letter = toupper((unsigned char) letter);

66

67 if (letter >= 'A' && letter <= 'Z')

69 return letter;

70}

static const char *const soundex_table

References soundex_table.

Referenced by _soundex().

_codes

Initial value:

= {

1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0

}

Definition at line 117 of file fuzzystrmatch.c.

Referenced by getcode().

soundex_table

static const char *const soundex_table = "01230120022455012623010202"