PostgreSQL Source Code: src/backend/utils/mb/conv.c Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32int

34 unsigned char *p,

36 int src_encoding,

37 int dest_encoding,

38 const unsigned char *tab,

39 bool noError)

40{

41 const unsigned char *start = l;

42 unsigned char c1,

43 c2;

44

45 while (len > 0)

46 {

47 c1 = *l;

48 if (c1 == 0)

49 {

50 if (noError)

51 break;

53 }

55 *p++ = c1;

56 else

57 {

59 if (c2)

60 *p++ = c2;

61 else

62 {

63 if (noError)

64 break;

66 (const char *) l, len);

67 }

68 }

69 l++;

71 }

72 *p = '\0';

73

75}

76

77

78

79

80

81

82

83

84

85

86

87

/*
 * latin2mic: convert a single-byte encoding to the two-byte-per-high-character
 * form produced by prefixing each high-bit byte with the leading byte lc.
 *
 * l         source bytes; at most len of them are examined
 * p         output buffer; receives up to two bytes per input byte plus a
 *           terminating NUL
 * lc        leading byte emitted in front of every high-bit source byte
 * encoding  encoding id handed to the error reporter
 * noError   when true, stop at the first problem instead of reporting it
 *
 * Returns the number of source bytes consumed.
 *
 * NOTE(review): restored from a listing with dropped lines (error report,
 * high-bit test, len--); confirm against the upstream file.
 */
int
latin2mic(const unsigned char *l, unsigned char *p, int len,
          int lc, int encoding, bool noError)
{
    const unsigned char *start = l;
    int         c1;

    while (len > 0)
    {
        c1 = *l;
        if (c1 == 0)
        {
            /* an embedded NUL is never valid input */
            if (noError)
                break;
            /* presumably does not return (raises an error) -- TODO confirm */
            report_invalid_encoding(encoding, (const char *) l, len);
        }
        if (IS_HIGHBIT_SET(c1))
            *p++ = lc;          /* high-bit bytes get the leading byte first */
        *p++ = c1;
        l++;
        len--;
    }
    *p = '\0';

    return l - start;
}

114

115

116

117

118

119

120

121

122

123

124

125

126int

127mic2latin(const unsigned char *mic, unsigned char *p, int len,

128 int lc, int encoding, bool noError)

129{

130 const unsigned char *start = mic;

131 int c1;

132

133 while (len > 0)

134 {

135 c1 = *mic;

136 if (c1 == 0)

137 {

138 if (noError)

139 break;

141 }

143 {

144

145 *p++ = c1;

146 mic++;

148 }

149 else

150 {

152

153 if (len < l)

154 {

155 if (noError)

156 break;

159 }

161 {

162 if (noError)

163 break;

165 (const char *) mic, len);

166 }

167 *p++ = mic[1];

168 mic += 2;

169 len -= 2;

170 }

171 }

172 *p = '\0';

173

174 return mic - start;

175}

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193int

195 unsigned char *p,

197 int lc,

199 const unsigned char *tab,

200 bool noError)

201{

202 const unsigned char *start = l;

203 unsigned char c1,

204 c2;

205

206 while (len > 0)

207 {

208 c1 = *l;

209 if (c1 == 0)

210 {

211 if (noError)

212 break;

214 }

216 *p++ = c1;

217 else

218 {

220 if (c2)

221 {

222 *p++ = lc;

223 *p++ = c2;

224 }

225 else

226 {

227 if (noError)

228 break;

230 (const char *) l, len);

231 }

232 }

233 l++;

235 }

236 *p = '\0';

237

238 return l - start;

239}

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256int

258 unsigned char *p,

260 int lc,

262 const unsigned char *tab,

263 bool noError)

264{

265 const unsigned char *start = mic;

266 unsigned char c1,

267 c2;

268

269 while (len > 0)

270 {

271 c1 = *mic;

272 if (c1 == 0)

273 {

274 if (noError)

275 break;

277 }

279 {

280

281 *p++ = c1;

282 mic++;

284 }

285 else

286 {

288

289 if (len < l)

290 {

291 if (noError)

292 break;

295 }

297 (c2 = tab[mic[1] - HIGHBIT]) == 0)

298 {

299 if (noError)

300 break;

302 (const char *) mic, len);

303 break;

304 }

305 *p++ = c2;

306 mic += 2;

307 len -= 2;

308 }

309 }

310 *p = '\0';

311

312 return mic - start;

313}

314

315

316

317

318

319static int

321{

324 d1,

325 d2;

326

328 s2 = *((const uint32 *) p1 + 1);

331 return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);

332}

333

334

335

336

337

338static int

340{

342 v2;

343

344 v1 = *(const uint32 *) p1;

346 return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);

347}

348

349

350

351

/*
 * store_coded_char: write the bytes of a packed character code to dest,
 * most significant byte first.
 *
 * Each of the four bytes of code is emitted only if it is nonzero, so a zero
 * byte anywhere in the code (leading or not) is skipped and a code of 0
 * writes nothing.
 *
 * Returns the position just past the last byte written.
 */
static inline unsigned char *
store_coded_char(unsigned char *dest, uint32 code)
{
    if (code & 0xff000000)
        *dest++ = code >> 24;
    if (code & 0x00ff0000)
        *dest++ = code >> 16;
    if (code & 0x0000ff00)
        *dest++ = code >> 8;
    if (code & 0x000000ff)
        *dest++ = code;
    return dest;
}

365

366

367

368

369

370

371

374 int l,

375 unsigned char b1,

376 unsigned char b2,

377 unsigned char b3,

378 unsigned char b4)

379{

380 if (l == 4)

381 {

382

383

384

385 if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||

386 b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||

387 b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||

388 b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)

389 return 0;

390

391

393 {

395

400 }

401 else

402 {

404

409 }

410 }

411 else if (l == 3)

412 {

413

414

415

416 if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||

417 b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||

418 b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)

419 return 0;

420

421

423 {

425

429 }

430 else

431 {

433

437 }

438 }

439 else if (l == 2)

440 {

441

442

443

444 if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||

445 b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)

446 return 0;

447

448

450 {

452

455 }

456 else

457 {

459

462 }

463 }

464 else if (l == 1)

465 {

466

467

468

469 if (b4 < rt->b1_lower || b4 > rt->b1_upper)

470 return 0;

471

472

475 else

477 }

478 return 0;

479}

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506int

508 unsigned char *iso,

513{

515 int l;

517 const unsigned char *start = utf;

518

521 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

523

524 for (; len > 0; len -= l)

525 {

526 unsigned char b1 = 0;

527 unsigned char b2 = 0;

528 unsigned char b3 = 0;

529 unsigned char b4 = 0;

530

531

532 if (*utf == '\0')

533 break;

534

536 if (len < l)

537 break;

538

540 break;

541

542 if (l == 1)

543 {

544

545 *iso++ = *utf++;

546 continue;

547 }

548

549

550 if (l == 2)

551 {

552 b3 = *utf++;

553 b4 = *utf++;

554 }

555 else if (l == 3)

556 {

557 b2 = *utf++;

558 b3 = *utf++;

559 b4 = *utf++;

560 }

561 else if (l == 4)

562 {

563 b1 = *utf++;

564 b2 = *utf++;

565 b3 = *utf++;

566 b4 = *utf++;

567 }

568 else

569 {

570 elog(ERROR, "unsupported character length %d", l);

571 iutf = 0;

572 }

573 iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);

574

575

576 if (cmap && len > l)

577 {

578 const unsigned char *utf_save = utf;

579 int len_save = len;

580 int l_save = l;

581

582

583 len -= l;

584

586 if (len < l)

587 {

588

589 utf -= l_save;

590 break;

591 }

592

594 {

595 if (!noError)

597 utf -= l_save;

598 break;

599 }

600

601

602 if (l > 1)

603 {

606

607 if (l == 2)

608 {

609 iutf2 = *utf++ << 8;

610 iutf2 |= *utf++;

611 }

612 else if (l == 3)

613 {

614 iutf2 = *utf++ << 16;

615 iutf2 |= *utf++ << 8;

616 iutf2 |= *utf++;

617 }

618 else if (l == 4)

619 {

620 iutf2 = *utf++ << 24;

621 iutf2 |= *utf++ << 16;

622 iutf2 |= *utf++ << 8;

623 iutf2 |= *utf++;

624 }

625 else

626 {

627 elog(ERROR, "unsupported character length %d", l);

628 iutf2 = 0;

629 }

630

631 cutf[0] = iutf;

632 cutf[1] = iutf2;

633

634 cp = bsearch(cutf, cmap, cmapsize,

636

637 if (cp)

638 {

640 continue;

641 }

642 }

643

644

645 utf = utf_save;

646 len = len_save;

647 l = l_save;

648 }

649

650

651 if (map)

652 {

654

655 if (converted)

656 {

658 continue;

659 }

660 }

661

662

663 if (conv_func)

664 {

665 uint32 converted = (*conv_func) (iutf);

666

667 if (converted)

668 {

670 continue;

671 }

672 }

673

674

675 utf -= l;

676 if (noError)

677 break;

679 (const char *) utf, len);

680 }

681

682

683 if (len > 0 && !noError)

685

686 *iso = '\0';

687

688 return utf - start;

689}

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716int

718 unsigned char *utf,

723 bool noError)

724{

726 int l;

728 const unsigned char *start = iso;

729

732 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

734

735 for (; len > 0; len -= l)

736 {

737 unsigned char b1 = 0;

738 unsigned char b2 = 0;

739 unsigned char b3 = 0;

740 unsigned char b4 = 0;

741

742

743 if (*iso == '\0')

744 break;

745

747 {

748

749 *utf++ = *iso++;

750 l = 1;

751 continue;

752 }

753

755 if (l < 0)

756 break;

757

758

759 if (l == 1)

760 b4 = *iso++;

761 else if (l == 2)

762 {

763 b3 = *iso++;

764 b4 = *iso++;

765 }

766 else if (l == 3)

767 {

768 b2 = *iso++;

769 b3 = *iso++;

770 b4 = *iso++;

771 }

772 else if (l == 4)

773 {

774 b1 = *iso++;

775 b2 = *iso++;

776 b3 = *iso++;

777 b4 = *iso++;

778 }

779 else

780 {

781 elog(ERROR, "unsupported character length %d", l);

782 iiso = 0;

783 }

784 iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);

785

786 if (map)

787 {

789

790 if (converted)

791 {

793 continue;

794 }

795

796

797 if (cmap)

798 {

799 cp = bsearch(&iiso, cmap, cmapsize,

801

802 if (cp)

803 {

806 continue;

807 }

808 }

809 }

810

811

812 if (conv_func)

813 {

814 uint32 converted = (*conv_func) (iiso);

815

816 if (converted)

817 {

819 continue;

820 }

821 }

822

823

824 iso -= l;

825 if (noError)

826 break;

828 (const char *) iso, len);

829 }

830

831

832 if (len > 0 && !noError)

834

835 *utf = '\0';

836

837 return iso - start;

838}

Datum idx(PG_FUNCTION_ARGS)

#define IS_HIGHBIT_SET(ch)

int mic2latin_with_table(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)

int UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)

static int compare3(const void *p1, const void *p2)

int latin2mic_with_table(const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)

static unsigned char * store_coded_char(unsigned char *dest, uint32 code)

int mic2latin(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError)

static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)

int LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)

static int compare4(const void *p1, const void *p2)

int local2local(const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError)

int latin2mic(const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError)

int errcode(int sqlerrcode)

int errmsg(const char *fmt,...)

#define ereport(elevel,...)

void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)

void report_invalid_encoding(int encoding, const char *mbstr, int len)

uint32(* utf_local_conversion_func)(uint32 code)

#define PG_VALID_ENCODING(_enc)

bool pg_utf8_islegal(const unsigned char *source, int length)

int pg_mule_mblen(const unsigned char *s)

int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)