PostgreSQL Source Code: src/backend/utils/mb/conv.c Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32int
34 unsigned char *p,
36 int src_encoding,
37 int dest_encoding,
38 const unsigned char *tab,
39 bool noError)
40{
41 const unsigned char *start = l;
42 unsigned char c1,
43 c2;
44
45 while (len > 0)
46 {
47 c1 = *l;
48 if (c1 == 0)
49 {
50 if (noError)
51 break;
53 }
55 *p++ = c1;
56 else
57 {
59 if (c2)
60 *p++ = c2;
61 else
62 {
63 if (noError)
64 break;
66 (const char *) l, len);
67 }
68 }
69 l++;
71 }
72 *p = '\0';
73
75}
76
77
78
79
80
81
82
83
84
85
86
87
/*
 * latin2mic: copy a single-byte string to p, prefixing every byte that has
 * its high bit set with the leading byte lc.
 *
 * l         source bytes
 * p         output buffer; up to two bytes are written per input byte,
 *           plus a terminating NUL.  No bounds check is made here.
 * len       number of source bytes
 * lc        leading byte emitted before each high-bit byte
 * encoding  encoding id of the input, passed to report_invalid_encoding
 * noError   if true, stop at an embedded zero byte instead of reporting it
 *
 * Returns the number of source bytes consumed, which can be less than len
 * when noError is true.
 *
 * NOTE(review): the code relies on report_invalid_encoding not returning
 * (presumably it raises an error) -- confirm against its definition.
 */
int
latin2mic(const unsigned char *l, unsigned char *p, int len,
		  int lc, int encoding, bool noError)
{
	const unsigned char *start = l;
	int			c1;

	while (len > 0)
	{
		c1 = *l;
		if (c1 == 0)
		{
			/* an embedded zero byte is never valid input */
			if (noError)
				break;
			report_invalid_encoding(encoding, (const char *) l, len);
		}
		if (IS_HIGHBIT_SET(c1))
			*p++ = lc;
		*p++ = c1;
		l++;
		len--;					/* without this the loop never terminates */
	}
	*p = '\0';

	return l - start;
}
114
115
116
117
118
119
120
121
122
123
124
125
/*
 * mic2latin
 *
 * NOTE(review): this text is a numbered listing with lines missing (the
 * listing numbers skip 140, 142, 147, 151, 157-158, 160 and 164).  The
 * conditions selecting the branches below, the declaration of "l" and the
 * heads of the error-report calls are therefore not visible here.
 *
 * What the visible code shows: the input at mic is walked while len > 0.
 * A zero byte ends the loop when noError is set.  One branch copies a
 * single byte to p; the other writes only the second byte of a two-byte
 * sequence (mic[1]) and advances by two.  The output is NUL-terminated and
 * the number of input bytes consumed (mic - start) is returned.
 */
126int
127mic2latin(const unsigned char *mic, unsigned char *p, int len,
128 int lc, int encoding, bool noError)
129{
130 const unsigned char *start = mic;
131 int c1;
132
133 while (len > 0)
134 {
135 c1 = *mic;
136 if (c1 == 0)
137 {
 /* zero byte in the input: stop quietly only when noError is set */
138 if (noError)
139 break;
141 }
143 {
144
 /* single-byte case: copy the byte through */
145 *p++ = c1;
146 mic++;
148 }
149 else
150 {
152
 /* "l" is declared on a missing line; presumably the byte length of
  * the character at mic -- TODO confirm */
153 if (len < l)
154 {
155 if (noError)
156 break;
159 }
161 {
162 if (noError)
163 break;
165 (const char *) mic, len);
166 }
 /* drop the first byte of the pair and keep only the second */
167 *p++ = mic[1];
168 mic += 2;
169 len -= 2;
170 }
171 }
172 *p = '\0';
173
174 return mic - start;
175}
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/*
 * latin2mic_with_table
 *
 * NOTE(review): this text is a numbered listing with lines missing (the
 * listing numbers skip 194, 196, 198, 213, 215, 219, 229 and 234).  The
 * function-name line, some parameters, the branch condition, the
 * assignment to c2 and the heads of the error-report calls are not
 * visible here.
 *
 * What the visible code shows: the input at l is walked while len > 0.
 * A zero byte ends the loop when noError is set.  One branch copies the
 * byte unchanged.  In the other branch a non-zero c2 is written to p
 * preceded by lc; when c2 is zero the loop ends if noError is set.  The
 * output is NUL-terminated and the number of input bytes consumed
 * (l - start) is returned.
 */
193int
195 unsigned char *p,
197 int lc,
199 const unsigned char *tab,
200 bool noError)
201{
202 const unsigned char *start = l;
203 unsigned char c1,
204 c2;
205
206 while (len > 0)
207 {
208 c1 = *l;
209 if (c1 == 0)
210 {
 /* zero byte in the input: stop quietly only when noError is set */
211 if (noError)
212 break;
214 }
216 *p++ = c1;
217 else
218 {
 /* c2 is assigned on a missing line; presumably looked up in tab
  * -- TODO confirm */
220 if (c2)
221 {
 /* translated byte found: emit lc followed by it */
222 *p++ = lc;
223 *p++ = c2;
224 }
225 else
226 {
227 if (noError)
228 break;
230 (const char *) l, len);
231 }
232 }
233 l++;
235 }
236 *p = '\0';
237
238 return l - start;
239}
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/*
 * mic2latin_with_table
 *
 * NOTE(review): this text is a numbered listing with lines missing (the
 * listing numbers skip 257, 259, 261, 276, 278, 283, 287, 293-294, 296
 * and 301).  The function-name line, some parameters, the branch
 * conditions, the declaration of "l" and the heads of the error-report
 * calls are not visible here.
 *
 * What the visible code shows: the input at mic is walked while len > 0.
 * A zero byte ends the loop when noError is set.  One branch copies a
 * single byte.  The other looks up the second byte of a pair in tab
 * (index mic[1] - HIGHBIT), treats a zero entry as a failure, and
 * otherwise writes the looked-up byte and advances by two.  The output is
 * NUL-terminated and the number of input bytes consumed (mic - start) is
 * returned.
 */
256int
258 unsigned char *p,
260 int lc,
262 const unsigned char *tab,
263 bool noError)
264{
265 const unsigned char *start = mic;
266 unsigned char c1,
267 c2;
268
269 while (len > 0)
270 {
271 c1 = *mic;
272 if (c1 == 0)
273 {
 /* zero byte in the input: stop quietly only when noError is set */
274 if (noError)
275 break;
277 }
279 {
280
 /* single-byte case: copy the byte through */
281 *p++ = c1;
282 mic++;
284 }
285 else
286 {
288
 /* "l" is declared on a missing line; presumably the byte length of
  * the character at mic -- TODO confirm */
289 if (len < l)
290 {
291 if (noError)
292 break;
295 }
 /* tail of a condition whose first part is missing: the table entry
  * for the second byte is fetched into c2, and 0 means failure */
297 (c2 = tab[mic[1] - HIGHBIT]) == 0)
298 {
299 if (noError)
300 break;
302 (const char *) mic, len);
303 break;
304 }
305 *p++ = c2;
306 mic += 2;
307 len -= 2;
308 }
309 }
310 *p = '\0';
311
312 return mic - start;
313}
314
315
316
317
318
/*
 * compare3
 *
 * Three-way comparison of the pair (s1, s2) against the pair (d1, d2):
 * returns 1 if (s1, s2) is greater, 0 if both members are equal, and -1
 * otherwise, with the first member taking precedence.
 *
 * NOTE(review): listing lines 320, 322-323, 327 and 329-330 are missing
 * here, so the function-name line, part of the declaration and the loads
 * of s1, d1 and d2 are not visible.  Only the load of s2 (the second
 * uint32 at p1) can be seen.
 */
319static int
321{
324 d1,
325 d2;
326
328 s2 = *((const uint32 *) p1 + 1);
331 return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
332}
333
334
335
336
337
/*
 * compare4
 *
 * Three-way comparison of v1 against v2: returns 1 if v1 > v2, 0 if they
 * are equal, and -1 otherwise.  v1 is the uint32 read from p1.
 *
 * NOTE(review): listing lines 339, 341 and 345 are missing here, so the
 * function-name line, the start of the declaration and the load of v2 are
 * not visible.
 */
338static int
340{
342 v2;
343
344 v1 = *(const uint32 *) p1;
346 return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
347}
348
349
350
351
352static inline unsigned char *
354{
355 if (code & 0xff000000)
356 *dest++ = code >> 24;
357 if (code & 0x00ff0000)
358 *dest++ = code >> 16;
359 if (code & 0x0000ff00)
360 *dest++ = code >> 8;
361 if (code & 0x000000ff)
362 *dest++ = code;
364}
365
366
367
368
369
370
371
374 int l,
375 unsigned char b1,
376 unsigned char b2,
377 unsigned char b3,
378 unsigned char b4)
379{
 /*
  * NOTE(review): this text is a numbered listing with lines missing.  The
  * head of the signature (listing lines 372-373) and every table-lookup
  * statement inside the branches below are not visible here.
  *
  * What the visible code shows: the input length l (1 to 4) selects a
  * branch.  Each branch first checks the bytes in use against per-position
  * lower/upper bounds stored in rt and returns 0 when any byte is out of
  * range.  Bytes are right-aligned: a 1-byte input uses b4, a 2-byte
  * input b3 and b4, a 3-byte input b2 to b4, and a 4-byte input all four.
  * Falling out of the branches also returns 0.
  */
380 if (l == 4)
381 {
382
383
384
 /* reject 4-byte inputs outside the ranges covered by rt */
385 if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
386 b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
387 b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
388 b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
389 return 0;
390
391
393 {
395
400 }
401 else
402 {
404
409 }
410 }
411 else if (l == 3)
412 {
413
414
415
 /* reject 3-byte inputs outside the ranges covered by rt */
416 if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
417 b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
418 b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
419 return 0;
420
421
423 {
425
429 }
430 else
431 {
433
437 }
438 }
439 else if (l == 2)
440 {
441
442
443
 /* reject 2-byte inputs outside the ranges covered by rt */
444 if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
445 b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
446 return 0;
447
448
450 {
452
455 }
456 else
457 {
459
462 }
463 }
464 else if (l == 1)
465 {
466
467
468
 /* reject 1-byte inputs outside the range covered by rt */
469 if (b4 < rt->b1_lower || b4 > rt->b1_upper)
470 return 0;
471
472
475 else
477 }
478 return 0;
479}
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
/*
 * UtfToLocal
 *
 * NOTE(review): this text is a numbered listing with many lines missing
 * (the listing numbers skip).  The function-name line, most parameters,
 * several declarations, the loop header, the validity checks, the map
 * lookup and the statements that store a converted character are not
 * visible here.
 *
 * What the visible code shows, per input character:
 *   - a zero byte, or fewer than l bytes remaining, ends the loop;
 *   - a 1-byte character is copied through unchanged;
 *   - longer characters are packed big-endian into iutf via b1..b4;
 *   - if cmap is given and more input follows, the next character is
 *     packed into iutf2 and the pair is looked up in cmap with bsearch;
 *     without a match the position is restored;
 *   - then map is tried, then conv_func;
 *   - if nothing converts, utf is stepped back to the start of the
 *     character and the loop ends when noError is set.
 * Finally the output is NUL-terminated and the number of input bytes
 * consumed (utf - start) is returned.
 */
506int
508 unsigned char *iso,
513{
515 int l;
517 const unsigned char *start = utf;
518
521 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
523
525 {
 /* bytes of the current character, right-aligned in b1..b4 */
526 unsigned char b1 = 0;
527 unsigned char b2 = 0;
528 unsigned char b3 = 0;
529 unsigned char b4 = 0;
530
531
 /* stop at a zero byte in the input */
532 if (*utf == '\0')
533 break;
534
 /* l is assigned on a missing line; presumably the byte length of the
  * character at utf -- TODO confirm */
536 if (len < l)
537 break;
538
540 break;
541
542 if (l == 1)
543 {
544
 /* single-byte characters are copied through unchanged */
545 *iso++ = *utf++;
546 continue;
547 }
548
549
550 if (l == 2)
551 {
552 b3 = *utf++;
553 b4 = *utf++;
554 }
555 else if (l == 3)
556 {
557 b2 = *utf++;
558 b3 = *utf++;
559 b4 = *utf++;
560 }
561 else if (l == 4)
562 {
563 b1 = *utf++;
564 b2 = *utf++;
565 b3 = *utf++;
566 b4 = *utf++;
567 }
568 else
569 {
570 elog(ERROR, "unsupported character length %d", l);
571 iutf = 0;
572 }
 /*
  * NOTE(review): b1 is an unsigned char and is promoted to int before the
  * shift, so b1 << 24 is not representable in a 32-bit int when
  * b1 >= 0x80.  The declared type of iutf is not visible here; consider
  * casting to an unsigned 32-bit type before shifting -- confirm.
  */
573 iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
574
575
 /* combined-character lookup: only when input remains after this char */
576 if (cmap && len > l)
577 {
 /* remember the position so it can be restored if no pair matches */
578 const unsigned char *utf_save = utf;
579 int len_save = len;
580 int l_save = l;
581
582
583 len -= l;
584
 /* l is presumably recomputed for the second character on the missing
  * listing line 585 -- TODO confirm */
586 if (len < l)
587 {
588
 /* second character is incomplete: back up to the first one and stop */
589 utf -= l_save;
590 break;
591 }
592
594 {
595 if (!noError)
597 utf -= l_save;
598 break;
599 }
600
601
602 if (l > 1)
603 {
606
 /* pack the second character big-endian into iutf2 */
607 if (l == 2)
608 {
609 iutf2 = *utf++ << 8;
610 iutf2 |= *utf++;
611 }
612 else if (l == 3)
613 {
614 iutf2 = *utf++ << 16;
615 iutf2 |= *utf++ << 8;
616 iutf2 |= *utf++;
617 }
618 else if (l == 4)
619 {
620 iutf2 = *utf++ << 24;
621 iutf2 |= *utf++ << 16;
622 iutf2 |= *utf++ << 8;
623 iutf2 |= *utf++;
624 }
625 else
626 {
627 elog(ERROR, "unsupported character length %d", l);
628 iutf2 = 0;
629 }
630
 /* search cmap for the (first, second) character pair */
631 cutf[0] = iutf;
632 cutf[1] = iutf2;
633
634 cp = bsearch(cutf, cmap, cmapsize,
636
637 if (cp)
638 {
640 continue;
641 }
642 }
643
644
 /* no combined match: go back to just after the first character */
645 utf = utf_save;
646 len = len_save;
647 l = l_save;
648 }
649
650
 /* ordinary map lookup; "converted" is assigned on a missing line */
651 if (map)
652 {
654
655 if (converted)
656 {
658 continue;
659 }
660 }
661
662
 /* last resort: the caller-supplied conversion function */
663 if (conv_func)
664 {
665 uint32 converted = (*conv_func) (iutf);
666
667 if (converted)
668 {
670 continue;
671 }
672 }
673
674
 /* nothing translated this character: point back at its first byte */
675 utf -= l;
676 if (noError)
677 break;
679 (const char *) utf, len);
680 }
681
682
683 if (len > 0 && !noError)
685
686 *iso = '\0';
687
688 return utf - start;
689}
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
/*
 * LocalToUtf
 *
 * NOTE(review): this text is a numbered listing with many lines missing
 * (the listing numbers skip).  The function-name line, most parameters,
 * several declarations, the loop header, the length computation, the map
 * lookup and the statements that store a converted character are not
 * visible here.
 *
 * What the visible code shows, per input character:
 *   - a zero byte ends the loop;
 *   - one branch (condition not visible) copies a single byte unchanged;
 *   - a negative l ends the loop;
 *   - otherwise the l bytes are packed big-endian into iiso via b1..b4;
 *   - if map is given it is tried first, and on failure cmap is searched
 *     with bsearch using iiso as the key;
 *   - then conv_func is tried;
 *   - if nothing converts, iso is stepped back to the start of the
 *     character and the loop ends when noError is set.
 * Finally the output is NUL-terminated and the number of input bytes
 * consumed (iso - start) is returned.
 */
716int
718 unsigned char *utf,
723 bool noError)
724{
726 int l;
728 const unsigned char *start = iso;
729
732 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
734
736 {
 /* bytes of the current character, right-aligned in b1..b4 */
737 unsigned char b1 = 0;
738 unsigned char b2 = 0;
739 unsigned char b3 = 0;
740 unsigned char b4 = 0;
741
742
 /* stop at a zero byte in the input */
743 if (*iso == '\0')
744 break;
745
747 {
748
 /* copy one byte through unchanged; the guarding condition is on a
  * missing line */
749 *utf++ = *iso++;
750 l = 1;
751 continue;
752 }
753
 /* l is assigned on a missing line; a negative value ends the loop */
755 if (l < 0)
756 break;
757
758
759 if (l == 1)
760 b4 = *iso++;
761 else if (l == 2)
762 {
763 b3 = *iso++;
764 b4 = *iso++;
765 }
766 else if (l == 3)
767 {
768 b2 = *iso++;
769 b3 = *iso++;
770 b4 = *iso++;
771 }
772 else if (l == 4)
773 {
774 b1 = *iso++;
775 b2 = *iso++;
776 b3 = *iso++;
777 b4 = *iso++;
778 }
779 else
780 {
781 elog(ERROR, "unsupported character length %d", l);
782 iiso = 0;
783 }
 /*
  * NOTE(review): b1 is an unsigned char and is promoted to int before the
  * shift, so b1 << 24 is not representable in a 32-bit int when
  * b1 >= 0x80.  The declared type of iiso is not visible here; consider
  * casting to an unsigned 32-bit type before shifting -- confirm.
  */
784 iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
785
786 if (map)
787 {
789
 /* "converted" is assigned on a missing line */
790 if (converted)
791 {
793 continue;
794 }
795
796
 /* not in the ordinary map: try the combined-character map */
797 if (cmap)
798 {
799 cp = bsearch(&iiso, cmap, cmapsize,
801
802 if (cp)
803 {
806 continue;
807 }
808 }
809 }
810
811
 /* last resort: the caller-supplied conversion function */
812 if (conv_func)
813 {
814 uint32 converted = (*conv_func) (iiso);
815
816 if (converted)
817 {
819 continue;
820 }
821 }
822
823
 /* nothing translated this character: point back at its first byte */
824 iso -= l;
825 if (noError)
826 break;
828 (const char *) iso, len);
829 }
830
831
832 if (len > 0 && !noError)
834
835 *utf = '\0';
836
837 return iso - start;
838}
Datum idx(PG_FUNCTION_ARGS)
#define IS_HIGHBIT_SET(ch)
int mic2latin_with_table(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
int UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
static int compare3(const void *p1, const void *p2)
int latin2mic_with_table(const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
int mic2latin(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError)
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
int LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
static int compare4(const void *p1, const void *p2)
int local2local(const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError)
int latin2mic(const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
void report_invalid_encoding(int encoding, const char *mbstr, int len)
uint32(* utf_local_conversion_func)(uint32 code)
#define PG_VALID_ENCODING(_enc)
bool pg_utf8_islegal(const unsigned char *source, int length)
int pg_mule_mblen(const unsigned char *s)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)