PostgreSQL Source Code: src/include/mb/pg_wchar.h File Reference (original) (raw)
Go to the source code of this file.
Data Structures | |
---|---|
struct | pg_enc2name |
struct | pg_wchar_tbl |
struct | pg_mb_radix_tree |
struct | pg_utf_to_local_combined |
struct | pg_local_to_utf_combined |
Macros | |
---|---|
#define | MAX_MULTIBYTE_CHAR_LEN 4 |
#define | SS2 0x8e /* single shift 2 (JIS0201) */ |
#define | SS3 0x8f /* single shift 3 (JIS0212) */ |
#define | ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc)) |
#define | ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc)) |
#define | LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */ |
#define | LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */ |
#define | LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */ |
#define | LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */ |
#define | LC_TIS620 0x85 /* Thai (not supported yet) */ |
#define | LC_ISO8859_7 0x86 /* Greek (not supported yet) */ |
#define | LC_ISO8859_6 0x87 /* Arabic (not supported yet) */ |
#define | LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */ |
#define | LC_JISX0201K 0x89 /* Japanese 1 byte kana */ |
#define | LC_JISX0201R 0x8a /* Japanese 1 byte Roman */ |
#define | LC_KOI8_R 0x8b /* Cyrillic KOI8-R */ |
#define | LC_ISO8859_5 0x8c /* ISO8859 Cyrillic */ |
#define | LC_ISO8859_9 0x8d /* ISO8859 Latin 5 (not supported yet) */ |
#define | LC_ISO8859_15 0x8e /* ISO8859 Latin 15 (not supported yet) */ |
#define | IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d) |
#define | LC_JISX0208_1978 0x90 /* Japanese Kanji, old JIS (not supported) */ |
#define | LC_GB2312_80 0x91 /* Chinese */ |
#define | LC_JISX0208 0x92 /* Japanese Kanji (JIS X 0208) */ |
#define | LC_KS5601 0x93 /* Korean */ |
#define | LC_JISX0212 0x94 /* Japanese Kanji (JIS X 0212) */ |
#define | LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */ |
#define | LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */ |
#define | LC_JISX0213_1 |
#define | LC_BIG5_1 |
#define | LC_BIG5_2 |
#define | IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99) |
#define | LCPRV1_A 0x9a |
#define | LCPRV1_B 0x9b |
#define | IS_LCPRV1(c) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B) |
#define | IS_LCPRV1_A_RANGE(c) ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf) |
#define | IS_LCPRV1_B_RANGE(c) ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef) |
#define | LCPRV2_A 0x9c |
#define | LCPRV2_B 0x9d |
#define | IS_LCPRV2(c) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B) |
#define | IS_LCPRV2_A_RANGE(c) ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4) |
#define | IS_LCPRV2_B_RANGE(c) ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe) |
#define | LC_SISHENG |
#define | LC_IPA |
#define | LC_VISCII_LOWER |
#define | LC_VISCII_UPPER |
#define | LC_ARABIC_DIGIT 0xa4 /* Arabic digit (not supported) */ |
#define | LC_ARABIC_1_COLUMN 0xa5 /* Arabic 1-column (not supported) */ |
#define | LC_ASCII_RIGHT_TO_LEFT |
#define | LC_LAO |
#define | LC_ARABIC_2_COLUMN 0xa8 /* Arabic 2-column (not supported) */ |
#define | LC_INDIAN_1_COLUMN |
#define | LC_TIBETAN_1_COLUMN |
#define | LC_UNICODE_SUBSET_2 |
#define | LC_UNICODE_SUBSET_3 |
#define | LC_UNICODE_SUBSET |
#define | LC_ETHIOPIC 0xf5 /* Ethiopic characters (not supported) */ |
#define | LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */ |
#define | LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */ |
#define | LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */ |
#define | LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */ |
#define | LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */ |
#define | LC_INDIAN_2_COLUMN |
#define | LC_TIBETAN 0xfc /* Tibetan (not supported) */ |
#define | PG_ENCODING_BE_LAST PG_KOI8U |
#define | PG_VALID_BE_ENCODING(_enc) ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST) |
#define | PG_ENCODING_IS_CLIENT_ONLY(_enc) ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_) |
#define | PG_VALID_ENCODING(_enc) ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_) |
#define | PG_VALID_FE_ENCODING(_enc) PG_VALID_ENCODING(_enc) |
#define | MAX_CONVERSION_GROWTH 4 |
#define | MAX_CONVERSION_INPUT_LENGTH 16 |
#define | MAX_UNICODE_EQUIVALENT_STRING 16 |
#define | CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding) |
#define | pg_char_to_encoding pg_char_to_encoding_private |
#define | pg_encoding_to_char pg_encoding_to_char_private |
#define | pg_valid_server_encoding pg_valid_server_encoding_private |
#define | pg_valid_server_encoding_id pg_valid_server_encoding_id_private |
#define | pg_utf_mblen pg_utf_mblen_private |
Typedefs | |
---|---|
typedef unsigned int | pg_wchar |
typedef enum pg_enc | pg_enc |
typedef struct pg_enc2name | pg_enc2name |
typedef int(* | mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len) |
typedef int(* | wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len) |
typedef int(* | mblen_converter) (const unsigned char *mbstr) |
typedef int(* | mbdisplaylen_converter) (const unsigned char *mbstr) |
typedef bool(* | mbcharacter_incrementer) (unsigned char *mbstr, int len) |
typedef int(* | mbchar_verifier) (const unsigned char *mbstr, int len) |
typedef int(* | mbstr_verifier) (const unsigned char *mbstr, int len) |
typedef uint32(* | utf_local_conversion_func) (uint32 code) |
Enumerations | |
---|---|
enum | pg_enc { PG_SQL_ASCII = 0 , PG_EUC_JP, PG_EUC_CN, PG_EUC_KR, PG_EUC_TW, PG_EUC_JIS_2004, PG_UTF8, PG_MULE_INTERNAL, PG_LATIN1, PG_LATIN2, PG_LATIN3, PG_LATIN4, PG_LATIN5, PG_LATIN6, PG_LATIN7, PG_LATIN8, PG_LATIN9, PG_LATIN10, PG_WIN1256, PG_WIN1258, PG_WIN866, PG_WIN874, PG_KOI8R, PG_WIN1251, PG_WIN1252, PG_ISO_8859_5, PG_ISO_8859_6, PG_ISO_8859_7, PG_ISO_8859_8, PG_WIN1250, PG_WIN1253, PG_WIN1254, PG_WIN1255, PG_WIN1257, PG_KOI8U, PG_SJIS, PG_BIG5, PG_GBK, PG_UHC, PG_GB18030, PG_JOHAB, PG_SHIFT_JIS_2004, _PG_LAST_ENCODING_ } |
Functions | |
---|---|
static bool | is_valid_unicode_codepoint (pg_wchar c) |
static bool | is_utf16_surrogate_first (pg_wchar c) |
static bool | is_utf16_surrogate_second (pg_wchar c) |
static pg_wchar | surrogate_pair_to_codepoint (pg_wchar first, pg_wchar second) |
static pg_wchar | utf8_to_unicode (const unsigned char *c) |
static unsigned char * | unicode_to_utf8 (pg_wchar c, unsigned char *utf8string) |
static int | unicode_utf8len (pg_wchar c) |
int | pg_char_to_encoding (const char *name) |
const char * | pg_encoding_to_char (int encoding) |
int | pg_valid_server_encoding_id (int encoding) |
void | pg_encoding_set_invalid (int encoding, char *dst) |
int | pg_encoding_mblen (int encoding, const char *mbstr) |
int | pg_encoding_mblen_or_incomplete (int encoding, const char *mbstr, size_t remaining) |
int | pg_encoding_mblen_bounded (int encoding, const char *mbstr) |
int | pg_encoding_dsplen (int encoding, const char *mbstr) |
int | pg_encoding_verifymbchar (int encoding, const char *mbstr, int len) |
int | pg_encoding_verifymbstr (int encoding, const char *mbstr, int len) |
int | pg_encoding_max_length (int encoding) |
int | pg_valid_client_encoding (const char *name) |
int | pg_valid_server_encoding (const char *name) |
bool | is_encoding_supported_by_icu (int encoding) |
const char * | get_encoding_name_for_icu (int encoding) |
bool | pg_utf8_islegal (const unsigned char *source, int length) |
int | pg_utf_mblen (const unsigned char *s) |
int | pg_mule_mblen (const unsigned char *s) |
int | pg_mb2wchar (const char *from, pg_wchar *to) |
int | pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len) |
int | pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len) |
int | pg_wchar2mb (const pg_wchar *from, char *to) |
int | pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len) |
int | pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len) |
int | pg_char_and_wchar_strcmp (const char *s1, const pg_wchar *s2) |
int | pg_wchar_strncmp (const pg_wchar *s1, const pg_wchar *s2, size_t n) |
int | pg_char_and_wchar_strncmp (const char *s1, const pg_wchar *s2, size_t n) |
size_t | pg_wchar_strlen (const pg_wchar *str) |
int | pg_mblen (const char *mbstr) |
int | pg_dsplen (const char *mbstr) |
int | pg_mbstrlen (const char *mbstr) |
int | pg_mbstrlen_with_len (const char *mbstr, int limit) |
int | pg_mbcliplen (const char *mbstr, int len, int limit) |
int | pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit) |
int | pg_mbcharcliplen (const char *mbstr, int len, int limit) |
int | pg_database_encoding_max_length (void) |
mbcharacter_incrementer | pg_database_encoding_character_incrementer (void) |
int | PrepareClientEncoding (int encoding) |
int | SetClientEncoding (int encoding) |
void | InitializeClientEncoding (void) |
int | pg_get_client_encoding (void) |
const char * | pg_get_client_encoding_name (void) |
void | SetDatabaseEncoding (int encoding) |
int | GetDatabaseEncoding (void) |
const char * | GetDatabaseEncodingName (void) |
void | SetMessageEncoding (int encoding) |
int | GetMessageEncoding (void) |
unsigned char * | pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding) |
int | pg_do_encoding_conversion_buf (Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError) |
char * | pg_client_to_server (const char *s, int len) |
char * | pg_server_to_client (const char *s, int len) |
char * | pg_any_to_server (const char *s, int len, int encoding) |
char * | pg_server_to_any (const char *s, int len, int encoding) |
void | pg_unicode_to_server (pg_wchar c, unsigned char *s) |
bool | pg_unicode_to_server_noerror (pg_wchar c, unsigned char *s) |
unsigned short | BIG5toCNS (unsigned short big5, unsigned char *lc) |
unsigned short | CNStoBIG5 (unsigned short cns, unsigned char lc) |
int | UtfToLocal (const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError) |
int | LocalToUtf (const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError) |
bool | pg_verifymbstr (const char *mbstr, int len, bool noError) |
bool | pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError) |
int | pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError) |
void | check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding) |
pg_noreturn void | report_invalid_encoding (int encoding, const char *mbstr, int len) |
pg_noreturn void | report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len) |
int | local2local (const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError) |
int | latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError) |
int | mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError) |
int | latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError) |
int | mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError) |
Variables | |
---|---|
PGDLLIMPORT const pg_enc2name | pg_enc2name_tbl [] |
PGDLLIMPORT const char * | pg_enc2gettext_tbl [] |
PGDLLIMPORT const pg_wchar_tbl | pg_wchar_table [] |
◆ CHECK_ENCODING_CONVERSION_ARGS
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Value:
check_encoding_conversion_args(PG_GETARG_INT32(0), \
PG_GETARG_INT32(1), \
PG_GETARG_INT32(4), \
(srcencoding), \
(destencoding))
#define PG_GETARG_INT32(n)
void check_encoding_conversion_args(int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
Definition at line 507 of file pg_wchar.h.
◆ IS_LC1
#define IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)
◆ IS_LC2
#define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
◆ IS_LCPRV1
◆ IS_LCPRV1_A_RANGE
#define IS_LCPRV1_A_RANGE(c) ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
◆ IS_LCPRV1_B_RANGE
#define IS_LCPRV1_B_RANGE(c) ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
◆ IS_LCPRV2
◆ IS_LCPRV2_A_RANGE
#define IS_LCPRV2_A_RANGE(c) ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
◆ IS_LCPRV2_B_RANGE
#define IS_LCPRV2_B_RANGE(c) ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
◆ ISSJISHEAD
#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
◆ ISSJISTAIL
#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
◆ LC_ARABIC_1_COLUMN
#define LC_ARABIC_1_COLUMN 0xa5 /* Arabic 1-column (not supported) */
◆ LC_ARABIC_2_COLUMN
#define LC_ARABIC_2_COLUMN 0xa8 /* Arabic 2-column (not supported) */
◆ LC_ARABIC_DIGIT
#define LC_ARABIC_DIGIT 0xa4 /* Arabic digit (not supported) */
◆ LC_ASCII_RIGHT_TO_LEFT
#define LC_ASCII_RIGHT_TO_LEFT
◆ LC_BIG5_1
◆ LC_BIG5_2
◆ LC_CNS11643_1
#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */
◆ LC_CNS11643_2
#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */
◆ LC_CNS11643_3
#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */
◆ LC_CNS11643_4
#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */
◆ LC_CNS11643_5
#define LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */
◆ LC_CNS11643_6
#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */
◆ LC_CNS11643_7
#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */
◆ LC_ETHIOPIC
#define LC_ETHIOPIC 0xf5 /* Ethiopic characters (not supported) */
◆ LC_GB2312_80
#define LC_GB2312_80 0x91 /* Chinese */
◆ LC_INDIAN_1_COLUMN
#define LC_INDIAN_1_COLUMN
◆ LC_INDIAN_2_COLUMN
#define LC_INDIAN_2_COLUMN
◆ LC_IPA
◆ LC_ISO8859_1
#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */
◆ LC_ISO8859_15
#define LC_ISO8859_15 0x8e /* ISO8859 Latin 15 (not supported yet) */
◆ LC_ISO8859_2
#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */
◆ LC_ISO8859_3
#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */
◆ LC_ISO8859_4
#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */
◆ LC_ISO8859_5
#define LC_ISO8859_5 0x8c /* ISO8859 Cyrillic */
◆ LC_ISO8859_6
#define LC_ISO8859_6 0x87 /* Arabic (not supported yet) */
◆ LC_ISO8859_7
#define LC_ISO8859_7 0x86 /* Greek (not supported yet) */
◆ LC_ISO8859_8
#define LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */
◆ LC_ISO8859_9
#define LC_ISO8859_9 0x8d /* ISO8859 Latin 5 (not supported yet) */
◆ LC_JISX0201K
#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */
◆ LC_JISX0201R
#define LC_JISX0201R 0x8a /* Japanese 1 byte Roman */
◆ LC_JISX0208
#define LC_JISX0208 0x92 /* Japanese Kanji (JIS X 0208) */
◆ LC_JISX0208_1978
#define LC_JISX0208_1978 0x90 /* Japanese Kanji, old JIS (not supported) */
◆ LC_JISX0212
#define LC_JISX0212 0x94 /* Japanese Kanji (JIS X 0212) */
◆ LC_JISX0213_1
◆ LC_KOI8_R
#define LC_KOI8_R 0x8b /* Cyrillic KOI8-R */
◆ LC_KS5601
#define LC_KS5601 0x93 /* Korean */
◆ LC_LAO
◆ LC_SISHENG
◆ LC_TIBETAN
#define LC_TIBETAN 0xfc /* Tibetan (not supported) */
◆ LC_TIBETAN_1_COLUMN
#define LC_TIBETAN_1_COLUMN
◆ LC_TIS620
#define LC_TIS620 0x85 /* Thai (not supported yet) */
◆ LC_UNICODE_SUBSET
#define LC_UNICODE_SUBSET
◆ LC_UNICODE_SUBSET_2
#define LC_UNICODE_SUBSET_2
◆ LC_UNICODE_SUBSET_3
#define LC_UNICODE_SUBSET_3
◆ LC_VISCII_LOWER
◆ LC_VISCII_UPPER
◆ LCPRV1_A
◆ LCPRV1_B
◆ LCPRV2_A
◆ LCPRV2_B
◆ MAX_CONVERSION_GROWTH
#define MAX_CONVERSION_GROWTH 4
◆ MAX_CONVERSION_INPUT_LENGTH
#define MAX_CONVERSION_INPUT_LENGTH 16
◆ MAX_MULTIBYTE_CHAR_LEN
#define MAX_MULTIBYTE_CHAR_LEN 4
◆ MAX_UNICODE_EQUIVALENT_STRING
#define MAX_UNICODE_EQUIVALENT_STRING 16
◆ pg_char_to_encoding
#define pg_char_to_encoding pg_char_to_encoding_private
◆ PG_ENCODING_BE_LAST
◆ PG_ENCODING_IS_CLIENT_ONLY
◆ pg_encoding_to_char
#define pg_encoding_to_char pg_encoding_to_char_private
◆ pg_utf_mblen
#define pg_utf_mblen pg_utf_mblen_private
◆ PG_VALID_BE_ENCODING
◆ PG_VALID_ENCODING
◆ PG_VALID_FE_ENCODING
◆ pg_valid_server_encoding
#define pg_valid_server_encoding pg_valid_server_encoding_private
◆ pg_valid_server_encoding_id
#define pg_valid_server_encoding_id pg_valid_server_encoding_id_private
◆ SS2
#define SS2 0x8e /* single shift 2 (JIS0201) */
◆ SS3
#define SS3 0x8f /* single shift 3 (JIS0212) */
◆ mb2wchar_with_len_converter
typedef int(* mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len)
◆ mbchar_verifier
typedef int(* mbchar_verifier) (const unsigned char *mbstr, int len)
◆ mbcharacter_incrementer
typedef bool(* mbcharacter_incrementer) (unsigned char *mbstr, int len)
◆ mbdisplaylen_converter
typedef int(* mbdisplaylen_converter) (const unsigned char *mbstr)
◆ mblen_converter
typedef int(* mblen_converter) (const unsigned char *mbstr)
◆ mbstr_verifier
typedef int(* mbstr_verifier) (const unsigned char *mbstr, int len)
◆ pg_enc
◆ pg_enc2name
◆ utf_local_conversion_func
◆ wchar2mb_with_len_converter
typedef int(* wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len)
◆ pg_enc
Enumerator |
---|
PG_SQL_ASCII |
PG_EUC_JP |
PG_EUC_CN |
PG_EUC_KR |
PG_EUC_TW |
PG_EUC_JIS_2004 |
PG_UTF8 |
PG_MULE_INTERNAL |
PG_LATIN1 |
PG_LATIN2 |
PG_LATIN3 |
PG_LATIN4 |
PG_LATIN5 |
PG_LATIN6 |
PG_LATIN7 |
PG_LATIN8 |
PG_LATIN9 |
PG_LATIN10 |
PG_WIN1256 |
PG_WIN1258 |
PG_WIN866 |
PG_WIN874 |
PG_KOI8R |
PG_WIN1251 |
PG_WIN1252 |
PG_ISO_8859_5 |
PG_ISO_8859_6 |
PG_ISO_8859_7 |
PG_ISO_8859_8 |
PG_WIN1250 |
PG_WIN1253 |
PG_WIN1254 |
PG_WIN1255 |
PG_WIN1257 |
PG_KOI8U |
PG_SJIS |
PG_BIG5 |
PG_GBK |
PG_UHC |
PG_GB18030 |
PG_JOHAB |
PG_SHIFT_JIS_2004 |
_PG_LAST_ENCODING_ |
Definition at line 224 of file pg_wchar.h.
227 * WARNING: If you add some encoding don't forget to update
228 * the pg_enc2name_tbl[] array (in src/common/encnames.c),
229 * the pg_enc2gettext_tbl[] array (in src/common/encnames.c) and
230 * the pg_wchar_table[] array (in src/common/wchar.c) and to check
231 * PG_ENCODING_BE_LAST macro.
232 *
233 * PG_SQL_ASCII is default encoding and must be = 0.
234 *
235 * XXX We must avoid renumbering any backend encoding until libpq's major
236 * version number is increased beyond 5; it turns out that the backend
237 * encoding IDs are effectively part of libpq's ABI as far as 8.2 initdb and
238 * psql are concerned.
239 */
240typedef enum pg_enc
241{
242 PG_SQL_ASCII = 0, /* SQL/ASCII */
243 PG_EUC_JP, /* EUC for Japanese */
244 PG_EUC_CN, /* EUC for Chinese */
245 PG_EUC_KR, /* EUC for Korean */
246 PG_EUC_TW, /* EUC for Taiwan */
247 PG_EUC_JIS_2004, /* EUC-JIS-2004 */
248 PG_UTF8, /* Unicode UTF8 */
249 PG_MULE_INTERNAL, /* Mule internal code */
250 PG_LATIN1, /* ISO-8859-1 Latin 1 */
251 PG_LATIN2, /* ISO-8859-2 Latin 2 */
252 PG_LATIN3, /* ISO-8859-3 Latin 3 */
253 PG_LATIN4, /* ISO-8859-4 Latin 4 */
254 PG_LATIN5, /* ISO-8859-9 Latin 5 */
255 PG_LATIN6, /* ISO-8859-10 Latin6 */
256 PG_LATIN7, /* ISO-8859-13 Latin7 */
257 PG_LATIN8, /* ISO-8859-14 Latin8 */
258 PG_LATIN9, /* ISO-8859-15 Latin9 */
259 PG_LATIN10, /* ISO-8859-16 Latin10 */
260 PG_WIN1256, /* windows-1256 */
261 PG_WIN1258, /* Windows-1258 */
262 PG_WIN866, /* (MS-DOS CP866) */
263 PG_WIN874, /* windows-874 */
264 PG_KOI8R, /* KOI8-R */
265 PG_WIN1251, /* windows-1251 */
266 PG_WIN1252, /* windows-1252 */
267 PG_ISO_8859_5, /* ISO-8859-5 */
268 PG_ISO_8859_6, /* ISO-8859-6 */
269 PG_ISO_8859_7, /* ISO-8859-7 */
270 PG_ISO_8859_8, /* ISO-8859-8 */
271 PG_WIN1250, /* windows-1250 */
272 PG_WIN1253, /* windows-1253 */
273 PG_WIN1254, /* windows-1254 */
◆ BIG5toCNS()
unsigned short BIG5toCNS | ( | unsigned short | big5, |
---|---|---|---|
unsigned char * | lc | ||
) |
Definition at line 292 of file big5.c.
293{
294 unsigned short cns = 0;
295 int i;
296
297 if (big5 < 0xc940U)
298 {
299
300
301 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
302 {
304 {
306 return (b1c4[i][1] | 0x8080U);
307 }
308 }
309
312 }
313 else if (big5 == 0xc94aU)
314 {
315
317 cns = 0x4442;
318 }
319 else
320 {
321
322 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
323 {
325 {
327 return (b2c3[i][1] | 0x8080U);
328 }
329 }
330
333 }
334
335 if (0 == cns)
336 {
337 *lc = 0;
338 return (unsigned short) '?';
339 }
340
341 return cns | 0x8080;
342}
static const codes_t big5Level1ToCnsPlane1[25]
static const codes_t big5Level2ToCnsPlane2[48]
static unsigned short BinarySearchRange(const codes_t *array, int high, unsigned short code)
static const unsigned short b2c3[][2]
static const unsigned short b1c4[][2]
References b1c4, b2c3, big5Level1ToCnsPlane1, big5Level2ToCnsPlane2, BinarySearchRange(), i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.
Referenced by big52euc_tw(), and big52mic().
◆ check_encoding_conversion_args()
void check_encoding_conversion_args | ( | int | src_encoding, |
---|---|---|---|
int | dest_encoding, | ||
int | len, | ||
int | expected_src_encoding, | ||
int | expected_dest_encoding | ||
) |
Definition at line 1670 of file mbutils.c.
1675{
1677 elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1678 if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
1679 elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1683 elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1684 if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
1685 elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1688 if (len < 0)
1689 elog(ERROR, "encoding conversion length must not be negative");
1690}
const pg_enc2name pg_enc2name_tbl[]
#define PG_VALID_ENCODING(_enc)
References elog, ERROR, len, name, pg_enc2name_tbl, and PG_VALID_ENCODING.
◆ CNStoBIG5()
unsigned short CNStoBIG5 | ( | unsigned short | cns, |
---|---|---|---|
unsigned char | lc | ||
) |
Definition at line 345 of file big5.c.
346{
347 int i;
348 unsigned int big5 = 0;
349
350 cns &= 0x7f7f;
351
352 switch (lc)
353 {
356 break;
359 break;
361 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
362 {
365 }
366 break;
368 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
369 {
372 }
373 default:
374 break;
375 }
376 return big5;
377}
static const codes_t cnsPlane2ToBig5Level2[49]
static const codes_t cnsPlane1ToBig5Level1[26]
References b1c4, b2c3, BinarySearchRange(), cnsPlane1ToBig5Level1, cnsPlane2ToBig5Level2, i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.
Referenced by euc_tw2big5(), and mic2big5().
◆ get_encoding_name_for_icu()
const char * get_encoding_name_for_icu | ( | int | encoding | ) |
---|
◆ GetDatabaseEncoding()
int GetDatabaseEncoding | ( | void | ) |
---|
Definition at line 1262 of file mbutils.c.
1263{
1265}
static const pg_enc2name * DatabaseEncoding
References DatabaseEncoding, and pg_enc2name::encoding.
Referenced by ascii(), BeginCopyFrom(), BeginCopyTo(), char2wchar(), chr(), CollationCreate(), CollationGetCollid(), compareStrings(), convert_from_utf8(), convert_to_utf8(), CopyConversionError(), CopyConvertBuf(), create_pg_locale_builtin(), create_pg_locale_icu(), create_pg_locale_libc(), cstr2sv(), dblink_connect(), dblink_get_conn(), DefineCollation(), Generic_Text_IC_like(), GenericMatchText(), get_collation_oid(), get_json_object_as_hash(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_recv(), jsonb_from_cstring(), locate_stem_module(), LogicalOutputWrite(), makeJsonLexContext(), p_isspecial(), ParallelWorkerMain(), pg_database_encoding_character_incrementer(), pg_database_encoding_max_length(), pg_generic_charinc(), pg_perm_setlocale(), pg_set_regex_collation(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_verifymbstr(), pgss_store(), PLyUnicode_Bytes(), populate_array_json(), PrepareClientEncoding(), read_extension_script_file(), SetClientEncoding(), str_casefold(), sv2cstr(), text_position_setup(), to_ascii_default(), type_maximum_size(), unicode_assigned(), unicode_norm_form_from_string(), wchar2char(), xml_in(), xml_is_document(), xmlparse(), and xmltotext_with_options().
◆ GetDatabaseEncodingName()
const char * GetDatabaseEncodingName | ( | void | ) |
---|
Definition at line 1268 of file mbutils.c.
References DatabaseEncoding, and pg_enc2name::name.
Referenced by check_client_encoding(), CheckMyDatabase(), connect_pg_server(), dblink_connect(), dblink_get_conn(), get_collation_oid(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_errdetail(), libpqrcv_connect(), locate_stem_module(), pg_unicode_to_server(), ProcessConfigFileInternal(), and regcollationin().
◆ GetMessageEncoding()
int GetMessageEncoding | ( | void | ) |
---|
◆ InitializeClientEncoding()
void InitializeClientEncoding | ( | void | ) |
---|
Definition at line 282 of file mbutils.c.
283{
284 int current_server_encoding;
285
288
291 {
292
293
294
295
297 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
298 errmsg("conversion between %s and %s is not supported",
301 }
302
303
304
305
306
307
309 if (current_server_encoding != PG_UTF8 &&
311 {
312 Oid utf8_to_server_proc;
313
315 utf8_to_server_proc =
317 current_server_encoding);
318
320 {
322
327
329 }
330 }
331}
#define OidIsValid(objectId)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Assert(PointerIsAligned(start, uint64))
int GetDatabaseEncoding(void)
static FmgrInfo * Utf8ToServerConvProc
const char * GetDatabaseEncodingName(void)
int SetClientEncoding(int encoding)
int PrepareClientEncoding(int encoding)
static bool backend_startup_complete
static int pending_client_encoding
void * MemoryContextAlloc(MemoryContext context, Size size)
MemoryContext TopMemoryContext
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
static void AssertCouldGetRelation(void)
References Assert(), AssertCouldGetRelation(), backend_startup_complete, ereport, errcode(), errmsg(), FATAL, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), GetDatabaseEncodingName(), MemoryContextAlloc(), name, OidIsValid, pending_client_encoding, pg_enc2name_tbl, PG_SQL_ASCII, PG_UTF8, PrepareClientEncoding(), SetClientEncoding(), TopMemoryContext, and Utf8ToServerConvProc.
Referenced by InitPostgres().
◆ is_encoding_supported_by_icu()
bool is_encoding_supported_by_icu | ( | int | encoding | ) |
---|
◆ is_utf16_surrogate_first()
static bool is_utf16_surrogate_first ( pg_wchar c) | inlinestatic |
---|
◆ is_utf16_surrogate_second()
static bool is_utf16_surrogate_second ( pg_wchar c) | inlinestatic |
---|
◆ is_valid_unicode_codepoint()
static bool is_valid_unicode_codepoint ( pg_wchar c) | inlinestatic |
---|
◆ latin2mic()
int latin2mic | ( | const unsigned char * | l, |
---|---|---|---|
unsigned char * | p, | ||
int | len, | ||
int | lc, | ||
int | encoding, | ||
bool | noError | ||
) |
Definition at line 89 of file conv.c.
91{
92 const unsigned char *start = l;
93 int c1;
94
95 while (len > 0)
96 {
97 c1 = *l;
98 if (c1 == 0)
99 {
100 if (noError)
101 break;
103 }
105 *p++ = lc;
106 *p++ = c1;
107 l++;
109 }
110 *p = '\0';
111
112 return l - start;
113}
#define IS_HIGHBIT_SET(ch)
void report_invalid_encoding(int encoding, const char *mbstr, int len)
References encoding, IS_HIGHBIT_SET, len, report_invalid_encoding(), and start.
Referenced by koi8r_to_mic(), latin1_to_mic(), latin2_to_mic(), latin3_to_mic(), and latin4_to_mic().
◆ latin2mic_with_table()
int latin2mic_with_table | ( | const unsigned char * | l, |
---|---|---|---|
unsigned char * | p, | ||
int | len, | ||
int | lc, | ||
int | encoding, | ||
const unsigned char * | tab, | ||
bool | noError | ||
) |
Definition at line 194 of file conv.c.
201{
202 const unsigned char *start = l;
203 unsigned char c1,
204 c2;
205
206 while (len > 0)
207 {
208 c1 = *l;
209 if (c1 == 0)
210 {
211 if (noError)
212 break;
214 }
216 *p++ = c1;
217 else
218 {
220 if (c2)
221 {
222 *p++ = lc;
223 *p++ = c2;
224 }
225 else
226 {
227 if (noError)
228 break;
230 (const char *) l, len);
231 }
232 }
233 l++;
235 }
236 *p = '\0';
237
238 return l - start;
239}
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and start.
Referenced by iso_to_mic(), win1250_to_mic(), win1251_to_mic(), and win866_to_mic().
◆ local2local()
int local2local | ( | const unsigned char * | l, |
---|---|---|---|
unsigned char * | p, | ||
int | len, | ||
int | src_encoding, | ||
int | dest_encoding, | ||
const unsigned char * | tab, | ||
bool | noError | ||
) |
Definition at line 33 of file conv.c.
40{
41 const unsigned char *start = l;
42 unsigned char c1,
43 c2;
44
45 while (len > 0)
46 {
47 c1 = *l;
48 if (c1 == 0)
49 {
50 if (noError)
51 break;
53 }
55 *p++ = c1;
56 else
57 {
59 if (c2)
60 *p++ = c2;
61 else
62 {
63 if (noError)
64 break;
66 (const char *) l, len);
67 }
68 }
69 l++;
71 }
72 *p = '\0';
73
75}
References HIGHBIT, IS_HIGHBIT_SET, len, report_invalid_encoding(), report_untranslatable_char(), and start.
Referenced by iso_to_koi8r(), iso_to_win1251(), iso_to_win866(), koi8r_to_iso(), koi8r_to_win1251(), koi8r_to_win866(), latin2_to_win1250(), win1250_to_latin2(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), and win866_to_win1251().
◆ LocalToUtf()
Definition at line 717 of file conv.c.
724{
726 int l;
728 const unsigned char *start = iso;
729
732 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
734
736 {
737 unsigned char b1 = 0;
738 unsigned char b2 = 0;
739 unsigned char b3 = 0;
740 unsigned char b4 = 0;
741
742
743 if (*iso == '\0')
744 break;
745
747 {
748
749 *utf++ = *iso++;
750 l = 1;
751 continue;
752 }
753
755 if (l < 0)
756 break;
757
758
759 if (l == 1)
760 b4 = *iso++;
761 else if (l == 2)
762 {
763 b3 = *iso++;
764 b4 = *iso++;
765 }
766 else if (l == 3)
767 {
768 b2 = *iso++;
769 b3 = *iso++;
770 b4 = *iso++;
771 }
772 else if (l == 4)
773 {
774 b1 = *iso++;
775 b2 = *iso++;
776 b3 = *iso++;
777 b4 = *iso++;
778 }
779 else
780 {
781 elog(ERROR, "unsupported character length %d", l);
782 iiso = 0;
783 }
784 iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
785
786 if (map)
787 {
789
790 if (converted)
791 {
793 continue;
794 }
795
796
797 if (cmap)
798 {
799 cp = bsearch(&iiso, cmap, cmapsize,
801
802 if (cp)
803 {
806 continue;
807 }
808 }
809 }
810
811
812 if (conv_func)
813 {
814 uint32 converted = (*conv_func) (iiso);
815
816 if (converted)
817 {
819 continue;
820 }
821 }
822
823
824 iso -= l;
825 if (noError)
826 break;
828 (const char *) iso, len);
829 }
830
831
832 if (len > 0 && !noError)
834
835 *utf = '\0';
836
837 return iso - start;
838}
static unsigned char * store_coded_char(unsigned char *dest, uint32 code)
static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)
static int compare4(const void *p1, const void *p2)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
References compare4(), elog, encoding, ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, len, pg_encoding_verifymbchar(), pg_mb_radix_conv(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, store_coded_char(), pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.
Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().
◆ mic2latin()
int mic2latin | ( | const unsigned char * | mic, |
---|---|---|---|
unsigned char * | p, | ||
int | len, | ||
int | lc, | ||
int | encoding, | ||
bool | noError | ||
) |
Definition at line 127 of file conv.c.
129{
130 const unsigned char *start = mic;
131 int c1;
132
133 while (len > 0)
134 {
135 c1 = *mic;
136 if (c1 == 0)
137 {
138 if (noError)
139 break;
141 }
143 {
144
145 *p++ = c1;
146 mic++;
148 }
149 else
150 {
152
153 if (len < l)
154 {
155 if (noError)
156 break;
159 }
161 {
162 if (noError)
163 break;
165 (const char *) mic, len);
166 }
167 *p++ = mic[1];
168 mic += 2;
169 len -= 2;
170 }
171 }
172 *p = '\0';
173
174 return mic - start;
175}
int pg_mule_mblen(const unsigned char *s)
References encoding, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.
Referenced by mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), and mic_to_latin4().
◆ mic2latin_with_table()
int mic2latin_with_table | ( | const unsigned char * | mic, |
---|---|---|---|
unsigned char * | p, | ||
int | len, | ||
int | lc, | ||
int | encoding, | ||
const unsigned char * | tab, | ||
bool | noError | ||
) |
Definition at line 257 of file conv.c.
264{
265 const unsigned char *start = mic;
266 unsigned char c1,
267 c2;
268
269 while (len > 0)
270 {
271 c1 = *mic;
272 if (c1 == 0)
273 {
274 if (noError)
275 break;
277 }
279 {
280
281 *p++ = c1;
282 mic++;
284 }
285 else
286 {
288
289 if (len < l)
290 {
291 if (noError)
292 break;
295 }
297 (c2 = tab[mic[1] - HIGHBIT]) == 0)
298 {
299 if (noError)
300 break;
302 (const char *) mic, len);
303 break;
304 }
305 *p++ = c2;
306 mic += 2;
307 len -= 2;
308 }
309 }
310 *p = '\0';
311
312 return mic - start;
313}
References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.
Referenced by mic_to_iso(), mic_to_win1250(), mic_to_win1251(), and mic_to_win866().
◆ pg_any_to_server()
char * pg_any_to_server | ( | const char * | s, |
---|---|---|---|
int | len, | ||
int | encoding | ||
) |
Definition at line 677 of file mbutils.c.
678{
679 if (len <= 0)
680 return unconstify(char *, s);
681
684 {
685
686
687
690 }
691
693 {
694
695
696
697
698
699
700
701
702
705 else
706 {
707 int i;
708
710 {
713 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
714 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
716 (unsigned char) s[i])));
717 }
718 }
720 }
721
722
725
726
731}
#define unconstify(underlying_type, expr)
static const pg_enc2name * ClientEncoding
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, i, IS_HIGHBIT_SET, len, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, pg_verify_mbstr(), and unconstify.
Referenced by ASN1_STRING_to_text(), cache_single_string(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), tsearch_readline(), utf_u2e(), X509_NAME_to_cstring(), and xml_recv().
◆ pg_char_and_wchar_strcmp()
int pg_char_and_wchar_strcmp | ( | const char * | s1, |
---|---|---|---|
const pg_wchar * | s2 | ||
) |
Definition at line 41 of file wstrcmp.c.
42{
43 while ((pg_wchar) *s1 == *s2++)
44 if (*s1++ == 0)
45 return 0;
46 return *(const unsigned char *) s1 - *(const pg_wchar *) (s2 - 1);
47}
◆ pg_char_and_wchar_strncmp()
int pg_char_and_wchar_strncmp | ( | const char * | s1, |
---|---|---|---|
const pg_wchar * | s2, | ||
size_t | n | ||
) |
Definition at line 55 of file wstrncmp.c.
56{
57 if (n == 0)
58 return 0;
59 do
60 {
61 if ((pg_wchar) ((unsigned char) *s1) != *s2++)
62 return ((pg_wchar) ((unsigned char) *s1) - *(s2 - 1));
63 if (*s1++ == 0)
64 break;
65 } while (--n != 0);
66 return 0;
67}
Referenced by element(), and lookupcclass().
◆ pg_char_to_encoding()
int pg_char_to_encoding | ( | const char * | name | ) |
---|
Definition at line 549 of file encnames.c.
550{
553 *last = base + nel - 1,
554 *position;
555 int result;
558
559 if (name == NULL || *name == '\0')
560 return -1;
561
563 return -1;
564
566
567 while (last >= base)
568 {
569 position = base + ((last - base) >> 1);
570 result = key[0] - position->name[0];
571
572 if (result == 0)
573 {
574 result = strcmp(key, position->name);
575 if (result == 0)
576 return position->encoding;
577 }
578 if (result < 0)
579 last = position - 1;
580 else
581 base = position + 1;
582 }
583 return -1;
584}
static char * clean_encoding_name(const char *key, char *newkey)
static const pg_encname pg_encname_tbl[]
◆ pg_client_to_server()
char * pg_client_to_server | ( | const char * | s, |
---|---|---|---|
int | len | ||
) |
◆ pg_database_encoding_character_incrementer()
◆ pg_database_encoding_max_length()
int pg_database_encoding_max_length | ( | void | ) |
---|
Definition at line 1547 of file mbutils.c.
1548{
1549 return pg_wchar_table[GetDatabaseEncoding()].maxmblen;
1550}
const pg_wchar_tbl pg_wchar_table[]
References GetDatabaseEncoding(), pg_wchar_tbl::maxmblen, and pg_wchar_table.
Referenced by bpcharlen(), charlen_to_bytelen(), dotrim(), downcase_identifier(), gbt_bpchar_consistent(), gbt_text_compress(), gbt_text_consistent(), generate_trgm_only(), Generic_Text_IC_like(), GenericMatchText(), infix(), init_tsvector_parser(), like_escape(), like_fixed_prefix(), lpad(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), protect_out_of_mem(), regexp_fixed_prefix(), rpad(), setup_regexp_matches(), setup_test_matches(), show_trgm(), strlower_libc(), strlower_libc_mb(), strtitle_libc(), strtitle_libc_mb(), strupper_libc(), strupper_libc_mb(), text_length(), text_position_setup(), text_reverse(), text_substring(), TParserInit(), translate(), and tsvectorout().
◆ pg_do_encoding_conversion()
unsigned char * pg_do_encoding_conversion | ( | unsigned char * | src, |
---|---|---|---|
int | len, | ||
int | src_encoding, | ||
int | dest_encoding | ||
) |
Definition at line 357 of file mbutils.c.
359{
360 unsigned char *result;
361 Oid proc;
362
363 if (len <= 0)
364 return src;
365
366 if (src_encoding == dest_encoding)
367 return src;
368
370 return src;
371
373 {
374
375 (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
376 return src;
377 }
378
380 elog(ERROR, "cannot perform encoding conversion outside a transaction");
381
385 (errcode(ERRCODE_UNDEFINED_FUNCTION),
386 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
389
390
391
392
393
394
395
396
397
398
401 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
402 errmsg("out of memory"),
403 errdetail("String of %d bytes is too long for encoding conversion.",
405
406 result = (unsigned char *)
409
417
418
419
420
421
422
423 if (len > 1000000)
424 {
425 Size resultlen = strlen((char *) result);
426
429 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
430 errmsg("out of memory"),
431 errdetail("String of %d bytes is too long for encoding conversion.",
433
434 result = (unsigned char *) repalloc(result, resultlen + 1);
435 }
436
437 return result;
438}
int errdetail(const char *fmt,...)
#define OidFunctionCall6(functionId, arg1, arg2, arg3, arg4, arg5, arg6)
void * repalloc(void *pointer, Size size)
MemoryContext CurrentMemoryContext
void * MemoryContextAllocHuge(MemoryContext context, Size size)
#define MAX_CONVERSION_GROWTH
#define pg_encoding_to_char
static Datum BoolGetDatum(bool X)
static Datum CStringGetDatum(const char *X)
static Datum Int32GetDatum(int32 X)
bool IsTransactionState(void)
References BoolGetDatum(), CStringGetDatum(), CurrentMemoryContext, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, FindDefaultConversionProc(), Int32GetDatum(), IsTransactionState(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), OidFunctionCall6, OidIsValid, pg_encoding_to_char, PG_SQL_ASCII, pg_verify_mbstr(), and repalloc().
Referenced by convert_charset(), pg_any_to_server(), pg_convert(), and pg_server_to_any().
◆ pg_do_encoding_conversion_buf()
int pg_do_encoding_conversion_buf | ( | Oid | proc, |
---|---|---|---|
int | src_encoding, | ||
int | dest_encoding, | ||
unsigned char * | src, | ||
int | srclen, | ||
unsigned char * | dest, | ||
int | destlen, | ||
bool | noError | ||
) |
◆ pg_dsplen()
int pg_dsplen | ( | const char * | mbstr | ) |
---|
◆ pg_encoding_dsplen()
int pg_encoding_dsplen | ( | int | encoding, |
---|---|---|---|
const char * | mbstr | ||
) |
◆ pg_encoding_max_length()
int pg_encoding_max_length | ( | int | encoding | ) |
---|
Definition at line 2213 of file wchar.c.
2214{
2216
2217
2218
2219
2220
2224}
References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), and type_maximum_size().
◆ pg_encoding_mb2wchar_with_len()
int pg_encoding_mb2wchar_with_len | ( | int | encoding, |
---|---|---|---|
const char * | from, | ||
pg_wchar * | to, | ||
int | len | ||
) |
◆ pg_encoding_mbcliplen()
int pg_encoding_mbcliplen | ( | int | encoding, |
---|---|---|---|
const char * | mbstr, | ||
int | len, | ||
int | limit | ||
) |
Definition at line 1094 of file mbutils.c.
1096{
1098 int clen = 0;
1099 int l;
1100
1101
1104
1106
1107 while (len > 0 && *mbstr)
1108 {
1109 l = (*mblen_fn) ((const unsigned char *) mbstr);
1110 if ((clen + l) > limit)
1111 break;
1112 clen += l;
1113 if (clen == limit)
1114 break;
1115 len -= l;
1116 mbstr += l;
1117 }
1118 return clen;
1119}
static int cliplen(const char *str, int len, int limit)
int(* mblen_converter)(const unsigned char *mbstr)
int pg_encoding_max_length(int encoding)
References cliplen(), encoding, len, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.
Referenced by pg_mbcliplen().
◆ pg_encoding_mblen()
int pg_encoding_mblen | ( | int | encoding, |
---|---|---|---|
const char * | mbstr | ||
) |
Definition at line 2135 of file wchar.c.
References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), fmtIdEnc(), pg_encoding_mblen_bounded(), pg_encoding_mblen_or_incomplete(), PQescapeInternal(), PQmblen(), PQmblenBounded(), and test_enc_setup().
◆ pg_encoding_mblen_bounded()
int pg_encoding_mblen_bounded | ( | int | encoding, |
---|---|---|---|
const char * | mbstr | ||
) |
◆ pg_encoding_mblen_or_incomplete()
int pg_encoding_mblen_or_incomplete | ( | int | encoding, |
---|---|---|---|
const char * | mbstr, | ||
size_t | remaining | ||
) |
◆ pg_encoding_set_invalid()
void pg_encoding_set_invalid | ( | int | encoding, |
---|---|---|---|
char * | dst | ||
) |
◆ pg_encoding_to_char()
const char * pg_encoding_to_char | ( | int | encoding | ) |
---|
Definition at line 587 of file encnames.c.
588{
590 {
592
594 return p->name;
595 }
596 return "";
597}
◆ pg_encoding_verifymbchar()
int pg_encoding_verifymbchar | ( | int | encoding, |
---|---|---|---|
const char * | mbstr, | ||
int | len | ||
) |
Definition at line 2189 of file wchar.c.
References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by appendStringLiteral(), big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), fmtIdEnc(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), PQescapeStringInternal(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().
◆ pg_encoding_verifymbstr()
int pg_encoding_verifymbstr | ( | int | encoding, |
---|---|---|---|
const char * | mbstr, | ||
int | len | ||
) |
Definition at line 2202 of file wchar.c.
References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by add_file_to_manifest(), CopyConvertBuf(), handle_oauth_sasl_error(), parse_oauth_json(), PQescapeInternal(), test_enc_conversion(), test_enc_setup(), and test_one_vector_escape().
◆ pg_encoding_wchar2mb_with_len()
int pg_encoding_wchar2mb_with_len | ( | int | encoding, |
---|---|---|---|
const pg_wchar * | from, | ||
char * | to, | ||
int | len | ||
) |
◆ pg_get_client_encoding()
int pg_get_client_encoding | ( | void | ) |
---|
◆ pg_get_client_encoding_name()
const char * pg_get_client_encoding_name | ( | void | ) |
---|
◆ pg_mb2wchar()
int pg_mb2wchar | ( | const char * | from, |
---|---|---|---|
pg_wchar * | to | ||
) |
◆ pg_mb2wchar_with_len()
int pg_mb2wchar_with_len | ( | const char * | from, |
---|---|---|---|
pg_wchar * | to, | ||
int | len | ||
) |
Definition at line 987 of file mbutils.c.
References DatabaseEncoding, pg_enc2name::encoding, len, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.
Referenced by CheckAffix(), NIAddAffix(), RE_compile(), RE_compile_and_cache(), RE_execute(), regcomp_auth_token(), regexec_auth_token(), replace_text_regexp(), setup_regexp_matches(), setup_test_matches(), test_re_compile(), and TParserInit().
◆ pg_mbcharcliplen()
int pg_mbcharcliplen | ( | const char * | mbstr, |
---|---|---|---|
int | len, | ||
int | limit | ||
) |
◆ pg_mbcliplen()
int pg_mbcliplen | ( | const char * | mbstr, |
---|---|---|---|
int | len, | ||
int | limit | ||
) |
Definition at line 1084 of file mbutils.c.
1085{
1086 return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
1087 len, limit);
1088}
int pg_encoding_mbcliplen(int encoding, const char *mbstr, int len, int limit)
References DatabaseEncoding, pg_enc2name::encoding, len, and pg_encoding_mbcliplen().
Referenced by appendStringInfoStringQuoted(), bpchar_name(), ChooseIndexColumnNames(), CopyLimitPrintoutLength(), ExecBuildSlotPartitionKeyDescription(), ExecBuildSlotValueDescription(), make_colname_unique(), make_greater_string(), makeMultirangeTypeName(), makeObjectName(), MemoryContextStatsPrint(), nameconcatoid(), namein(), pgstat_clip_activity(), pgstat_report_appname(), PutMemoryContextsStatsTupleStore(), set_rtable_names(), text_name(), text_to_cstring_buffer(), and truncate_identifier().
◆ pg_mblen()
int pg_mblen | ( | const char * | mbstr | ) |
---|
Definition at line 1024 of file mbutils.c.
References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mblen, and pg_wchar_table.
Referenced by addCompoundAffixFlagValue(), bit_in(), charlen_to_bytelen(), DCH_from_char(), dotrim(), find_word(), findchar(), findchar2(), findwrd(), gbt_var_node_cp_len(), get_modifiers(), get_nextfield(), get_wildcard_part(), getlexeme(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), hex_decode_safe(), infix(), initTrie(), lpad(), make_trigrams(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NUM_eat_non_data_chars(), NUM_processor(), parse_affentry(), parse_format(), parse_lquery(), parse_ltree(), parse_or_operator(), parse_re_flags(), parse_test_flags(), pg_base64_decode(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), prssyntaxerror(), px_crypt_shacrypt(), readstoplist(), report_json_context(), rpad(), RS_compile(), RS_execute(), RS_isRegis(), similar_escape_internal(), split_text(), t_isalnum(), t_isalpha(), text_format(), text_position_next(), text_position_next_internal(), text_reverse(), text_substring(), text_to_bits(), textregexreplace(), thesaurusRead(), TParserGet(), translate(), ts_stat_sql(), tsvectorout(), unaccent_lexize(), varbit_in(), varstr_levenshtein(), and wchareq().
◆ pg_mbstrlen()
int pg_mbstrlen | ( | const char * | mbstr | ) |
---|
◆ pg_mbstrlen_with_len()
int pg_mbstrlen_with_len | ( | const char * | mbstr, |
---|---|---|---|
int | limit | ||
) |
Definition at line 1058 of file mbutils.c.
1059{
1060 int len = 0;
1061
1062
1064 return limit;
1065
1066 while (limit > 0 && *mbstr)
1067 {
1069
1070 limit -= l;
1071 mbstr += l;
1073 }
1074 return len;
1075}
References len, pg_database_encoding_max_length(), and pg_mblen().
Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), scanner_errposition(), similar_escape_internal(), text_left(), text_length(), text_position_get_match_pos(), text_right(), text_substring(), unicode_assigned(), unicode_is_normalized(), unicode_normalize_func(), and varstr_levenshtein().
◆ pg_mule_mblen()
int pg_mule_mblen | ( | const unsigned char * | s | ) |
---|
◆ pg_server_to_any()
char * pg_server_to_any | ( | const char * | s, |
---|---|---|---|
int | len, | ||
int | encoding | ||
) |
Definition at line 750 of file mbutils.c.
751{
752 if (len <= 0)
753 return unconstify(char *, s);
754
757 return unconstify(char *, s);
758
760 {
761
764 }
765
766
769
770
775}
References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, len, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, pg_verify_mbstr(), and unconstify.
Referenced by compareStrings(), CopyAttributeOutCSV(), CopyAttributeOutText(), CopyToTextLikeStart(), daitch_mokotoff(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), PLyUnicode_FromStringAndSize(), and utf_e2u().
◆ pg_server_to_client()
char * pg_server_to_client | ( | const char * | s, |
---|---|---|---|
int | len | ||
) |
◆ pg_unicode_to_server()
void pg_unicode_to_server | ( | pg_wchar | c, |
---|---|---|---|
unsigned char * | s | ||
) |
Definition at line 865 of file mbutils.c.
866{
868 int c_as_utf8_len;
869 int server_encoding;
870
871
872
873
874
877 (errcode(ERRCODE_SYNTAX_ERROR),
878 errmsg("invalid Unicode code point")));
879
880
881 if (c <= 0x7F)
882 {
883 s[0] = (unsigned char) c;
884 s[1] = '\0';
885 return;
886 }
887
888
890 if (server_encoding == PG_UTF8)
891 {
894 return;
895 }
896
897
900 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
901 errmsg("conversion between %s and %s is not supported",
904
905
908 c_as_utf8[c_as_utf8_len] = '\0';
909
910
918}
#define FunctionCall6(flinfo, arg1, arg2, arg3, arg4, arg5, arg6)
#define MAX_MULTIBYTE_CHAR_LEN
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
static bool is_valid_unicode_codepoint(pg_wchar c)
References BoolGetDatum(), CStringGetDatum(), ereport, errcode(), errmsg(), ERROR, FunctionCall6, GetDatabaseEncoding(), GetDatabaseEncodingName(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, name, pg_enc2name_tbl, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.
Referenced by addunicode(), addUnicodeChar(), map_xml_name_to_sql_identifier(), str_udeescape(), and unistr().
◆ pg_unicode_to_server_noerror()
bool pg_unicode_to_server_noerror | ( | pg_wchar | c, |
---|---|---|---|
unsigned char * | s | ||
) |
Definition at line 927 of file mbutils.c.
928{
930 int c_as_utf8_len;
931 int converted_len;
932 int server_encoding;
933
934
936 return false;
937
938
939 if (c <= 0x7F)
940 {
941 s[0] = (unsigned char) c;
942 s[1] = '\0';
943 return true;
944 }
945
946
948 if (server_encoding == PG_UTF8)
949 {
952 return true;
953 }
954
955
957 return false;
958
959
962 c_as_utf8[c_as_utf8_len] = '\0';
963
964
972
973
974 return (converted_len == c_as_utf8_len);
975}
References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), FunctionCall6, GetDatabaseEncoding(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.
Referenced by addUnicodeChar(), and json_lex_string().
◆ pg_utf8_islegal()
bool pg_utf8_islegal | ( | const unsigned char * | source, |
---|---|---|---|
int | length | ||
) |
Definition at line 1989 of file wchar.c.
1990{
1991 unsigned char a;
1992
1993 switch (length)
1994 {
1995 default:
1996
1997 return false;
1998 case 4:
2000 if (a < 0x80 || a > 0xBF)
2001 return false;
2002
2003 case 3:
2005 if (a < 0x80 || a > 0xBF)
2006 return false;
2007
2008 case 2:
2011 {
2012 case 0xE0:
2013 if (a < 0xA0 || a > 0xBF)
2014 return false;
2015 break;
2016 case 0xED:
2017 if (a < 0x80 || a > 0x9F)
2018 return false;
2019 break;
2020 case 0xF0:
2021 if (a < 0x90 || a > 0xBF)
2022 return false;
2023 break;
2024 case 0xF4:
2025 if (a < 0x80 || a > 0x8F)
2026 return false;
2027 break;
2028 default:
2029 if (a < 0x80 || a > 0xBF)
2030 return false;
2031 break;
2032 }
2033
2034 case 1:
2036 if (a >= 0x80 && a < 0xC2)
2037 return false;
2038 if (a > 0xF4)
2039 return false;
2040 break;
2041 }
2042 return true;
2043}
static rewind_source * source
Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().
◆ pg_utf_mblen()
int pg_utf_mblen | ( | const unsigned char * | s | ) |
---|
Definition at line 538 of file wchar.c.
539{
541
542 if ((*s & 0x80) == 0)
544 else if ((*s & 0xe0) == 0xc0)
546 else if ((*s & 0xf0) == 0xe0)
548 else if ((*s & 0xf8) == 0xf0)
550#ifdef NOT_USED
551 else if ((*s & 0xfc) == 0xf8)
553 else if ((*s & 0xfe) == 0xfc)
555#endif
556 else
558 return len;
559}
References len.
Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().
◆ pg_valid_client_encoding()
int pg_valid_client_encoding | ( | const char * | name | ) |
---|
◆ pg_valid_server_encoding()
int pg_valid_server_encoding | ( | const char * | name | ) |
---|
◆ pg_valid_server_encoding_id()
int pg_valid_server_encoding_id | ( | int | encoding | ) |
---|
◆ pg_verify_mbstr()
bool pg_verify_mbstr | ( | int | encoding, |
---|---|---|---|
const char * | mbstr, | ||
int | len, | ||
bool | noError | ||
) |
Definition at line 1567 of file mbutils.c.
1568{
1569 int oklen;
1570
1572
1574 if (oklen != len)
1575 {
1576 if (noError)
1577 return false;
1579 }
1580 return true;
1581}
mbstr_verifier mbverifystr
References Assert(), encoding, len, pg_wchar_tbl::mbverifystr, PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().
Referenced by AddFileToBackupManifest(), LogicalOutputWrite(), pg_any_to_server(), pg_convert(), pg_do_encoding_conversion(), pg_server_to_any(), pg_verifymbstr(), and read_extension_script_file().
◆ pg_verify_mbstr_len()
int pg_verify_mbstr_len | ( | int | encoding, |
---|---|---|---|
const char * | mbstr, | ||
int | len, | ||
bool | noError | ||
) |
Definition at line 1598 of file mbutils.c.
1599{
1601 int mb_len;
1602
1604
1605
1606
1607
1609 {
1610 const char *nullpos = memchr(mbstr, 0, len);
1611
1612 if (nullpos == NULL)
1613 return len;
1614 if (noError)
1615 return -1;
1617 }
1618
1619
1621
1622 mb_len = 0;
1623
1624 while (len > 0)
1625 {
1626 int l;
1627
1628
1630 {
1631 if (*mbstr != '\0')
1632 {
1633 mb_len++;
1634 mbstr++;
1636 continue;
1637 }
1638 if (noError)
1639 return -1;
1641 }
1642
1643 l = (*mbverifychar) ((const unsigned char *) mbstr, len);
1644
1645 if (l < 0)
1646 {
1647 if (noError)
1648 return -1;
1650 }
1651
1652 mbstr += l;
1653 len -= l;
1654 mb_len++;
1655 }
1656 return mb_len;
1657}
int(* mbchar_verifier)(const unsigned char *mbstr, int len)
mbchar_verifier mbverifychar
References Assert(), encoding, IS_HIGHBIT_SET, len, pg_wchar_tbl::mbverifychar, pg_encoding_max_length(), PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().
Referenced by length_in_encoding().
◆ pg_verifymbstr()
bool pg_verifymbstr | ( | const char * | mbstr, |
---|---|---|---|
int | len, | ||
bool | noError | ||
) |
Definition at line 1557 of file mbutils.c.
References GetDatabaseEncoding(), len, and pg_verify_mbstr().
Referenced by char2wchar(), CopyReadAttributesText(), plperl_spi_exec(), plperl_spi_prepare(), plperl_spi_query(), PLy_cursor_query(), PLy_output(), PLy_spi_execute_query(), PLy_spi_prepare(), PLyObject_AsString(), read_text_file(), and spg_text_leaf_consistent().
◆ pg_wchar2mb()
int pg_wchar2mb | ( | const pg_wchar * | from, |
---|---|---|---|
char * | to | ||
) |
◆ pg_wchar2mb_with_len()
int pg_wchar2mb_with_len | ( | const pg_wchar * | from, |
---|---|---|---|
char * | to, | ||
int | len | ||
) |
◆ pg_wchar_strlen()
size_t pg_wchar_strlen | ( | const pg_wchar * | str | ) |
---|
◆ pg_wchar_strncmp()
Definition at line 40 of file wstrncmp.c.
41{
42 if (n == 0)
43 return 0;
44 do
45 {
46 if (*s1 != *s2++)
47 return (*s1 - *(s2 - 1));
48 if (*s1++ == 0)
49 break;
50 } while (--n != 0);
51 return 0;
52}
◆ PrepareClientEncoding()
int PrepareClientEncoding | ( | int | encoding | ) |
---|
Definition at line 111 of file mbutils.c.
112{
113 int current_server_encoding;
115
117 return -1;
118
119
121 return 0;
122
124
125
126
127
128 if (current_server_encoding == encoding ||
131 return 0;
132
134 {
135
136
137
138
139
140
141 Oid to_server_proc,
142 to_client_proc;
145
147 current_server_encoding);
149 return -1;
153 return -1;
154
155
156
157
160 convinfo->s_encoding = current_server_encoding;
166
167
171
172
173
174
175
176
177 return 0;
178 }
179 else
180 {
181
182
183
184
185
186
187
188
190 {
192
193 if (oldinfo->s_encoding == current_server_encoding &&
195 return 0;
196 }
197
198 return -1;
199 }
200}
List * lcons(void *datum, List *list)
static List * ConvProcList
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
References backend_startup_complete, ConvProcInfo::c_encoding, ConvProcList, encoding, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, and TopMemoryContext.
Referenced by check_client_encoding(), and InitializeClientEncoding().
◆ report_invalid_encoding()
pg_noreturn void report_invalid_encoding | ( | int | encoding, |
---|---|---|---|
const char * | mbstr, | ||
int | len | ||
) |
Definition at line 1699 of file mbutils.c.
1700{
1702 char buf[8 * 5 + 1];
1703 char *p = buf;
1704 int j,
1705 jlimit;
1706
1708 jlimit = Min(jlimit, 8);
1709
1710 for (j = 0; j < jlimit; j++)
1711 {
1712 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1713 if (j < jlimit - 1)
1715 }
1716
1718 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1719 errmsg("invalid byte sequence for encoding \"%s\": %s",
1722}
int pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr, size_t remaining)
References buf, encoding, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.
Referenced by big52euc_tw(), big52mic(), CopyConversionError(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_verify_mbstr(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), test_enc_conversion(), utf8_to_iso8859_1(), and UtfToLocal().
◆ report_untranslatable_char()
pg_noreturn void report_untranslatable_char | ( | int | src_encoding, |
---|---|---|---|
int | dest_encoding, | ||
const char * | mbstr, | ||
int | len | ||
) |
Definition at line 1731 of file mbutils.c.
1733{
1734 int l;
1735 char buf[8 * 5 + 1];
1736 char *p = buf;
1737 int j,
1738 jlimit;
1739
1740
1741
1742
1743
1744
1745
1746
1749 jlimit = Min(jlimit, 8);
1750
1751 for (j = 0; j < jlimit; j++)
1752 {
1753 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1754 if (j < jlimit - 1)
1756 }
1757
1759 (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
1760 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
1764}
References buf, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.
Referenced by big52euc_tw(), big52mic(), euc_tw2big5(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), utf8_to_iso8859_1(), and UtfToLocal().
◆ SetClientEncoding()
int SetClientEncoding | ( | int | encoding | ) |
---|
Definition at line 209 of file mbutils.c.
210{
211 int current_server_encoding;
212 bool found;
214
216 return -1;
217
218
220 {
222 return 0;
223 }
224
226
227
228
229
230 if (current_server_encoding == encoding ||
233 {
237 return 0;
238 }
239
240
241
242
243
244
245
246 found = false;
248 {
250
251 if (convinfo->s_encoding == current_server_encoding &&
253 {
254 if (!found)
255 {
256
260 found = true;
261 }
262 else
263 {
264
267 }
268 }
269 }
270
271 if (found)
272 return 0;
273 else
274 return -1;
275}
static FmgrInfo * ToServerConvProc
static FmgrInfo * ToClientConvProc
void pfree(void *pointer)
#define foreach_delete_current(lst, var_or_cell)
References backend_startup_complete, ConvProcInfo::c_encoding, ClientEncoding, ConvProcList, encoding, foreach_delete_current, GetDatabaseEncoding(), lfirst, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, ToClientConvProc, and ToServerConvProc.
Referenced by assign_client_encoding(), InitializeClientEncoding(), and ParallelWorkerMain().
◆ SetDatabaseEncoding()
void SetDatabaseEncoding(int encoding)
◆ SetMessageEncoding()
void SetMessageEncoding(int encoding)
◆ surrogate_pair_to_codepoint()
◆ unicode_to_utf8()
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string) [inline, static]
◆ unicode_utf8len()
static int unicode_utf8len(pg_wchar c) [inline, static]
Definition at line 607 of file pg_wchar.h.
609 {
610 utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
611 utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
612 utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
613 utf8string[3] = 0x80 | (c & 0x3F);
614 }
615
616 return utf8string;
617}
Referenced by convert_case(), and initcap_wbnext().
◆ utf8_to_unicode()
pg_wchar utf8_to_unicode(const unsigned char *c) [inline, static]
Definition at line 549 of file pg_wchar.h.
554{
555 return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
556}
557
558
559
560
561
562
563
566{
567 if ((*c & 0x80) == 0)
static pg_wchar utf8_to_unicode(const unsigned char *c)
◆ UtfToLocal()
Definition at line 507 of file conv.c.
513{
515 int l;
517 const unsigned char *start = utf;
518
521 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
523
525 {
526 unsigned char b1 = 0;
527 unsigned char b2 = 0;
528 unsigned char b3 = 0;
529 unsigned char b4 = 0;
530
531
532 if (*utf == '\0')
533 break;
534
536 if (len < l)
537 break;
538
540 break;
541
542 if (l == 1)
543 {
544
545 *iso++ = *utf++;
546 continue;
547 }
548
549
550 if (l == 2)
551 {
552 b3 = *utf++;
553 b4 = *utf++;
554 }
555 else if (l == 3)
556 {
557 b2 = *utf++;
558 b3 = *utf++;
559 b4 = *utf++;
560 }
561 else if (l == 4)
562 {
563 b1 = *utf++;
564 b2 = *utf++;
565 b3 = *utf++;
566 b4 = *utf++;
567 }
568 else
569 {
570 elog(ERROR, "unsupported character length %d", l);
571 iutf = 0;
572 }
573 iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
574
575
576 if (cmap && len > l)
577 {
578 const unsigned char *utf_save = utf;
579 int len_save = len;
580 int l_save = l;
581
582
583 len -= l;
584
586 if (len < l)
587 {
588
589 utf -= l_save;
590 break;
591 }
592
594 {
595 if (!noError)
597 utf -= l_save;
598 break;
599 }
600
601
602 if (l > 1)
603 {
606
607 if (l == 2)
608 {
609 iutf2 = *utf++ << 8;
610 iutf2 |= *utf++;
611 }
612 else if (l == 3)
613 {
614 iutf2 = *utf++ << 16;
615 iutf2 |= *utf++ << 8;
616 iutf2 |= *utf++;
617 }
618 else if (l == 4)
619 {
620 iutf2 = *utf++ << 24;
621 iutf2 |= *utf++ << 16;
622 iutf2 |= *utf++ << 8;
623 iutf2 |= *utf++;
624 }
625 else
626 {
627 elog(ERROR, "unsupported character length %d", l);
628 iutf2 = 0;
629 }
630
631 cutf[0] = iutf;
632 cutf[1] = iutf2;
633
634 cp = bsearch(cutf, cmap, cmapsize,
636
637 if (cp)
638 {
640 continue;
641 }
642 }
643
644
645 utf = utf_save;
646 len = len_save;
647 l = l_save;
648 }
649
650
651 if (map)
652 {
654
655 if (converted)
656 {
658 continue;
659 }
660 }
661
662
663 if (conv_func)
664 {
665 uint32 converted = (*conv_func) (iutf);
666
667 if (converted)
668 {
670 continue;
671 }
672 }
673
674
675 utf -= l;
676 if (noError)
677 break;
679 (const char *) utf, len);
680 }
681
682
683 if (len > 0 && !noError)
685
686 *iso = '\0';
687
688 return utf - start;
689}
static int compare3(const void *p1, const void *p2)
bool pg_utf8_islegal(const unsigned char *source, int length)
References pg_utf_to_local_combined::code, compare3(), elog, encoding, ereport, errcode(), errmsg(), ERROR, len, pg_mb_radix_conv(), PG_UTF8, pg_utf8_islegal(), pg_utf_mblen, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, and store_coded_char().
Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().
◆ pg_enc2gettext_tbl
◆ pg_enc2name_tbl
Definition at line 308 of file encnames.c.
Referenced by check_encoding_conversion_args(), InitializeClientEncoding(), pg_any_to_server(), pg_encoding_to_char(), pg_unicode_to_server(), report_invalid_encoding(), report_untranslatable_char(), SetClientEncoding(), SetDatabaseEncoding(), SetMessageEncoding(), and test_enc_setup().
◆ pg_wchar_table
Definition at line 2064 of file wchar.c.
Referenced by pg_database_encoding_max_length(), pg_dsplen(), pg_encoding_dsplen(), pg_encoding_max_length(), pg_encoding_max_length_sql(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_mblen(), pg_encoding_verifymbchar(), pg_encoding_verifymbstr(), pg_encoding_wchar2mb_with_len(), pg_generic_charinc(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mblen(), pg_verify_mbstr(), pg_verify_mbstr_len(), pg_wchar2mb(), and pg_wchar2mb_with_len().