PostgreSQL Source Code: src/include/mb/pg_wchar.h File Reference (original) (raw)

Go to the source code of this file.

Data Structures
struct pg_enc2name
struct pg_wchar_tbl
struct pg_mb_radix_tree
struct pg_utf_to_local_combined
struct pg_local_to_utf_combined
Macros
#define MAX_MULTIBYTE_CHAR_LEN 4
#define SS2 0x8e /* single shift 2 (JIS0201) */
#define SS3 0x8f /* single shift 3 (JIS0212) */
#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */
#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */
#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */
#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */
#define LC_TIS620 0x85 /* Thai (not supported yet) */
#define LC_ISO8859_7 0x86 /* Greek (not supported yet) */
#define LC_ISO8859_6 0x87 /* Arabic (not supported yet) */
#define LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */
#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */
#define LC_JISX0201R 0x8a /* Japanese 1 byte Roman */
#define LC_KOI8_R 0x8b /* Cyrillic KOI8-R */
#define LC_ISO8859_5 0x8c /* ISO8859 Cyrillic */
#define LC_ISO8859_9 0x8d /* ISO8859 Latin 5 (not supported yet) */
#define LC_ISO8859_15 0x8e /* ISO8859 Latin 15 (not supported yet) */
#define IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)
#define LC_JISX0208_1978 0x90 /* Japanese Kanji, old JIS (not supported) */
#define LC_GB2312_80 0x91 /* Chinese */
#define LC_JISX0208 0x92 /* Japanese Kanji (JIS X 0208) */
#define LC_KS5601 0x93 /* Korean */
#define LC_JISX0212 0x94 /* Japanese Kanji (JIS X 0212) */
#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */
#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */
#define LC_JISX0213_1
#define LC_BIG5_1
#define LC_BIG5_2
#define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
#define LCPRV1_A 0x9a
#define LCPRV1_B 0x9b
#define IS_LCPRV1(c) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
#define IS_LCPRV1_A_RANGE(c) ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
#define IS_LCPRV1_B_RANGE(c) ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
#define LCPRV2_A 0x9c
#define LCPRV2_B 0x9d
#define IS_LCPRV2(c) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
#define IS_LCPRV2_A_RANGE(c) ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
#define IS_LCPRV2_B_RANGE(c) ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
#define LC_SISHENG
#define LC_IPA
#define LC_VISCII_LOWER
#define LC_VISCII_UPPER
#define LC_ARABIC_DIGIT 0xa4 /* Arabic digit (not supported) */
#define LC_ARABIC_1_COLUMN 0xa5 /* Arabic 1-column (not supported) */
#define LC_ASCII_RIGHT_TO_LEFT
#define LC_LAO
#define LC_ARABIC_2_COLUMN 0xa8 /* Arabic 2-column (not supported) */
#define LC_INDIAN_1_COLUMN
#define LC_TIBETAN_1_COLUMN
#define LC_UNICODE_SUBSET_2
#define LC_UNICODE_SUBSET_3
#define LC_UNICODE_SUBSET
#define LC_ETHIOPIC 0xf5 /* Ethiopic characters (not supported) */
#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */
#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */
#define LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */
#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */
#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */
#define LC_INDIAN_2_COLUMN
#define LC_TIBETAN 0xfc /* Tibetan (not supported) */
#define PG_ENCODING_BE_LAST PG_KOI8U
#define PG_VALID_BE_ENCODING(_enc) ((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
#define PG_ENCODING_IS_CLIENT_ONLY(_enc) ((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)
#define PG_VALID_ENCODING(_enc) ((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
#define PG_VALID_FE_ENCODING(_enc) PG_VALID_ENCODING(_enc)
#define MAX_CONVERSION_GROWTH 4
#define MAX_CONVERSION_INPUT_LENGTH 16
#define MAX_UNICODE_EQUIVALENT_STRING 16
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
#define pg_char_to_encoding pg_char_to_encoding_private
#define pg_encoding_to_char pg_encoding_to_char_private
#define pg_valid_server_encoding pg_valid_server_encoding_private
#define pg_valid_server_encoding_id pg_valid_server_encoding_id_private
#define pg_utf_mblen pg_utf_mblen_private
Typedefs
typedef unsigned int pg_wchar
typedef enum pg_enc pg_enc
typedef struct pg_enc2name pg_enc2name
typedef int(* mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len)
typedef int(* wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len)
typedef int(* mblen_converter) (const unsigned char *mbstr)
typedef int(* mbdisplaylen_converter) (const unsigned char *mbstr)
typedef bool(* mbcharacter_incrementer) (unsigned char *mbstr, int len)
typedef int(* mbchar_verifier) (const unsigned char *mbstr, int len)
typedef int(* mbstr_verifier) (const unsigned char *mbstr, int len)
typedef uint32(* utf_local_conversion_func) (uint32 code)
Enumerations
enum pg_enc { PG_SQL_ASCII = 0 , PG_EUC_JP, PG_EUC_CN, PG_EUC_KR, PG_EUC_TW, PG_EUC_JIS_2004, PG_UTF8, PG_MULE_INTERNAL, PG_LATIN1, PG_LATIN2, PG_LATIN3, PG_LATIN4, PG_LATIN5, PG_LATIN6, PG_LATIN7, PG_LATIN8, PG_LATIN9, PG_LATIN10, PG_WIN1256, PG_WIN1258, PG_WIN866, PG_WIN874, PG_KOI8R, PG_WIN1251, PG_WIN1252, PG_ISO_8859_5, PG_ISO_8859_6, PG_ISO_8859_7, PG_ISO_8859_8, PG_WIN1250, PG_WIN1253, PG_WIN1254, PG_WIN1255, PG_WIN1257, PG_KOI8U, PG_SJIS, PG_BIG5, PG_GBK, PG_UHC, PG_GB18030, PG_JOHAB, PG_SHIFT_JIS_2004, _PG_LAST_ENCODING_ }
Functions
static bool is_valid_unicode_codepoint (pg_wchar c)
static bool is_utf16_surrogate_first (pg_wchar c)
static bool is_utf16_surrogate_second (pg_wchar c)
static pg_wchar surrogate_pair_to_codepoint (pg_wchar first, pg_wchar second)
static pg_wchar utf8_to_unicode (const unsigned char *c)
static unsigned char * unicode_to_utf8 (pg_wchar c, unsigned char *utf8string)
static int unicode_utf8len (pg_wchar c)
int pg_char_to_encoding (const char *name)
const char * pg_encoding_to_char (int encoding)
int pg_valid_server_encoding_id (int encoding)
void pg_encoding_set_invalid (int encoding, char *dst)
int pg_encoding_mblen (int encoding, const char *mbstr)
int pg_encoding_mblen_or_incomplete (int encoding, const char *mbstr, size_t remaining)
int pg_encoding_mblen_bounded (int encoding, const char *mbstr)
int pg_encoding_dsplen (int encoding, const char *mbstr)
int pg_encoding_verifymbchar (int encoding, const char *mbstr, int len)
int pg_encoding_verifymbstr (int encoding, const char *mbstr, int len)
int pg_encoding_max_length (int encoding)
int pg_valid_client_encoding (const char *name)
int pg_valid_server_encoding (const char *name)
bool is_encoding_supported_by_icu (int encoding)
const char * get_encoding_name_for_icu (int encoding)
bool pg_utf8_islegal (const unsigned char *source, int length)
int pg_utf_mblen (const unsigned char *s)
int pg_mule_mblen (const unsigned char *s)
int pg_mb2wchar (const char *from, pg_wchar *to)
int pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len)
int pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len)
int pg_wchar2mb (const pg_wchar *from, char *to)
int pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len)
int pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len)
int pg_char_and_wchar_strcmp (const char *s1, const pg_wchar *s2)
int pg_wchar_strncmp (const pg_wchar *s1, const pg_wchar *s2, size_t n)
int pg_char_and_wchar_strncmp (const char *s1, const pg_wchar *s2, size_t n)
size_t pg_wchar_strlen (const pg_wchar *str)
int pg_mblen (const char *mbstr)
int pg_dsplen (const char *mbstr)
int pg_mbstrlen (const char *mbstr)
int pg_mbstrlen_with_len (const char *mbstr, int limit)
int pg_mbcliplen (const char *mbstr, int len, int limit)
int pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit)
int pg_mbcharcliplen (const char *mbstr, int len, int limit)
int pg_database_encoding_max_length (void)
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
int PrepareClientEncoding (int encoding)
int SetClientEncoding (int encoding)
void InitializeClientEncoding (void)
int pg_get_client_encoding (void)
const char * pg_get_client_encoding_name (void)
void SetDatabaseEncoding (int encoding)
int GetDatabaseEncoding (void)
const char * GetDatabaseEncodingName (void)
void SetMessageEncoding (int encoding)
int GetMessageEncoding (void)
unsigned char * pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding)
int pg_do_encoding_conversion_buf (Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
char * pg_client_to_server (const char *s, int len)
char * pg_server_to_client (const char *s, int len)
char * pg_any_to_server (const char *s, int len, int encoding)
char * pg_server_to_any (const char *s, int len, int encoding)
void pg_unicode_to_server (pg_wchar c, unsigned char *s)
bool pg_unicode_to_server_noerror (pg_wchar c, unsigned char *s)
unsigned short BIG5toCNS (unsigned short big5, unsigned char *lc)
unsigned short CNStoBIG5 (unsigned short cns, unsigned char lc)
int UtfToLocal (const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
int LocalToUtf (const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, int encoding, bool noError)
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
pg_noreturn void report_invalid_encoding (int encoding, const char *mbstr, int len)
pg_noreturn void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len)
int local2local (const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, const unsigned char *tab, bool noError)
int latin2mic (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, bool noError)
int mic2latin (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, bool noError)
int latin2mic_with_table (const unsigned char *l, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
int mic2latin_with_table (const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, const unsigned char *tab, bool noError)
Variables
PGDLLIMPORT const pg_enc2name pg_enc2name_tbl []
PGDLLIMPORT const char * pg_enc2gettext_tbl []
PGDLLIMPORT const pg_wchar_tbl pg_wchar_table []

CHECK_ENCODING_CONVERSION_ARGS

#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)

Value:

check_encoding_conversion_args(PG_GETARG_INT32(0), \

PG_GETARG_INT32(1), \

PG_GETARG_INT32(4), \

(srcencoding), \

(destencoding))

#define PG_GETARG_INT32(n)

void check_encoding_conversion_args(int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)

Definition at line 507 of file pg_wchar.h.

IS_LC1

#define IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)

IS_LC2

#define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)

IS_LCPRV1

IS_LCPRV1_A_RANGE

#define IS_LCPRV1_A_RANGE(c) ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)

IS_LCPRV1_B_RANGE

#define IS_LCPRV1_B_RANGE(c) ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)

IS_LCPRV2

IS_LCPRV2_A_RANGE

#define IS_LCPRV2_A_RANGE(c) ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)

IS_LCPRV2_B_RANGE

#define IS_LCPRV2_B_RANGE(c) ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)

ISSJISHEAD

#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))

ISSJISTAIL

#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))

LC_ARABIC_1_COLUMN

#define LC_ARABIC_1_COLUMN 0xa5 /* Arabic 1-column (not supported) */

LC_ARABIC_2_COLUMN

#define LC_ARABIC_2_COLUMN 0xa8 /* Arabic 2-column (not supported) */

LC_ARABIC_DIGIT

#define LC_ARABIC_DIGIT 0xa4 /* Arabic digit (not supported) */

LC_ASCII_RIGHT_TO_LEFT

#define LC_ASCII_RIGHT_TO_LEFT

LC_BIG5_1

LC_BIG5_2

LC_CNS11643_1

#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */

LC_CNS11643_2

#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */

LC_CNS11643_3

#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */

LC_CNS11643_4

#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */

LC_CNS11643_5

#define LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */

LC_CNS11643_6

#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */

LC_CNS11643_7

#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */

LC_ETHIOPIC

#define LC_ETHIOPIC 0xf5 /* Ethiopic characters (not supported) */

LC_GB2312_80

#define LC_GB2312_80 0x91 /* Chinese */

LC_INDIAN_1_COLUMN

#define LC_INDIAN_1_COLUMN

LC_INDIAN_2_COLUMN

#define LC_INDIAN_2_COLUMN

LC_IPA

LC_ISO8859_1

#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */

LC_ISO8859_15

#define LC_ISO8859_15 0x8e /* ISO8859 Latin 15 (not supported yet) */

LC_ISO8859_2

#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */

LC_ISO8859_3

#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */

LC_ISO8859_4

#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */

LC_ISO8859_5

#define LC_ISO8859_5 0x8c /* ISO8859 Cyrillic */

LC_ISO8859_6

#define LC_ISO8859_6 0x87 /* Arabic (not supported yet) */

LC_ISO8859_7

#define LC_ISO8859_7 0x86 /* Greek (not supported yet) */

LC_ISO8859_8

#define LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */

LC_ISO8859_9

#define LC_ISO8859_9 0x8d /* ISO8859 Latin 5 (not supported yet) */

LC_JISX0201K

#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */

LC_JISX0201R

#define LC_JISX0201R 0x8a /* Japanese 1 byte Roman */

LC_JISX0208

#define LC_JISX0208 0x92 /* Japanese Kanji (JIS X 0208) */

LC_JISX0208_1978

#define LC_JISX0208_1978 0x90 /* Japanese Kanji, old JIS (not supported) */

LC_JISX0212

#define LC_JISX0212 0x94 /* Japanese Kanji (JIS X 0212) */

LC_JISX0213_1

LC_KOI8_R

#define LC_KOI8_R 0x8b /* Cyrillic KOI8-R */

LC_KS5601

#define LC_KS5601 0x93 /* Korean */

LC_LAO

LC_SISHENG

LC_TIBETAN

#define LC_TIBETAN 0xfc /* Tibetan (not supported) */

LC_TIBETAN_1_COLUMN

#define LC_TIBETAN_1_COLUMN

LC_TIS620

#define LC_TIS620 0x85 /* Thai (not supported yet) */

LC_UNICODE_SUBSET

#define LC_UNICODE_SUBSET

LC_UNICODE_SUBSET_2

#define LC_UNICODE_SUBSET_2

LC_UNICODE_SUBSET_3

#define LC_UNICODE_SUBSET_3

LC_VISCII_LOWER

LC_VISCII_UPPER

LCPRV1_A

LCPRV1_B

LCPRV2_A

LCPRV2_B

MAX_CONVERSION_GROWTH

#define MAX_CONVERSION_GROWTH 4

MAX_CONVERSION_INPUT_LENGTH

#define MAX_CONVERSION_INPUT_LENGTH 16

MAX_MULTIBYTE_CHAR_LEN

#define MAX_MULTIBYTE_CHAR_LEN 4

MAX_UNICODE_EQUIVALENT_STRING

#define MAX_UNICODE_EQUIVALENT_STRING 16

pg_char_to_encoding

#define pg_char_to_encoding pg_char_to_encoding_private

PG_ENCODING_BE_LAST

PG_ENCODING_IS_CLIENT_ONLY

pg_encoding_to_char

#define pg_encoding_to_char pg_encoding_to_char_private

pg_utf_mblen

#define pg_utf_mblen pg_utf_mblen_private

PG_VALID_BE_ENCODING

PG_VALID_ENCODING

PG_VALID_FE_ENCODING

pg_valid_server_encoding

#define pg_valid_server_encoding pg_valid_server_encoding_private

pg_valid_server_encoding_id

#define pg_valid_server_encoding_id pg_valid_server_encoding_id_private

SS2

#define SS2 0x8e /* single shift 2 (JIS0201) */

SS3

#define SS3 0x8f /* single shift 3 (JIS0212) */

mb2wchar_with_len_converter

typedef int(* mb2wchar_with_len_converter) (const unsigned char *from, pg_wchar *to, int len)

mbchar_verifier

typedef int(* mbchar_verifier) (const unsigned char *mbstr, int len)

mbcharacter_incrementer

typedef bool(* mbcharacter_incrementer) (unsigned char *mbstr, int len)

mbdisplaylen_converter

typedef int(* mbdisplaylen_converter) (const unsigned char *mbstr)

mblen_converter

typedef int(* mblen_converter) (const unsigned char *mbstr)

mbstr_verifier

typedef int(* mbstr_verifier) (const unsigned char *mbstr, int len)

pg_enc

pg_enc2name

utf_local_conversion_func

wchar2mb_with_len_converter

typedef int(* wchar2mb_with_len_converter) (const pg_wchar *from, unsigned char *to, int len)

pg_enc

Enumerator
PG_SQL_ASCII
PG_EUC_JP
PG_EUC_CN
PG_EUC_KR
PG_EUC_TW
PG_EUC_JIS_2004
PG_UTF8
PG_MULE_INTERNAL
PG_LATIN1
PG_LATIN2
PG_LATIN3
PG_LATIN4
PG_LATIN5
PG_LATIN6
PG_LATIN7
PG_LATIN8
PG_LATIN9
PG_LATIN10
PG_WIN1256
PG_WIN1258
PG_WIN866
PG_WIN874
PG_KOI8R
PG_WIN1251
PG_WIN1252
PG_ISO_8859_5
PG_ISO_8859_6
PG_ISO_8859_7
PG_ISO_8859_8
PG_WIN1250
PG_WIN1253
PG_WIN1254
PG_WIN1255
PG_WIN1257
PG_KOI8U
PG_SJIS
PG_BIG5
PG_GBK
PG_UHC
PG_GB18030
PG_JOHAB
PG_SHIFT_JIS_2004
_PG_LAST_ENCODING_

Definition at line 224 of file pg_wchar.h.

227 * WARNING: If you add some encoding don't forget to update

228 * the pg_enc2name_tbl[] array (in src/common/encnames.c),

229 * the pg_enc2gettext_tbl[] array (in src/common/encnames.c) and

230 * the pg_wchar_table[] array (in src/common/wchar.c) and to check

231 * PG_ENCODING_BE_LAST macro.

232 *

233 * PG_SQL_ASCII is default encoding and must be = 0.

234 *

235 * XXX We must avoid renumbering any backend encoding until libpq's major

236 * version number is increased beyond 5; it turns out that the backend

237 * encoding IDs are effectively part of libpq's ABI as far as 8.2 initdb and

238 * psql are concerned.

239 */

240typedef enum pg_enc

241{

242 PG_SQL_ASCII = 0, /* SQL/ASCII */

243 PG_EUC_JP, /* EUC for Japanese */

244 PG_EUC_CN, /* EUC for Chinese */

245 PG_EUC_KR, /* EUC for Korean */

246 PG_EUC_TW, /* EUC for Taiwan */

247 PG_EUC_JIS_2004, /* EUC-JIS-2004 */

248 PG_UTF8, /* Unicode UTF8 */

249 PG_MULE_INTERNAL, /* Mule internal code */

250 PG_LATIN1, /* ISO-8859-1 Latin 1 */

251 PG_LATIN2, /* ISO-8859-2 Latin 2 */

252 PG_LATIN3, /* ISO-8859-3 Latin 3 */

253 PG_LATIN4, /* ISO-8859-4 Latin 4 */

254 PG_LATIN5, /* ISO-8859-9 Latin 5 */

255 PG_LATIN6, /* ISO-8859-10 Latin6 */

256 PG_LATIN7, /* ISO-8859-13 Latin7 */

257 PG_LATIN8, /* ISO-8859-14 Latin8 */

258 PG_LATIN9, /* ISO-8859-15 Latin9 */

259 PG_LATIN10, /* ISO-8859-16 Latin10 */

260 PG_WIN1256, /* windows-1256 */

261 PG_WIN1258, /* Windows-1258 */

262 PG_WIN866, /* (MS-DOS CP866) */

263 PG_WIN874, /* windows-874 */

264 PG_KOI8R, /* KOI8-R */

265 PG_WIN1251, /* windows-1251 */

266 PG_WIN1252, /* windows-1252 */

267 PG_ISO_8859_5, /* ISO-8859-5 */

268 PG_ISO_8859_6, /* ISO-8859-6 */

269 PG_ISO_8859_7, /* ISO-8859-7 */

270 PG_ISO_8859_8, /* ISO-8859-8 */

271 PG_WIN1250, /* windows-1250 */

272 PG_WIN1253, /* windows-1253 */

273 PG_WIN1254, /* windows-1254 */

BIG5toCNS()

unsigned short BIG5toCNS ( unsigned short big5,
unsigned char * lc
)

Definition at line 292 of file big5.c.

293{

294 unsigned short cns = 0;

295 int i;

296

297 if (big5 < 0xc940U)

298 {

299

300

301 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)

302 {

303 if (b1c4[i][0] == big5)

304 {

306 return (b1c4[i][1] | 0x8080U);

307 }

308 }

309

312 }

313 else if (big5 == 0xc94aU)

314 {

315

317 cns = 0x4442;

318 }

319 else

320 {

321

322 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)

323 {

324 if (b2c3[i][0] == big5)

325 {

327 return (b2c3[i][1] | 0x8080U);

328 }

329 }

330

333 }

334

335 if (0 == cns)

336 {

337 *lc = 0;

338 return (unsigned short) '?';

339 }

340

341 return cns | 0x8080;

342}

static const codes_t big5Level1ToCnsPlane1[25]

static const codes_t big5Level2ToCnsPlane2[48]

static unsigned short BinarySearchRange(const codes_t *array, int high, unsigned short code)

static const unsigned short b2c3[][2]

static const unsigned short b1c4[][2]

References b1c4, b2c3, big5Level1ToCnsPlane1, big5Level2ToCnsPlane2, BinarySearchRange(), i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by big52euc_tw(), and big52mic().

check_encoding_conversion_args()

void check_encoding_conversion_args ( int src_encoding,
int dest_encoding,
int len,
int expected_src_encoding,
int expected_dest_encoding
)

Definition at line 1670 of file mbutils.c.

1675{

1677 elog(ERROR, "invalid source encoding ID: %d", src_encoding);

1678 if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)

1679 elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",

1683 elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);

1684 if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)

1685 elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",

1688 if (len < 0)

1689 elog(ERROR, "encoding conversion length must not be negative");

1690}

const pg_enc2name pg_enc2name_tbl[]

#define PG_VALID_ENCODING(_enc)

References elog, ERROR, len, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

CNStoBIG5()

unsigned short CNStoBIG5 ( unsigned short cns,
unsigned char lc
)

Definition at line 345 of file big5.c.

346{

347 int i;

348 unsigned int big5 = 0;

349

350 cns &= 0x7f7f;

351

352 switch (lc)

353 {

356 break;

359 break;

361 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)

362 {

363 if (b2c3[i][1] == cns)

364 return b2c3[i][0];

365 }

366 break;

368 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)

369 {

370 if (b1c4[i][1] == cns)

371 return b1c4[i][0];

372 }

373 default:

374 break;

375 }

376 return big5;

377}

static const codes_t cnsPlane2ToBig5Level2[49]

static const codes_t cnsPlane1ToBig5Level1[26]

References b1c4, b2c3, BinarySearchRange(), cnsPlane1ToBig5Level1, cnsPlane2ToBig5Level2, i, LC_CNS11643_1, LC_CNS11643_2, LC_CNS11643_3, and LC_CNS11643_4.

Referenced by euc_tw2big5(), and mic2big5().

get_encoding_name_for_icu()

const char * get_encoding_name_for_icu ( int encoding )

GetDatabaseEncoding()

int GetDatabaseEncoding ( void )

Definition at line 1262 of file mbutils.c.

1263{

1265}

static const pg_enc2name * DatabaseEncoding

References DatabaseEncoding, and pg_enc2name::encoding.

Referenced by ascii(), BeginCopyFrom(), BeginCopyTo(), char2wchar(), chr(), CollationCreate(), CollationGetCollid(), compareStrings(), convert_from_utf8(), convert_to_utf8(), CopyConversionError(), CopyConvertBuf(), create_pg_locale_builtin(), create_pg_locale_icu(), create_pg_locale_libc(), cstr2sv(), dblink_connect(), dblink_get_conn(), DefineCollation(), Generic_Text_IC_like(), GenericMatchText(), get_collation_oid(), get_json_object_as_hash(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_recv(), jsonb_from_cstring(), locate_stem_module(), LogicalOutputWrite(), makeJsonLexContext(), p_isspecial(), ParallelWorkerMain(), pg_database_encoding_character_incrementer(), pg_database_encoding_max_length(), pg_generic_charinc(), pg_perm_setlocale(), pg_set_regex_collation(), pg_unicode_to_server(), pg_unicode_to_server_noerror(), pg_verifymbstr(), pgss_store(), PLyUnicode_Bytes(), populate_array_json(), PrepareClientEncoding(), read_extension_script_file(), SetClientEncoding(), str_casefold(), sv2cstr(), text_position_setup(), to_ascii_default(), type_maximum_size(), unicode_assigned(), unicode_norm_form_from_string(), wchar2char(), xml_in(), xml_is_document(), xmlparse(), and xmltotext_with_options().

GetDatabaseEncodingName()

const char * GetDatabaseEncodingName ( void )

Definition at line 1268 of file mbutils.c.

References DatabaseEncoding, and pg_enc2name::name.

Referenced by check_client_encoding(), CheckMyDatabase(), connect_pg_server(), dblink_connect(), dblink_get_conn(), get_collation_oid(), InitializeClientEncoding(), IsThereCollationInNamespace(), json_errdetail(), libpqrcv_connect(), locate_stem_module(), pg_unicode_to_server(), ProcessConfigFileInternal(), and regcollationin().

GetMessageEncoding()

int GetMessageEncoding ( void )

InitializeClientEncoding()

void InitializeClientEncoding ( void )

Definition at line 282 of file mbutils.c.

283{

284 int current_server_encoding;

285

288

291 {

292

293

294

295

297 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

298 errmsg("conversion between %s and %s is not supported",

301 }

302

303

304

305

306

307

309 if (current_server_encoding != PG_UTF8 &&

311 {

312 Oid utf8_to_server_proc;

313

315 utf8_to_server_proc =

317 current_server_encoding);

318

320 {

322

327

329 }

330 }

331}

#define OidIsValid(objectId)

int errcode(int sqlerrcode)

int errmsg(const char *fmt,...)

#define ereport(elevel,...)

void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)

Assert(PointerIsAligned(start, uint64))

int GetDatabaseEncoding(void)

static FmgrInfo * Utf8ToServerConvProc

const char * GetDatabaseEncodingName(void)

int SetClientEncoding(int encoding)

int PrepareClientEncoding(int encoding)

static bool backend_startup_complete

static int pending_client_encoding

void * MemoryContextAlloc(MemoryContext context, Size size)

MemoryContext TopMemoryContext

Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)

static void AssertCouldGetRelation(void)

References Assert(), AssertCouldGetRelation(), backend_startup_complete, ereport, errcode(), errmsg(), FATAL, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), GetDatabaseEncodingName(), MemoryContextAlloc(), name, OidIsValid, pending_client_encoding, pg_enc2name_tbl, PG_SQL_ASCII, PG_UTF8, PrepareClientEncoding(), SetClientEncoding(), TopMemoryContext, and Utf8ToServerConvProc.

Referenced by InitPostgres().

is_encoding_supported_by_icu()

bool is_encoding_supported_by_icu ( int encoding )

is_utf16_surrogate_first()

static bool is_utf16_surrogate_first ( pg_wchar c) inlinestatic

is_utf16_surrogate_second()

static bool is_utf16_surrogate_second ( pg_wchar c) inlinestatic

is_valid_unicode_codepoint()

static bool is_valid_unicode_codepoint ( pg_wchar c) inlinestatic

latin2mic()

int latin2mic ( const unsigned char * l,
unsigned char * p,
int len,
int lc,
int encoding,
bool noError
)

Definition at line 89 of file conv.c.

91{

92 const unsigned char *start = l;

93 int c1;

94

95 while (len > 0)

96 {

97 c1 = *l;

98 if (c1 == 0)

99 {

100 if (noError)

101 break;

103 }

105 *p++ = lc;

106 *p++ = c1;

107 l++;

109 }

110 *p = '\0';

111

112 return l - start;

113}

#define IS_HIGHBIT_SET(ch)

void report_invalid_encoding(int encoding, const char *mbstr, int len)

References encoding, IS_HIGHBIT_SET, len, report_invalid_encoding(), and start.

Referenced by koi8r_to_mic(), latin1_to_mic(), latin2_to_mic(), latin3_to_mic(), and latin4_to_mic().

latin2mic_with_table()

int latin2mic_with_table ( const unsigned char * l,
unsigned char * p,
int len,
int lc,
int encoding,
const unsigned char * tab,
bool noError
)

Definition at line 194 of file conv.c.

201{

202 const unsigned char *start = l;

203 unsigned char c1,

204 c2;

205

206 while (len > 0)

207 {

208 c1 = *l;

209 if (c1 == 0)

210 {

211 if (noError)

212 break;

214 }

216 *p++ = c1;

217 else

218 {

220 if (c2)

221 {

222 *p++ = lc;

223 *p++ = c2;

224 }

225 else

226 {

227 if (noError)

228 break;

230 (const char *) l, len);

231 }

232 }

233 l++;

235 }

236 *p = '\0';

237

238 return l - start;

239}

void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)

References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by iso_to_mic(), win1250_to_mic(), win1251_to_mic(), and win866_to_mic().

local2local()

int local2local ( const unsigned char * l,
unsigned char * p,
int len,
int src_encoding,
int dest_encoding,
const unsigned char * tab,
bool noError
)

Definition at line 33 of file conv.c.

40{

41 const unsigned char *start = l;

42 unsigned char c1,

43 c2;

44

45 while (len > 0)

46 {

47 c1 = *l;

48 if (c1 == 0)

49 {

50 if (noError)

51 break;

53 }

55 *p++ = c1;

56 else

57 {

59 if (c2)

60 *p++ = c2;

61 else

62 {

63 if (noError)

64 break;

66 (const char *) l, len);

67 }

68 }

69 l++;

71 }

72 *p = '\0';

73

75}

References HIGHBIT, IS_HIGHBIT_SET, len, report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by iso_to_koi8r(), iso_to_win1251(), iso_to_win866(), koi8r_to_iso(), koi8r_to_win1251(), koi8r_to_win866(), latin2_to_win1250(), win1250_to_latin2(), win1251_to_iso(), win1251_to_koi8r(), win1251_to_win866(), win866_to_iso(), win866_to_koi8r(), and win866_to_win1251().

LocalToUtf()

Definition at line 717 of file conv.c.

724{

726 int l;

728 const unsigned char *start = iso;

729

732 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

734

735 for (; len > 0; len -= l)

736 {

737 unsigned char b1 = 0;

738 unsigned char b2 = 0;

739 unsigned char b3 = 0;

740 unsigned char b4 = 0;

741

742

743 if (*iso == '\0')

744 break;

745

747 {

748

749 *utf++ = *iso++;

750 l = 1;

751 continue;

752 }

753

755 if (l < 0)

756 break;

757

758

759 if (l == 1)

760 b4 = *iso++;

761 else if (l == 2)

762 {

763 b3 = *iso++;

764 b4 = *iso++;

765 }

766 else if (l == 3)

767 {

768 b2 = *iso++;

769 b3 = *iso++;

770 b4 = *iso++;

771 }

772 else if (l == 4)

773 {

774 b1 = *iso++;

775 b2 = *iso++;

776 b3 = *iso++;

777 b4 = *iso++;

778 }

779 else

780 {

781 elog(ERROR, "unsupported character length %d", l);

782 iiso = 0;

783 }

784 iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);

785

786 if (map)

787 {

789

790 if (converted)

791 {

793 continue;

794 }

795

796

797 if (cmap)

798 {

799 cp = bsearch(&iiso, cmap, cmapsize,

801

802 if (cp)

803 {

806 continue;

807 }

808 }

809 }

810

811

812 if (conv_func)

813 {

814 uint32 converted = (*conv_func) (iiso);

815

816 if (converted)

817 {

819 continue;

820 }

821 }

822

823

824 iso -= l;

825 if (noError)

826 break;

828 (const char *) iso, len);

829 }

830

831

832 if (len > 0 && !noError)

834

835 *utf = '\0';

836

837 return iso - start;

838}

static unsigned char * store_coded_char(unsigned char *dest, uint32 code)

static uint32 pg_mb_radix_conv(const pg_mb_radix_tree *rt, int l, unsigned char b1, unsigned char b2, unsigned char b3, unsigned char b4)

static int compare4(const void *p1, const void *p2)

int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)

References compare4(), elog, encoding, ereport, errcode(), errmsg(), ERROR, IS_HIGHBIT_SET, len, pg_encoding_verifymbchar(), pg_mb_radix_conv(), PG_UTF8, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, store_coded_char(), pg_local_to_utf_combined::utf1, and pg_local_to_utf_combined::utf2.

Referenced by big5_to_utf8(), euc_cn_to_utf8(), euc_jis_2004_to_utf8(), euc_jp_to_utf8(), euc_kr_to_utf8(), euc_tw_to_utf8(), gb18030_to_utf8(), gbk_to_utf8(), iso8859_to_utf8(), johab_to_utf8(), koi8r_to_utf8(), koi8u_to_utf8(), shift_jis_2004_to_utf8(), sjis_to_utf8(), uhc_to_utf8(), and win_to_utf8().

mic2latin()

int mic2latin ( const unsigned char * mic,
unsigned char * p,
int len,
int lc,
int encoding,
bool noError
)

Definition at line 127 of file conv.c.

129{

130 const unsigned char *start = mic;

131 int c1;

132

133 while (len > 0)

134 {

135 c1 = *mic;

136 if (c1 == 0)

137 {

138 if (noError)

139 break;

141 }

143 {

144

145 *p++ = c1;

146 mic++;

148 }

149 else

150 {

152

153 if (len < l)

154 {

155 if (noError)

156 break;

159 }

161 {

162 if (noError)

163 break;

165 (const char *) mic, len);

166 }

167 *p++ = mic[1];

168 mic += 2;

169 len -= 2;

170 }

171 }

172 *p = '\0';

173

174 return mic - start;

175}

int pg_mule_mblen(const unsigned char *s)

References encoding, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_koi8r(), mic_to_latin1(), mic_to_latin2(), mic_to_latin3(), and mic_to_latin4().

mic2latin_with_table()

int mic2latin_with_table ( const unsigned char * mic,
unsigned char * p,
int len,
int lc,
int encoding,
const unsigned char * tab,
bool noError
)

Definition at line 257 of file conv.c.

264{

265 const unsigned char *start = mic;

266 unsigned char c1,

267 c2;

268

269 while (len > 0)

270 {

271 c1 = *mic;

272 if (c1 == 0)

273 {

274 if (noError)

275 break;

277 }

279 {

280

281 *p++ = c1;

282 mic++;

284 }

285 else

286 {

288

289 if (len < l)

290 {

291 if (noError)

292 break;

295 }

297 (c2 = tab[mic[1] - HIGHBIT]) == 0)

298 {

299 if (noError)

300 break;

302 (const char *) mic, len);

303 break;

304 }

305 *p++ = c2;

306 mic += 2;

307 len -= 2;

308 }

309 }

310 *p = '\0';

311

312 return mic - start;

313}

References encoding, HIGHBIT, IS_HIGHBIT_SET, len, PG_MULE_INTERNAL, pg_mule_mblen(), report_invalid_encoding(), report_untranslatable_char(), and start.

Referenced by mic_to_iso(), mic_to_win1250(), mic_to_win1251(), and mic_to_win866().

pg_any_to_server()

char * pg_any_to_server ( const char * s,
int len,
int encoding
)

Definition at line 677 of file mbutils.c.

678{

679 if (len <= 0)

680 return unconstify(char *, s);

681

684 {

685

686

687

690 }

691

693 {

694

695

696

697

698

699

700

701

702

705 else

706 {

707 int i;

708

709 for (i = 0; i < len; i++)

710 {

713 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),

714 errmsg("invalid byte value for encoding \"%s\": 0x%02x",

716 (unsigned char) s[i])));

717 }

718 }

720 }

721

722

725

726

731}

#define unconstify(underlying_type, expr)

static const pg_enc2name * ClientEncoding

unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)

bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)

static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, i, IS_HIGHBIT_SET, len, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, pg_verify_mbstr(), and unconstify.

Referenced by ASN1_STRING_to_text(), cache_single_string(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), tsearch_readline(), utf_u2e(), X509_NAME_to_cstring(), and xml_recv().

pg_char_and_wchar_strcmp()

int pg_char_and_wchar_strcmp ( const char * s1,
const pg_wchar * s2
)

Definition at line 41 of file wstrcmp.c.

42{

43 while ((pg_wchar) ((unsigned char) *s1) == *s2++)

44 if (*s1++ == 0)

45 return 0;

46 return *(const unsigned char *) s1 - *(const pg_wchar *) (s2 - 1);

47}

References s1 and s2.

pg_char_and_wchar_strncmp()

int pg_char_and_wchar_strncmp ( const char * s1,
const pg_wchar * s2,
size_t n
)

Definition at line 55 of file wstrncmp.c.

56{

57 if (n == 0)

58 return 0;

59 do

60 {

61 if ((pg_wchar) ((unsigned char) *s1) != *s2++)

62 return ((pg_wchar) ((unsigned char) *s1) - *(s2 - 1));

63 if (*s1++ == 0)

64 break;

65 } while (--n != 0);

66 return 0;

67}

References s1 and s2.

Referenced by element() and lookupcclass().

pg_char_to_encoding()

int pg_char_to_encoding ( const char * name )

Definition at line 549 of file encnames.c.

550{

553 *last = base + nel - 1,

554 *position;

555 int result;

558

559 if (name == NULL || *name == '\0')

560 return -1;

561

563 return -1;

564

566

567 while (last >= base)

568 {

569 position = base + ((last - base) >> 1);

570 result = key[0] - position->name[0];

571

572 if (result == 0)

573 {

574 result = strcmp(key, position->name);

575 if (result == 0)

576 return position->encoding;

577 }

578 if (result < 0)

579 last = position - 1;

580 else

581 base = position + 1;

582 }

583 return -1;

584}

static char * clean_encoding_name(const char *key, char *newkey)

static const pg_encname pg_encname_tbl[]

pg_client_to_server()

char * pg_client_to_server ( const char * s,
int len
)

pg_database_encoding_character_incrementer()

pg_database_encoding_max_length()

int pg_database_encoding_max_length ( void )

Definition at line 1547 of file mbutils.c.

1548{

1550}

const pg_wchar_tbl pg_wchar_table[]

References GetDatabaseEncoding(), pg_wchar_tbl::maxmblen, and pg_wchar_table.

Referenced by bpcharlen(), charlen_to_bytelen(), dotrim(), downcase_identifier(), gbt_bpchar_consistent(), gbt_text_compress(), gbt_text_consistent(), generate_trgm_only(), Generic_Text_IC_like(), GenericMatchText(), infix(), init_tsvector_parser(), like_escape(), like_fixed_prefix(), lpad(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), protect_out_of_mem(), regexp_fixed_prefix(), rpad(), setup_regexp_matches(), setup_test_matches(), show_trgm(), strlower_libc(), strlower_libc_mb(), strtitle_libc(), strtitle_libc_mb(), strupper_libc(), strupper_libc_mb(), text_length(), text_position_setup(), text_reverse(), text_substring(), TParserInit(), translate(), and tsvectorout().

pg_do_encoding_conversion()

unsigned char * pg_do_encoding_conversion ( unsigned char * src,
int len,
int src_encoding,
int dest_encoding
)

Definition at line 357 of file mbutils.c.

359{

360 unsigned char *result;

361 Oid proc;

362

363 if (len <= 0)

364 return src;

365

366 if (src_encoding == dest_encoding)

367 return src;

368

370 return src;

371

373 {

374

375 (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);

376 return src;

377 }

378

380 elog(ERROR, "cannot perform encoding conversion outside a transaction");

381

385 (errcode(ERRCODE_UNDEFINED_FUNCTION),

386 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",

389

390

391

392

393

394

395

396

397

398

401 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),

402 errmsg("out of memory"),

403 errdetail("String of %d bytes is too long for encoding conversion.",

405

406 result = (unsigned char *)

409

417

418

419

420

421

422

423 if (len > 1000000)

424 {

425 Size resultlen = strlen((char *) result);

426

429 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),

430 errmsg("out of memory"),

431 errdetail("String of %d bytes is too long for encoding conversion.",

433

434 result = (unsigned char *) repalloc(result, resultlen + 1);

435 }

436

437 return result;

438}

int errdetail(const char *fmt,...)

#define OidFunctionCall6(functionId, arg1, arg2, arg3, arg4, arg5, arg6)

void * repalloc(void *pointer, Size size)

MemoryContext CurrentMemoryContext

void * MemoryContextAllocHuge(MemoryContext context, Size size)

#define MAX_CONVERSION_GROWTH

#define pg_encoding_to_char

static Datum BoolGetDatum(bool X)

static Datum CStringGetDatum(const char *X)

static Datum Int32GetDatum(int32 X)

bool IsTransactionState(void)

References BoolGetDatum(), CStringGetDatum(), CurrentMemoryContext, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, FindDefaultConversionProc(), Int32GetDatum(), IsTransactionState(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), OidFunctionCall6, OidIsValid, pg_encoding_to_char, PG_SQL_ASCII, pg_verify_mbstr(), and repalloc().

Referenced by convert_charset(), pg_any_to_server(), pg_convert(), and pg_server_to_any().

pg_do_encoding_conversion_buf()

int pg_do_encoding_conversion_buf ( Oid proc,
int src_encoding,
int dest_encoding,
unsigned char * src,
int srclen,
unsigned char * dest,
int destlen,
bool noError
)

pg_dsplen()

int pg_dsplen ( const char * mbstr )

pg_encoding_dsplen()

int pg_encoding_dsplen ( int encoding,
const char * mbstr
)

pg_encoding_max_length()

int pg_encoding_max_length ( int encoding )

Definition at line 2213 of file wchar.c.

2214{

2216

2217

2218

2219

2220

2224}

References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), and type_maximum_size().

pg_encoding_mb2wchar_with_len()

int pg_encoding_mb2wchar_with_len ( int encoding,
const char * from,
pg_wchar * to,
int len
)

pg_encoding_mbcliplen()

int pg_encoding_mbcliplen ( int encoding,
const char * mbstr,
int len,
int limit
)

Definition at line 1094 of file mbutils.c.

1096{

1098 int clen = 0;

1099 int l;

1100

1101

1104

1106

1107 while (len > 0 && *mbstr)

1108 {

1109 l = (*mblen_fn) ((const unsigned char *) mbstr);

1110 if ((clen + l) > limit)

1111 break;

1112 clen += l;

1113 if (clen == limit)

1114 break;

1115 len -= l;

1116 mbstr += l;

1117 }

1118 return clen;

1119}

static int cliplen(const char *str, int len, int limit)

int(* mblen_converter)(const unsigned char *mbstr)

int pg_encoding_max_length(int encoding)

References cliplen(), encoding, len, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.

Referenced by pg_mbcliplen().

pg_encoding_mblen()

int pg_encoding_mblen ( int encoding,
const char * mbstr
)

Definition at line 2135 of file wchar.c.

References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), fmtIdEnc(), pg_encoding_mblen_bounded(), pg_encoding_mblen_or_incomplete(), PQescapeInternal(), PQmblen(), PQmblenBounded(), and test_enc_setup().

pg_encoding_mblen_bounded()

int pg_encoding_mblen_bounded ( int encoding,
const char * mbstr
)

pg_encoding_mblen_or_incomplete()

int pg_encoding_mblen_or_incomplete ( int encoding,
const char * mbstr,
size_t remaining
)

pg_encoding_set_invalid()

void pg_encoding_set_invalid ( int encoding,
char * dst
)

pg_encoding_to_char()

const char * pg_encoding_to_char ( int encoding )

Definition at line 587 of file encnames.c.

588{

590 {

592

594 return p->name;

595 }

596 return "";

597}

pg_encoding_verifymbchar()

int pg_encoding_verifymbchar ( int encoding,
const char * mbstr,
int len
)

Definition at line 2189 of file wchar.c.

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by appendStringLiteral(), big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), fmtIdEnc(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), PQescapeStringInternal(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().

pg_encoding_verifymbstr()

int pg_encoding_verifymbstr ( int encoding,
const char * mbstr,
int len
)

Definition at line 2202 of file wchar.c.

References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.

Referenced by add_file_to_manifest(), CopyConvertBuf(), handle_oauth_sasl_error(), parse_oauth_json(), PQescapeInternal(), test_enc_conversion(), test_enc_setup(), and test_one_vector_escape().

pg_encoding_wchar2mb_with_len()

int pg_encoding_wchar2mb_with_len ( int encoding,
const pg_wchar * from,
char * to,
int len
)

pg_get_client_encoding()

int pg_get_client_encoding ( void )

pg_get_client_encoding_name()

const char * pg_get_client_encoding_name ( void )

pg_mb2wchar()

int pg_mb2wchar ( const char * from,
pg_wchar * to
)

pg_mb2wchar_with_len()

int pg_mb2wchar_with_len ( const char * from,
pg_wchar * to,
int len
)

Definition at line 987 of file mbutils.c.

References DatabaseEncoding, pg_enc2name::encoding, len, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

Referenced by CheckAffix(), NIAddAffix(), RE_compile(), RE_compile_and_cache(), RE_execute(), regcomp_auth_token(), regexec_auth_token(), replace_text_regexp(), setup_regexp_matches(), setup_test_matches(), test_re_compile(), and TParserInit().

pg_mbcharcliplen()

int pg_mbcharcliplen ( const char * mbstr,
int len,
int limit
)

pg_mbcliplen()

int pg_mbcliplen ( const char * mbstr,
int len,
int limit
)

Definition at line 1084 of file mbutils.c.

1085{

1087 len, limit);

1088}

int pg_encoding_mbcliplen(int encoding, const char *mbstr, int len, int limit)

References DatabaseEncoding, pg_enc2name::encoding, len, and pg_encoding_mbcliplen().

Referenced by appendStringInfoStringQuoted(), bpchar_name(), ChooseIndexColumnNames(), CopyLimitPrintoutLength(), ExecBuildSlotPartitionKeyDescription(), ExecBuildSlotValueDescription(), make_colname_unique(), make_greater_string(), makeMultirangeTypeName(), makeObjectName(), MemoryContextStatsPrint(), nameconcatoid(), namein(), pgstat_clip_activity(), pgstat_report_appname(), PutMemoryContextsStatsTupleStore(), set_rtable_names(), text_name(), text_to_cstring_buffer(), and truncate_identifier().

pg_mblen()

int pg_mblen ( const char * mbstr )

Definition at line 1024 of file mbutils.c.

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mblen, and pg_wchar_table.

Referenced by addCompoundAffixFlagValue(), bit_in(), charlen_to_bytelen(), DCH_from_char(), dotrim(), find_word(), findchar(), findchar2(), findwrd(), gbt_var_node_cp_len(), get_modifiers(), get_nextfield(), get_wildcard_part(), getlexeme(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), hex_decode_safe(), infix(), initTrie(), lpad(), make_trigrams(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NUM_eat_non_data_chars(), NUM_processor(), parse_affentry(), parse_format(), parse_lquery(), parse_ltree(), parse_or_operator(), parse_re_flags(), parse_test_flags(), pg_base64_decode(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), prssyntaxerror(), px_crypt_shacrypt(), readstoplist(), report_json_context(), rpad(), RS_compile(), RS_execute(), RS_isRegis(), similar_escape_internal(), split_text(), t_isalnum(), t_isalpha(), text_format(), text_position_next(), text_position_next_internal(), text_reverse(), text_substring(), text_to_bits(), textregexreplace(), thesaurusRead(), TParserGet(), translate(), ts_stat_sql(), tsvectorout(), unaccent_lexize(), varbit_in(), varstr_levenshtein(), and wchareq().

pg_mbstrlen()

int pg_mbstrlen ( const char * mbstr )

pg_mbstrlen_with_len()

int pg_mbstrlen_with_len ( const char * mbstr,
int limit
)

Definition at line 1058 of file mbutils.c.

1059{

1060 int len = 0;

1061

1062

1064 return limit;

1065

1066 while (limit > 0 && *mbstr)

1067 {

1069

1070 limit -= l;

1071 mbstr += l;

1073 }

1074 return len;

1075}

References len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), scanner_errposition(), similar_escape_internal(), text_left(), text_length(), text_position_get_match_pos(), text_right(), text_substring(), unicode_assigned(), unicode_is_normalized(), unicode_normalize_func(), and varstr_levenshtein().

pg_mule_mblen()

int pg_mule_mblen ( const unsigned char * s )

pg_server_to_any()

char * pg_server_to_any ( const char * s,
int len,
int encoding
)

Definition at line 750 of file mbutils.c.

751{

752 if (len <= 0)

753 return unconstify(char *, s);

754

757 return unconstify(char *, s);

758

760 {

761

764 }

765

766

769

770

775}

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, len, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, pg_verify_mbstr(), and unconstify.

Referenced by compareStrings(), CopyAttributeOutCSV(), CopyAttributeOutText(), CopyToTextLikeStart(), daitch_mokotoff(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), PLyUnicode_FromStringAndSize(), and utf_e2u().

pg_server_to_client()

char * pg_server_to_client ( const char * s,
int len
)

pg_unicode_to_server()

void pg_unicode_to_server ( pg_wchar c,
unsigned char * s
)

Definition at line 865 of file mbutils.c.

866{

868 int c_as_utf8_len;

869 int server_encoding;

870

871

872

873

874

877 (errcode(ERRCODE_SYNTAX_ERROR),

878 errmsg("invalid Unicode code point")));

879

880

881 if (c <= 0x7F)

882 {

883 s[0] = (unsigned char) c;

884 s[1] = '\0';

885 return;

886 }

887

888

890 if (server_encoding == PG_UTF8)

891 {

894 return;

895 }

896

897

900 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

901 errmsg("conversion between %s and %s is not supported",

904

905

908 c_as_utf8[c_as_utf8_len] = '\0';

909

910

918}

#define FunctionCall6(flinfo, arg1, arg2, arg3, arg4, arg5, arg6)

#define MAX_MULTIBYTE_CHAR_LEN

static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)

static bool is_valid_unicode_codepoint(pg_wchar c)

References BoolGetDatum(), CStringGetDatum(), ereport, errcode(), errmsg(), ERROR, FunctionCall6, GetDatabaseEncoding(), GetDatabaseEncodingName(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, name, pg_enc2name_tbl, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addunicode(), addUnicodeChar(), map_xml_name_to_sql_identifier(), str_udeescape(), and unistr().

pg_unicode_to_server_noerror()

bool pg_unicode_to_server_noerror ( pg_wchar c,
unsigned char * s
)

Definition at line 927 of file mbutils.c.

928{

930 int c_as_utf8_len;

931 int converted_len;

932 int server_encoding;

933

934

936 return false;

937

938

939 if (c <= 0x7F)

940 {

941 s[0] = (unsigned char) c;

942 s[1] = '\0';

943 return true;

944 }

945

946

948 if (server_encoding == PG_UTF8)

949 {

952 return true;

953 }

954

955

957 return false;

958

959

962 c_as_utf8[c_as_utf8_len] = '\0';

963

964

972

973

974 return (converted_len == c_as_utf8_len);

975}

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), FunctionCall6, GetDatabaseEncoding(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addUnicodeChar() and json_lex_string().

pg_utf8_islegal()

bool pg_utf8_islegal ( const unsigned char * source,
int length
)

Definition at line 1989 of file wchar.c.

1990{

1991 unsigned char a;

1992

1993 switch (length)

1994 {

1995 default:

1996

1997 return false;

1998 case 4:

2000 if (a < 0x80 || a > 0xBF)

2001 return false;

2002

2003 case 3:

2005 if (a < 0x80 || a > 0xBF)

2006 return false;

2007

2008 case 2:

2011 {

2012 case 0xE0:

2013 if (a < 0xA0 || a > 0xBF)

2014 return false;

2015 break;

2016 case 0xED:

2017 if (a < 0x80 || a > 0x9F)

2018 return false;

2019 break;

2020 case 0xF0:

2021 if (a < 0x90 || a > 0xBF)

2022 return false;

2023 break;

2024 case 0xF4:

2025 if (a < 0x80 || a > 0x8F)

2026 return false;

2027 break;

2028 default:

2029 if (a < 0x80 || a > 0xBF)

2030 return false;

2031 break;

2032 }

2033

2034 case 1:

2036 if (a >= 0x80 && a < 0xC2)

2037 return false;

2038 if (a > 0xF4)

2039 return false;

2040 break;

2041 }

2042 return true;

2043}

static rewind_source * source

References a and source.

Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().

pg_utf_mblen()

int pg_utf_mblen ( const unsigned char * s )

Definition at line 538 of file wchar.c.

539{

540 int len;

541

542 if ((*s & 0x80) == 0)

543 len = 1;

544 else if ((*s & 0xe0) == 0xc0)

545 len = 2;

546 else if ((*s & 0xf0) == 0xe0)

547 len = 3;

548 else if ((*s & 0xf8) == 0xf0)

549 len = 4;

550#ifdef NOT_USED

551 else if ((*s & 0xfc) == 0xf8)

552 len = 5;

553 else if ((*s & 0xfe) == 0xfc)

554 len = 6;

555#endif

556 else

557 len = 1;

558 return len;

559}

References len.

Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().

pg_valid_client_encoding()

int pg_valid_client_encoding ( const char * name )

pg_valid_server_encoding()

int pg_valid_server_encoding ( const char * name )

pg_valid_server_encoding_id()

int pg_valid_server_encoding_id ( int encoding )

pg_verify_mbstr()

bool pg_verify_mbstr ( int encoding,
const char * mbstr,
int len,
bool noError
)

Definition at line 1567 of file mbutils.c.

1568{

1569 int oklen;

1570

1572

1574 if (oklen != len)

1575 {

1576 if (noError)

1577 return false;

1579 }

1580 return true;

1581}

mbstr_verifier mbverifystr

References Assert(), encoding, len, pg_wchar_tbl::mbverifystr, PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by AddFileToBackupManifest(), LogicalOutputWrite(), pg_any_to_server(), pg_convert(), pg_do_encoding_conversion(), pg_server_to_any(), pg_verifymbstr(), and read_extension_script_file().

pg_verify_mbstr_len()

int pg_verify_mbstr_len ( int encoding,
const char * mbstr,
int len,
bool noError
)

Definition at line 1598 of file mbutils.c.

1599{

1601 int mb_len;

1602

1604

1605

1606

1607

1609 {

1610 const char *nullpos = memchr(mbstr, 0, len);

1611

1612 if (nullpos == NULL)

1613 return len;

1614 if (noError)

1615 return -1;

1617 }

1618

1619

1621

1622 mb_len = 0;

1623

1624 while (len > 0)

1625 {

1626 int l;

1627

1628

1630 {

1631 if (*mbstr != '\0')

1632 {

1633 mb_len++;

1634 mbstr++;

1636 continue;

1637 }

1638 if (noError)

1639 return -1;

1641 }

1642

1643 l = (*mbverifychar) ((const unsigned char *) mbstr, len);

1644

1645 if (l < 0)

1646 {

1647 if (noError)

1648 return -1;

1650 }

1651

1652 mbstr += l;

1653 len -= l;

1654 mb_len++;

1655 }

1656 return mb_len;

1657}

int(* mbchar_verifier)(const unsigned char *mbstr, int len)

mbchar_verifier mbverifychar

References Assert(), encoding, IS_HIGHBIT_SET, len, pg_wchar_tbl::mbverifychar, pg_encoding_max_length(), PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by length_in_encoding().

pg_verifymbstr()

bool pg_verifymbstr ( const char * mbstr,
int len,
bool noError
)

Definition at line 1557 of file mbutils.c.

References GetDatabaseEncoding(), len, and pg_verify_mbstr().

Referenced by char2wchar(), CopyReadAttributesText(), plperl_spi_exec(), plperl_spi_prepare(), plperl_spi_query(), PLy_cursor_query(), PLy_output(), PLy_spi_execute_query(), PLy_spi_prepare(), PLyObject_AsString(), read_text_file(), and spg_text_leaf_consistent().

pg_wchar2mb()

int pg_wchar2mb ( const pg_wchar * from,
char * to
)

pg_wchar2mb_with_len()

int pg_wchar2mb_with_len ( const pg_wchar * from,
char * to,
int len
)

pg_wchar_strlen()

size_t pg_wchar_strlen ( const pg_wchar * str )

pg_wchar_strncmp()

int pg_wchar_strncmp ( const pg_wchar * s1,
const pg_wchar * s2,
size_t n
)

Definition at line 40 of file wstrncmp.c.

41{

42 if (n == 0)

43 return 0;

44 do

45 {

46 if (*s1 != *s2++)

47 return (*s1 - *(s2 - 1));

48 if (*s1++ == 0)

49 break;

50 } while (--n != 0);

51 return 0;

52}

References s1 and s2.

PrepareClientEncoding()

int PrepareClientEncoding ( int encoding )

Definition at line 111 of file mbutils.c.

112{

113 int current_server_encoding;

115

117 return -1;

118

119

121 return 0;

122

124

125

126

127

128 if (current_server_encoding == encoding ||

131 return 0;

132

134 {

135

136

137

138

139

140

141 Oid to_server_proc,

142 to_client_proc;

145

147 current_server_encoding);

149 return -1;

153 return -1;

154

155

156

157

160 convinfo->s_encoding = current_server_encoding;

166

167

171

172

173

174

175

176

177 return 0;

178 }

179 else

180 {

181

182

183

184

185

186

187

188

190 {

192

193 if (oldinfo->s_encoding == current_server_encoding &&

195 return 0;

196 }

197

198 return -1;

199 }

200}

List * lcons(void *datum, List *list)

static List * ConvProcList

static MemoryContext MemoryContextSwitchTo(MemoryContext context)

References backend_startup_complete, ConvProcInfo::c_encoding, ConvProcList, encoding, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, and TopMemoryContext.

Referenced by check_client_encoding() and InitializeClientEncoding().

report_invalid_encoding()

pg_noreturn void report_invalid_encoding ( int encoding,
const char * mbstr,
int len
)

Definition at line 1699 of file mbutils.c.

1700{

1702 char buf[8 * 5 + 1];

1703 char *p = buf;

1704 int j,

1705 jlimit;

1706

1707 jlimit = Min(l, len);

1708 jlimit = Min(jlimit, 8);

1709

1710 for (j = 0; j < jlimit; j++)

1711 {

1712 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);

1713 if (j < jlimit - 1)

1715 }

1716

1718 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),

1719 errmsg("invalid byte sequence for encoding \"%s\": %s",

1722}

int pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr, size_t remaining)

References buf, encoding, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.

Referenced by big52euc_tw(), big52mic(), CopyConversionError(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_verify_mbstr(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), test_enc_conversion(), utf8_to_iso8859_1(), and UtfToLocal().

report_untranslatable_char()

pg_noreturn void report_untranslatable_char ( int src_encoding,
int dest_encoding,
const char * mbstr,
int len
)

Definition at line 1731 of file mbutils.c.

1733{

1734 int l;

1735 char buf[8 * 5 + 1];

1736 char *p = buf;

1737 int j,

1738 jlimit;

1739

1740

1741

1742

1743

1744

1745

1746

1748 jlimit = Min(l, len);

1749 jlimit = Min(jlimit, 8);

1750

1751 for (j = 0; j < jlimit; j++)

1752 {

1753 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);

1754 if (j < jlimit - 1)

1756 }

1757

1759 (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),

1760 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",

1764}

References buf, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.

Referenced by big52euc_tw(), big52mic(), euc_tw2big5(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), utf8_to_iso8859_1(), and UtfToLocal().

SetClientEncoding()

int SetClientEncoding ( int encoding )

Definition at line 209 of file mbutils.c.

210{

211 int current_server_encoding;

212 bool found;

214

216 return -1;

217

218

220 {

222 return 0;

223 }

224

226

227

228

229

230 if (current_server_encoding == encoding ||

233 {

237 return 0;

238 }

239

240

241

242

243

244

245

246 found = false;

248 {

250

251 if (convinfo->s_encoding == current_server_encoding &&

253 {

254 if (!found)

255 {

256

260 found = true;

261 }

262 else

263 {

264

267 }

268 }

269 }

270

271 if (found)

272 return 0;

273 else

274 return -1;

275}

static FmgrInfo * ToServerConvProc

static FmgrInfo * ToClientConvProc

void pfree(void *pointer)

#define foreach_delete_current(lst, var_or_cell)

References backend_startup_complete, ConvProcInfo::c_encoding, ClientEncoding, ConvProcList, encoding, foreach_delete_current, GetDatabaseEncoding(), lfirst, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, ToClientConvProc, and ToServerConvProc.

Referenced by assign_client_encoding(), InitializeClientEncoding(), and ParallelWorkerMain().

SetDatabaseEncoding()

void SetDatabaseEncoding ( int encoding )

SetMessageEncoding()

void SetMessageEncoding ( int encoding )

surrogate_pair_to_codepoint()

unicode_to_utf8()

unsigned char * unicode_to_utf8 ( pg_wchar c, unsigned char * utf8string ) inlinestatic

unicode_utf8len()

static int unicode_utf8len ( pg_wchar c) inlinestatic

Definition at line 607 of file pg_wchar.h.

609 {

610 utf8string[0] = 0xF0 | ((c >> 18) & 0x07);

611 utf8string[1] = 0x80 | ((c >> 12) & 0x3F);

612 utf8string[2] = 0x80 | ((c >> 6) & 0x3F);

613 utf8string[3] = 0x80 | (c & 0x3F);

614 }

615

616 return utf8string;

617}

Referenced by convert_case() and initcap_wbnext().

utf8_to_unicode()

pg_wchar utf8_to_unicode ( const unsigned char * c) inlinestatic

Definition at line 549 of file pg_wchar.h.

554{

555 return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);

556}

557

558

559

560

561

562

563

566{

567 if ((*c & 0x80) == 0)

static pg_wchar utf8_to_unicode(const unsigned char *c)

UtfToLocal()

Definition at line 507 of file conv.c.

513{

515 int l;

517 const unsigned char *start = utf;

518

521 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

523

524 for (; len > 0; len -= l)

525 {

526 unsigned char b1 = 0;

527 unsigned char b2 = 0;

528 unsigned char b3 = 0;

529 unsigned char b4 = 0;

530

531

532 if (*utf == '\0')

533 break;

534

536 if (len < l)

537 break;

538

540 break;

541

542 if (l == 1)

543 {

544

545 *iso++ = *utf++;

546 continue;

547 }

548

549

550 if (l == 2)

551 {

552 b3 = *utf++;

553 b4 = *utf++;

554 }

555 else if (l == 3)

556 {

557 b2 = *utf++;

558 b3 = *utf++;

559 b4 = *utf++;

560 }

561 else if (l == 4)

562 {

563 b1 = *utf++;

564 b2 = *utf++;

565 b3 = *utf++;

566 b4 = *utf++;

567 }

568 else

569 {

570 elog(ERROR, "unsupported character length %d", l);

571 iutf = 0;

572 }

573 iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);

574

575

576 if (cmap && len > l)

577 {

578 const unsigned char *utf_save = utf;

579 int len_save = len;

580 int l_save = l;

581

582

583 len -= l;

584

586 if (len < l)

587 {

588

589 utf -= l_save;

590 break;

591 }

592

594 {

595 if (!noError)

597 utf -= l_save;

598 break;

599 }

600

601

602 if (l > 1)

603 {

606

607 if (l == 2)

608 {

609 iutf2 = *utf++ << 8;

610 iutf2 |= *utf++;

611 }

612 else if (l == 3)

613 {

614 iutf2 = *utf++ << 16;

615 iutf2 |= *utf++ << 8;

616 iutf2 |= *utf++;

617 }

618 else if (l == 4)

619 {

620 iutf2 = *utf++ << 24;

621 iutf2 |= *utf++ << 16;

622 iutf2 |= *utf++ << 8;

623 iutf2 |= *utf++;

624 }

625 else

626 {

627 elog(ERROR, "unsupported character length %d", l);

628 iutf2 = 0;

629 }

630

631 cutf[0] = iutf;

632 cutf[1] = iutf2;

633

634 cp = bsearch(cutf, cmap, cmapsize,

636

637 if (cp)

638 {

640 continue;

641 }

642 }

643

644

645 utf = utf_save;

646 len = len_save;

647 l = l_save;

648 }

649

650

651 if (map)

652 {

654

655 if (converted)

656 {

658 continue;

659 }

660 }

661

662

663 if (conv_func)

664 {

665 uint32 converted = (*conv_func) (iutf);

666

667 if (converted)

668 {

670 continue;

671 }

672 }

673

674

675 utf -= l;

676 if (noError)

677 break;

679 (const char *) utf, len);

680 }

681

682

683 if (len > 0 && !noError)

685

686 *iso = '\0';

687

688 return utf - start;

689}

static int compare3(const void *p1, const void *p2)

bool pg_utf8_islegal(const unsigned char *source, int length)

References pg_utf_to_local_combined::code, compare3(), elog, encoding, ereport, errcode(), errmsg(), ERROR, len, pg_mb_radix_conv(), PG_UTF8, pg_utf8_islegal(), pg_utf_mblen, PG_VALID_ENCODING, report_invalid_encoding(), report_untranslatable_char(), start, and store_coded_char().

Referenced by utf8_to_big5(), utf8_to_euc_cn(), utf8_to_euc_jis_2004(), utf8_to_euc_jp(), utf8_to_euc_kr(), utf8_to_euc_tw(), utf8_to_gb18030(), utf8_to_gbk(), utf8_to_iso8859(), utf8_to_johab(), utf8_to_koi8r(), utf8_to_koi8u(), utf8_to_shift_jis_2004(), utf8_to_sjis(), utf8_to_uhc(), and utf8_to_win().

pg_enc2gettext_tbl

pg_enc2name_tbl

Definition at line 308 of file encnames.c.

Referenced by check_encoding_conversion_args(), InitializeClientEncoding(), pg_any_to_server(), pg_encoding_to_char(), pg_unicode_to_server(), report_invalid_encoding(), report_untranslatable_char(), SetClientEncoding(), SetDatabaseEncoding(), SetMessageEncoding(), and test_enc_setup().

pg_wchar_table

Definition at line 2064 of file wchar.c.

Referenced by pg_database_encoding_max_length(), pg_dsplen(), pg_encoding_dsplen(), pg_encoding_max_length(), pg_encoding_max_length_sql(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_mblen(), pg_encoding_verifymbchar(), pg_encoding_verifymbstr(), pg_encoding_wchar2mb_with_len(), pg_generic_charinc(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mblen(), pg_verify_mbstr(), pg_verify_mbstr_len(), pg_wchar2mb(), and pg_wchar2mb_with_len().