LLVM: include/llvm/Support/ConvertUTF.h Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105#ifndef LLVM_SUPPORT_CONVERTUTF_H
106#define LLVM_SUPPORT_CONVERTUTF_H
107
109#include <cstddef>
110#include <string>
111
112#if defined(_WIN32)
113#include <system_error>
114#endif
115
116
117
118
119namespace llvm {
120
121
122
123
124
125
126
127
128
// Code-unit types for the three Unicode encoding forms handled by this header.
// The names imply 32-, 16- and 8-bit units, but the aliases are plain
// unsigned int / unsigned short / unsigned char, so those widths are a
// platform assumption rather than something these declarations enforce.
129using UTF32 = unsigned int;
130using UTF16 = unsigned short;
131using UTF8 = unsigned char;
132using Boolean = unsigned char;
133
134
// Code point constants. Each value is cast to UTF32, so the UTF32 alias must
// be visible wherever these macros are expanded.
// U+FFFD, the Unicode replacement character.
135#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
// U+FFFF, the last code point of the Basic Multilingual Plane.
136#define UNI_MAX_BMP (UTF32)0x0000FFFF
// U+10FFFF, the largest code point that UTF-16 can encode.
137#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
// Largest 31-bit value. NOTE(review): presumably the historical UCS-4 ceiling,
// as opposed to the legal Unicode ceiling below; confirm against the users.
138#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
// U+10FFFF again, this time as the upper bound for a legal UTF-32 value.
139#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
140
// Upper bound on the number of UTF-8 bytes used for a single code point.
141#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
142
// UTF-16 byte order mark: the value as read in native byte order, and the
// value obtained when the two bytes are swapped.
143#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF
144#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
145
// UTF-32 byte order mark: native value, and the value with all four bytes
// reversed.
146#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE 0x0000FEFF
147#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED 0xFFFE0000
148
155
157
159 const UTF8 *sourceEnd,
160 UTF16 **targetStart,
161 UTF16 *targetEnd,
163
164
165
166
167
169 const UTF8 *sourceEnd,
170 UTF32 **targetStart,
171 UTF32 *targetEnd,
173
174
175
176
177
179 const UTF8 *sourceEnd,
180 UTF32 **targetStart,
181 UTF32 *targetEnd,
183
185 const UTF16 *sourceEnd,
186 UTF8 **targetStart,
187 UTF8 *targetEnd,
189
191 const UTF32 *sourceEnd,
192 UTF8 **targetStart,
193 UTF8 *targetEnd,
195
197 const UTF16 *sourceEnd,
198 UTF32 **targetStart,
199 UTF32 *targetEnd,
201
203 const UTF32 *sourceEnd,
204 UTF16 **targetStart,
205 UTF16 *targetEnd,
207
209
211
213 const UTF8 *sourceEnd);
214
216
217
218
219
220template <typename T> class ArrayRef;
221template <typename T> class SmallVectorImpl;
222class StringRef;
223
224
225
226
227
228
229
230
231
232
234 char *&ResultPtr, const UTF8 *&ErrorPtr);
235
236
237
238
239
241
242
243
244
245
247
248
249
250
251
253 std::string &Result);
254
255
256
257
258
259
260
261
262
263
264
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
283 const UTF8 *sourceEnd,
286 if (*source == sourceEnd)
292}
293
294
295
296
297
299
300
301
302
303
304
305
306
308 std::string &Out);
309
310
311
312
313
314
315
316
318
319
320
321
322
323
324
325
327 std::string &Out);
328
329
330
331
332
333
334
335
337
338
339
340
341
342
344 SmallVectorImpl<UTF16> &DstUTF16);
345
349
// Declarations compiled only when _WIN32 is defined. All four functions live
// in llvm::sys::windows, take their output buffer by reference as the last
// parameter, and return a std::error_code.
350#if defined(_WIN32)
351namespace sys {
352namespace windows {
// Takes UTF-8 text in `utf8` and fills `utf16` with wchar_t code units.
353LLVM_ABI std::error_code UTF8ToUTF16(StringRef utf8,
354 SmallVectorImpl<wchar_t> &utf16);
355
// Same signature as UTF8ToUTF16. NOTE(review): judging by the name, the input
// is interpreted in the current code page even though the parameter is still
// called `utf8`; confirm against the implementation.
356LLVM_ABI std::error_code CurCPToUTF16(StringRef utf8,
357 SmallVectorImpl<wchar_t> &utf16);
// Takes `utf16_len` elements starting at `utf16` and fills `utf8`.
// NOTE(review): `utf16_len` is presumably a count of wchar_t units, not bytes,
// and the output element type is assumed to be char; confirm both.
358LLVM_ABI std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
359 SmallVectorImpl<char> &utf8);
360
// Same signature as UTF16ToUTF8. NOTE(review): judging by the name, the output
// is encoded in the current code page even though the parameter is still
// called `utf8`; confirm against the implementation.
361LLVM_ABI std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
362 SmallVectorImpl<char> &utf8);
363}
364}
365#endif
366
367}
368
369#endif
StringRef - Represent a constant reference to a string, i.e.
This is an optimization pass for GlobalISel generic memory operations.
unsigned char UTF8
Definition ConvertUTF.h:131
unsigned char Boolean
Definition ConvertUTF.h:132
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
unsigned short UTF16
Definition ConvertUTF.h:130
LLVM_ABI ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
LLVM_ABI bool IsSingleCodeUnitUTF16Codepoint(unsigned)
LLVM_ABI bool IsSingleCodeUnitUTF32Codepoint(unsigned)
LLVM_ABI unsigned getNumBytesForUTF8(UTF8 firstByte)
LLVM_ABI bool hasUTF16ByteOrderMark(ArrayRef< char > SrcBytes)
Returns true if a blob of text starts with a UTF-16 big or little endian byte order mark.
LLVM_ABI ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
ConversionResult convertUTF8Sequence(const UTF8 **source, const UTF8 *sourceEnd, UTF32 *target, ConversionFlags flags)
Convert the first UTF8 sequence in the given source buffer to a UTF32 code point.
Definition ConvertUTF.h:282
unsigned int UTF32
Definition ConvertUTF.h:129
ConversionFlags
Definition ConvertUTF.h:156
@ lenientConversion
Definition ConvertUTF.h:156
@ strictConversion
Definition ConvertUTF.h:156
ConversionResult
Definition ConvertUTF.h:149
@ targetExhausted
Definition ConvertUTF.h:152
@ sourceExhausted
Definition ConvertUTF.h:151
@ conversionOK
Definition ConvertUTF.h:150
@ sourceIllegal
Definition ConvertUTF.h:153
LLVM_ABI bool convertWideToUTF8(const std::wstring &Source, std::string &Result)
Converts a std::wstring to a UTF-8 encoded std::string.
LLVM_ABI bool convertUTF16ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
LLVM_ABI ConversionResult ConvertUTF32toUTF16(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
LLVM_ABI bool IsSingleCodeUnitUTF8Codepoint(unsigned)
LLVM_ABI Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)
LLVM_ABI ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
LLVM_ABI bool convertUTF32ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.
LLVM_ABI bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, char *&ResultPtr, const UTF8 *&ErrorPtr)
Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on WideCharWidth.
LLVM_ABI ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)
LLVM_ABI unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd)
LLVM_ABI ConversionResult ConvertUTF16toUTF32(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
LLVM_ABI bool convertUTF8ToUTF16String(StringRef SrcUTF8, SmallVectorImpl< UTF16 > &DstUTF16)
Converts a UTF-8 string into a UTF-16 string with native endianness.
LLVM_ABI bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr)
Convert a Unicode code point to a UTF8 sequence.
LLVM_ABI ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)