LLVM: include/llvm/Support/ConvertUTF.h Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105#ifndef LLVM_SUPPORT_CONVERTUTF_H

106#define LLVM_SUPPORT_CONVERTUTF_H

107

109#include <cstddef>

110#include <string>

111

112#if defined(_WIN32)

113#include <system_error>

114#endif

115

116

117

118

119namespace llvm {

120

121

122

123

124

125

126

127

128

129using UTF32 = unsigned int;

130using UTF16 = unsigned short;

131using UTF8 = unsigned char;

133

134

135#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD

136#define UNI_MAX_BMP (UTF32)0x0000FFFF

137#define UNI_MAX_UTF16 (UTF32)0x0010FFFF

138#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF

139#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF

140

141#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4

142

143#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF

144#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE

145

146#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE 0x0000FEFF

147#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED 0xFFFE0000

148

155

157

159 const UTF8 *sourceEnd,

160 UTF16 **targetStart,

161 UTF16 *targetEnd,

163

164

165

166

167

169 const UTF8 *sourceEnd,

170 UTF32 **targetStart,

171 UTF32 *targetEnd,

173

174

175

176

177

179 const UTF8 *sourceEnd,

180 UTF32 **targetStart,

181 UTF32 *targetEnd,

183

185 const UTF16 *sourceEnd,

186 UTF8 **targetStart,

187 UTF8 *targetEnd,

189

191 const UTF32 *sourceEnd,

192 UTF8 **targetStart,

193 UTF8 *targetEnd,

195

197 const UTF16 *sourceEnd,

198 UTF32 **targetStart,

199 UTF32 *targetEnd,

201

203 const UTF32 *sourceEnd,

204 UTF16 **targetStart,

205 UTF16 *targetEnd,

207

209

211

213 const UTF8 *sourceEnd);

214

216

217

218

219

220template <typename T> class ArrayRef;

221template <typename T> class SmallVectorImpl;

222class StringRef;

223

224

225

226

227

228

229

230

231

232

234 char *&ResultPtr, const UTF8 *&ErrorPtr);

235

236

237

238

239

241

242

243

244

245

247

248

249

250

251

253 std::string &Result);

254

255

256

257

258

259

260

261

262

263

264

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

283 const UTF8 *sourceEnd,

286 if (*source == sourceEnd)

292}

293

294

295

296

297

299

300

301

302

303

304

305

306

308 std::string &Out);

309

310

311

312

313

314

315

316

318

319

320

321

322

323

324

325

327 std::string &Out);

328

329

330

331

332

333

334

335

337

338

339

340

341

342

344 SmallVectorImpl<UTF16> &DstUTF16);

345

349

350#if defined(_WIN32)

351namespace sys {

352namespace windows {

353LLVM_ABI std::error_code UTF8ToUTF16(StringRef utf8,

354 SmallVectorImpl<wchar_t> &utf16);

355

356LLVM_ABI std::error_code CurCPToUTF16(StringRef utf8,

357 SmallVectorImpl<wchar_t> &utf16);

358LLVM_ABI std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,

359 SmallVectorImpl<char> &utf8);

360

361LLVM_ABI std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,

362 SmallVectorImpl<char> &utf8);

363}

364}

365#endif

366

367}

368

369#endif

StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.

This is an optimization pass for GlobalISel generic memory operations.

unsigned char UTF8

Definition ConvertUTF.h:131

unsigned char Boolean

Definition ConvertUTF.h:132

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

unsigned short UTF16

Definition ConvertUTF.h:130

LLVM_ABI ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)

Convert a partial UTF8 sequence to UTF32. If the sequence ends in an incomplete code unit sequence, returns sourceIllegal.

LLVM_ABI bool IsSingleCodeUnitUTF16Codepoint(unsigned)

LLVM_ABI bool IsSingleCodeUnitUTF32Codepoint(unsigned)

LLVM_ABI unsigned getNumBytesForUTF8(UTF8 firstByte)

LLVM_ABI bool hasUTF16ByteOrderMark(ArrayRef< char > SrcBytes)

Returns true if a blob of text starts with a UTF-16 big or little endian byte order mark.

LLVM_ABI ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)

Convert a partial UTF8 sequence to UTF32. If the sequence ends in an incomplete code unit sequence, returns sourceExhausted.

ConversionResult convertUTF8Sequence(const UTF8 **source, const UTF8 *sourceEnd, UTF32 *target, ConversionFlags flags)

Convert the first UTF8 sequence in the given source buffer to a UTF32 code point.

Definition ConvertUTF.h:282

unsigned int UTF32

Definition ConvertUTF.h:129

ConversionFlags

Definition ConvertUTF.h:156

@ lenientConversion

Definition ConvertUTF.h:156

@ strictConversion

Definition ConvertUTF.h:156

ConversionResult

Definition ConvertUTF.h:149

@ targetExhausted

Definition ConvertUTF.h:152

@ sourceExhausted

Definition ConvertUTF.h:151

@ conversionOK

Definition ConvertUTF.h:150

@ sourceIllegal

Definition ConvertUTF.h:153

LLVM_ABI bool convertWideToUTF8(const std::wstring &Source, std::string &Result)

Converts a std::wstring to a UTF-8 encoded std::string.

LLVM_ABI bool convertUTF16ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)

Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.

LLVM_ABI ConversionResult ConvertUTF32toUTF16(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)

LLVM_ABI bool IsSingleCodeUnitUTF8Codepoint(unsigned)

LLVM_ABI Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)

LLVM_ABI ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)

LLVM_ABI bool convertUTF32ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)

Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.

LLVM_ABI bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, char *&ResultPtr, const UTF8 *&ErrorPtr)

Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on WideCharWidth.

LLVM_ABI ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)

ArrayRef(const T &OneElt) -> ArrayRef< T >

LLVM_ABI Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)

LLVM_ABI unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd)

LLVM_ABI ConversionResult ConvertUTF16toUTF32(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)

LLVM_ABI bool convertUTF8ToUTF16String(StringRef SrcUTF8, SmallVectorImpl< UTF16 > &DstUTF16)

Converts a UTF-8 string into a UTF-16 string with native endianness.

LLVM_ABI bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr)

Convert a Unicode code point to a UTF8 sequence.

LLVM_ABI ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)