LLVM: lib/Support/ConvertUTFWrapper.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

13#include

14#include

15

16namespace llvm {

17

19 char *&ResultPtr, const UTF8 *&ErrorPtr) {

20 assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);

22

23 if (WideCharWidth == 1) {

24 const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.begin());

27 ErrorPtr = Pos;

28 } else {

29 memcpy(ResultPtr, Source.data(), Source.size());

30 ResultPtr += Source.size();

31 }

32 } else if (WideCharWidth == 2) {

33 const UTF8 *sourceStart = (const UTF8*)Source.data();

34

35

36 UTF16 *targetStart = reinterpret_cast<UTF16 *>(ResultPtr);

38 result =

40 &targetStart, targetStart + Source.size(), flags);

42 ResultPtr = reinterpret_cast<char *>(targetStart);

43 else

44 ErrorPtr = sourceStart;

45 } else if (WideCharWidth == 4) {

46 const UTF8 *sourceStart = (const UTF8 *)Source.data();

47

48

49 UTF32 *targetStart = reinterpret_cast<UTF32 *>(ResultPtr);

51 result =

53 &targetStart, targetStart + Source.size(), flags);

55 ResultPtr = reinterpret_cast<char *>(targetStart);

56 else

57 ErrorPtr = sourceStart;

58 }

60 "ConvertUTF8toUTFXX exhausted target buffer");

62}

63

65 const UTF32 *SourceStart = &Source;

66 const UTF32 *SourceEnd = SourceStart + 1;

67 UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);

68 UTF8 *TargetEnd = TargetStart + 4;

70 &SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);

72 return false;

73

74 ResultPtr = reinterpret_cast<char *>(TargetStart);

75 return true;

76}

77

79 return (S.size() >= 2 && ((S[0] == '\xff' && S[1] == '\xfe') ||

80 (S[0] == '\xfe' && S[1] == '\xff')));

81}

82

85

86

87 if (SrcBytes.size() % 2)

88 return false;

89

90

91 if (SrcBytes.empty())

92 return true;

93

94 const UTF16 *Src = reinterpret_cast<const UTF16 *>(SrcBytes.begin());

95 const UTF16 *SrcEnd = reinterpret_cast<const UTF16 *>(SrcBytes.end());

96

97 assert((uintptr_t)Src % sizeof(UTF16) == 0);

98

99

100 std::vector ByteSwapped;

102 ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);

103 for (UTF16 &I : ByteSwapped)

105 Src = &ByteSwapped[0];

106 SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;

107 }

108

109

111 Src++;

112

113

114

116 UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);

117 UTF8 *DstEnd = Dst + Out.size();

118

122

124 Out.clear();

125 return false;

126 }

127

128 Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);

129 Out.push_back(0);

130 Out.pop_back();

131 return true;

132}

133

137 Src.size() * sizeof(UTF16)),

138 Out);

139}

140

143

144

145 if (SrcBytes.size() % 4)

146 return false;

147

148

149 if (SrcBytes.empty())

150 return true;

151

152 const UTF32 *Src = reinterpret_cast<const UTF32 *>(SrcBytes.begin());

153 const UTF32 *SrcEnd = reinterpret_cast<const UTF32 *>(SrcBytes.end());

154

155 assert((uintptr_t)Src % sizeof(UTF32) == 0);

156

157

158 std::vector ByteSwapped;

160 ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);

161 for (UTF32 &I : ByteSwapped)

163 Src = &ByteSwapped[0];

164 SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;

165 }

166

167

169 Src++;

170

171

172

174 UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);

175 UTF8 *DstEnd = Dst + Out.size();

176

180

182 Out.clear();

183 return false;

184 }

185

186 Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);

187 Out.push_back(0);

188 Out.pop_back();

189 return true;

190}

191

195 Src.size() * sizeof(UTF32)),

196 Out);

197}

198

202

203

204 if (SrcUTF8.empty()) {

207 return true;

208 }

209

210 const UTF8 *Src = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());

211 const UTF8 *SrcEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end());

212

213

214

215

216

217

219 UTF16 *Dst = &DstUTF16[0];

220 UTF16 *DstEnd = Dst + DstUTF16.size();

221

225

227 DstUTF16.clear();

228 return false;

229 }

230

231 DstUTF16.resize(Dst - &DstUTF16[0]);

234 return true;

235}

236

// Compile-time guard on the width of wchar_t. The wide-string helpers in this
// file branch on sizeof(wchar_t) being 1, 2, or 4 bytes and treat any other
// width as unreachable, so an unsupported width is rejected here at build time.
237static_assert(sizeof(wchar_t) == 1 || sizeof(wchar_t) == 2 ||

238 sizeof(wchar_t) == 4,

239 "Expected wchar_t to be 1, 2, or 4 bytes");

240

241template

243 TResult &Result) {

244

245

246

247 Result.resize(Source.size() + 1);

248 char *ResultPtr = reinterpret_cast<char *>(&Result[0]);

249 const UTF8 *ErrorPtr;

250 if (ConvertUTF8toWide(sizeof(wchar_t), Source, ResultPtr, ErrorPtr)) {

251 Result.clear();

252 return false;

253 }

254 Result.resize(reinterpret_cast<wchar_t *>(ResultPtr) - &Result[0]);

255 return true;

256}

257

261

263 if (!Source) {

264 Result.clear();

265 return true;

266 }

268}

269

271 if (sizeof(wchar_t) == 1) {

272 const UTF8 *Start = reinterpret_cast<const UTF8 *>(Source.data());

273 const UTF8 *End =

274 reinterpret_cast<const UTF8 *>(Source.data() + Source.size());

276 return false;

277 Result.resize(Source.size());

278 memcpy(&Result[0], Source.data(), Source.size());

279 return true;

280 } else if (sizeof(wchar_t) == 2) {

283 Source.size()),

284 Result);

285 } else if (sizeof(wchar_t) == 4) {

286 const UTF32 *Start = reinterpret_cast<const UTF32 *>(Source.data());

287 const UTF32 *End =

288 reinterpret_cast<const UTF32 *>(Source.data() + Source.size());

290 UTF8 *ResultPtr = reinterpret_cast<UTF8 *>(&Result[0]);

291 UTF8 *ResultEnd = reinterpret_cast<UTF8 *>(&Result[0] + Result.size());

294 Result.resize(reinterpret_cast<char *>(ResultPtr) - &Result[0]);

295 return true;

296 } else {

297 Result.clear();

298 return false;

299 }

300 } else {

302 "Control should never reach this point; see static_assert further up");

303 }

304}

305

307

309 return V <= 0xD7FF || (V >= 0xE000 && V <= 0xFFFF);

310}

311

313 return V <= 0xD7FF || (V >= 0xE000 && V <= 0x10FFFF);

314}

315

316}

317

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED

#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE

#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT

#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE

#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

StringRef - Represent a constant reference to a string.

constexpr bool empty() const

empty - Check if the string is empty.

constexpr size_t size() const

size - Get the string size.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

This is an optimization pass for GlobalISel generic memory operations.

LLVM_ABI ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)

Convert a partial UTF8 sequence to UTF32.

LLVM_ABI bool IsSingleCodeUnitUTF16Codepoint(unsigned)

Definition ConvertUTFWrapper.cpp:308

constexpr T byteswap(T V) noexcept

Reverses the bytes in the given integer value V.

LLVM_ABI bool IsSingleCodeUnitUTF32Codepoint(unsigned)

Definition ConvertUTFWrapper.cpp:312

LLVM_ABI bool hasUTF16ByteOrderMark(ArrayRef< char > SrcBytes)

Returns true if a blob of text starts with a UTF-16 big or little endian byte order mark.

Definition ConvertUTFWrapper.cpp:78

LLVM_ABI bool convertWideToUTF8(const std::wstring &Source, std::string &Result)

Converts a std::wstring to a UTF-8 encoded std::string.

Definition ConvertUTFWrapper.cpp:270

LLVM_ABI bool convertUTF16ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)

Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.

Definition ConvertUTFWrapper.cpp:83

static bool ConvertUTF8toWideInternal(llvm::StringRef Source, TResult &Result)

Definition ConvertUTFWrapper.cpp:242

LLVM_ABI bool IsSingleCodeUnitUTF8Codepoint(unsigned)

Definition ConvertUTFWrapper.cpp:306

LLVM_ABI ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)

LLVM_ABI bool convertUTF32ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)

Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.

Definition ConvertUTFWrapper.cpp:141

LLVM_ABI bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, char *&ResultPtr, const UTF8 *&ErrorPtr)

Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on WideCharWidth.

Definition ConvertUTFWrapper.cpp:18

LLVM_ABI ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)

LLVM_ABI Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)

LLVM_ABI bool convertUTF8ToUTF16String(StringRef SrcUTF8, SmallVectorImpl< UTF16 > &DstUTF16)

Converts a UTF-8 string into a UTF-16 string with native endianness.

Definition ConvertUTFWrapper.cpp:199

LLVM_ABI bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr)

Convert a Unicode code point to a UTF8 sequence.

Definition ConvertUTFWrapper.cpp:64

LLVM_ABI ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)