LLVM: lib/Support/TextEncoding.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

20#include <system_error>

21

22#if HAVE_ICU

23#include <unicode/ucnv.h>

24#elif HAVE_ICONV

25#include <iconv.h>

26#endif

27

28using namespace llvm;

29

30

31

34 bool PrevDigit = false;

35 for (auto Ch : CSName) {

38 if (Ch != '0' || PrevDigit) {

41 }

42 }

43 }

44}

45

46

50 if (Normalized.equals("utf8"))

52 if (Normalized.equals("ibm1047"))

54 return std::nullopt;

55}

56

57[[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output,

58 size_t &OutputLength,

60

61

62

63 Capacity =

64 (Capacity < Result.max_size() / 2) ? 2 * Capacity : Result.max_size();

65 Result.resize(0);

66 Result.resize_for_overwrite(Capacity);

67 Output = static_cast<char *>(Result.data());

68 OutputLength = Capacity;

69}

70

71namespace {

/// Direction handled by the built-in UTF-8 / IBM-1047 converter.
enum ConversionType {
  UTF8ToIBM1047 = 0, ///< Source is UTF-8, result is IBM-1047.
  IBM1047ToUTF8 = 1, ///< Source is IBM-1047, result is UTF-8.
};

76

77

78

79

80

81

82class TextEncodingConverterTable final

84 const ConversionType ConvType;

85

86public:

87 TextEncodingConverterTable(ConversionType ConvType) : ConvType(ConvType) {}

88

89 std::error_code convertString(StringRef Source,

90 SmallVectorImpl &Result) override;

91

92 void reset() override {}

93};

94

// Performs the conversion selected by ConvType. Both exits visible here return
// a default-constructed (i.e. success) std::error_code.
// NOTE(review): upstream lines 97, 100, 103 and 105 are missing from this
// listing (the rest of the signature and the per-case work). Presumably the
// cases forward to the EBCDIC helpers (convertToUTF8 / convertToEBCDIC) --
// confirm against the upstream file before editing this function.
95std::error_code

96TextEncodingConverterTable::convertString(StringRef Source,

98 switch (ConvType) {

99 case IBM1047ToUTF8:

101 return std::error_code();

102 case UTF8ToIBM1047:

104 }

106 return std::error_code();

107}

108

109#if HAVE_ICU

// Deleter functor that lets std::unique_ptr own an ICU UConverter.
// NOTE(review): the body of operator() (upstream lines 112-113) is missing
// from this listing; presumably it closes the converter -- confirm upstream.
110struct UConverterDeleter {

111 void operator()(UConverter *Converter) const {

114 }

115};

// Owning handle for a UConverter; cleanup goes through UConverterDeleter.
116using UConverterUniquePtr = std::unique_ptr<UConverter, UConverterDeleter>;

117

118class TextEncodingConverterICU final

119 : public details::TextEncodingConverterImplBase {

120 UConverterUniquePtr FromConvDesc;

121 UConverterUniquePtr ToConvDesc;

122

123public:

124 TextEncodingConverterICU(UConverterUniquePtr FromConverter,

125 UConverterUniquePtr ToConverter)

126 : FromConvDesc(std::move(FromConverter)),

127 ToConvDesc(std::move(ToConverter)) {}

128

129 std::error_code convertString(StringRef Source,

130 SmallVectorImpl &Result) override;

131

132 void reset() override;

133};

134

135

136

137

138

139

// Converts Source with ucnv_convertEx(), reading through FromConvDesc and
// writing through ToConvDesc into Result's storage.
//
// Returns a default-constructed std::error_code on success, E2BIG when the
// output buffer overflowed and cannot grow any further, and EILSEQ for every
// other ICU failure.
//
// NOTE(review): upstream lines 176, 181 and 187 are missing from this listing.
// Nothing visible here trims Result to the number of bytes actually written,
// so those lines presumably do that -- confirm against the upstream file.
140std::error_code

141TextEncodingConverterICU::convertString(StringRef Source,

142 SmallVectorImpl &Result) {

143

144 size_t InputLength = Source.size();

// An empty Source is replaced by "" so that a valid pointer is always used.
145 const char *In = InputLength ? const_cast<char *>(Source.data()) : "";

146

147

// Use all of the storage Result already owns as the initial output buffer.
148 size_t Capacity = Result.capacity();

// In the visible lines OutputLength is only passed on to HandleOverflow().
149 size_t OutputLength = Capacity;

150 Result.resize_for_overwrite(Capacity);

// Assigned at the top of every attempt inside the loop below.
151 char *Output;

152 UErrorCode EC = U_ZERO_ERROR;

153

// Install the STOP callbacks on both converters; EC collects the status of
// both calls and is checked once by the assert below.
154 ucnv_setToUCallBack(&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,

155 &EC);

156 ucnv_setFromUCallBack(&*ToConvDesc, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,

157 NULL, &EC);

158 assert(U_SUCCESS(EC));

159

// Retry loop: each pass converts the whole input again from its start; a pass
// is repeated only after HandleOverflow() has enlarged the buffer.
160 do {

161 EC = U_ZERO_ERROR;

162 const char *Input = In;

163

164 Output = static_cast<char *>(Result.data());

// Result was resized to its full capacity, so Result.end() is the write limit.
// The four NULLs are the pivot-buffer arguments and the two `true` values are
// the reset and flush arguments of ucnv_convertEx().
165 ucnv_convertEx(&*ToConvDesc, &*FromConvDesc, &Output, Result.end(), &Input,

166 In + InputLength, NULL,

167 NULL, NULL,

168 NULL, true,

169 true, &EC);

170 if (U_FAILURE(EC)) {

171 if (EC == U_BUFFER_OVERFLOW_ERROR) {

// Grow and retry while the buffer can still get larger; otherwise give up.
172 if (Capacity < Result.max_size()) {

173 HandleOverflow(Capacity, Output, OutputLength, Result);

174 continue;

175 } else {

177 return std::error_code(E2BIG, std::generic_category());

178 }

179 }

180

// Every failure other than a buffer overflow is reported as EILSEQ.
182 return std::error_code(EILSEQ, std::generic_category());

183 }

184 break;

185 } while (true);

186

188 return std::error_code();

189}

190

191void TextEncodingConverterICU::reset() {

192 ucnv_reset(&*FromConvDesc);

193 ucnv_reset(&*ToConvDesc);

194}

195

196#elif HAVE_ICONV

197class TextEncodingConverterIconv final

198 : public details::TextEncodingConverterImplBase {

199 class UniqueIconvT {

200 iconv_t ConvDesc;

201

202 public:

203 operator iconv_t() const { return ConvDesc; }

204 UniqueIconvT(iconv_t CD) : ConvDesc(CD) {}

205 ~UniqueIconvT() {

206 if (ConvDesc != (iconv_t)-1) {

207 iconv_close(ConvDesc);

208 ConvDesc = (iconv_t)-1;

209 }

210 }

211 UniqueIconvT(UniqueIconvT &&Other) : ConvDesc(Other.ConvDesc) {

212 Other.ConvDesc = (iconv_t)-1;

213 }

214 UniqueIconvT &operator=(UniqueIconvT &&Other) {

215 if (&Other != this) {

216 ConvDesc = Other.ConvDesc;

217 Other.ConvDesc = (iconv_t)-1;

218 }

219 return *this;

220 }

221 };

222 UniqueIconvT ConvDesc;

223

224public:

225 TextEncodingConverterIconv(UniqueIconvT ConvDesc)

226 : ConvDesc(std::move(ConvDesc)) {}

227

228 std::error_code convertString(StringRef Source,

229 SmallVectorImpl &Result) override;

230

231 void reset() override;

232};

233

234

235

236

237

// Converts Source with iconv(), writing into Result's storage and growing that
// storage through HandleOverflow() whenever iconv reports E2BIG.
//
// Returns a default-constructed std::error_code on success, the errno-based
// code for iconv failures that cannot be retried, and illegal_byte_sequence
// when iconv returns a non-zero value other than (size_t)-1.
//
// NOTE(review): upstream lines 260 and 292 are missing from this listing.
// Nothing visible here trims Result to the number of bytes actually written,
// so those lines presumably do that -- confirm against the upstream file.
238std::error_code

239TextEncodingConverterIconv::convertString(StringRef Source,

240 SmallVectorImpl &Result) {

241

// Use all of the storage Result already owns as the initial output buffer.
242 size_t Capacity = Result.capacity();

243 char *Output = static_cast<char *>(Result.data());

244 size_t OutputLength = Capacity;

245 Result.resize_for_overwrite(Capacity);

246

247 size_t Ret;

248

// Maps a non-zero iconv() result to an error code. A default-constructed
// (false) code means "the buffer was enlarged, run the conversion again".
249 auto HandleError = [&Capacity, &Output, &OutputLength, &Result,

250 this](size_t Ret) {

// (size_t)-1 is the value compared against for "iconv failed, see errno".
251 if (Ret == static_cast<size_t>(-1)) {

252

253 if (errno == E2BIG && Capacity < Result.max_size()) {

254 HandleOverflow(Capacity, Output, OutputLength, Result);

255

// The next attempt starts over, so the descriptor state is reset first.
256 reset();

257 return std::error_code();

258 } else {

259

261 return std::error_code(errno, std::generic_category());

262 }

263 } else {

264

265

266

267

// Any other non-zero result is reported as an illegal byte sequence.
268 return std::make_error_code(std::errc::illegal_byte_sequence);

269 }

270 };

271

// Retry loop: every pass restarts from the beginning of Source.
272 do {

273 size_t InputLength = Source.size();

// iconv() takes a non-const char **, hence the const_cast.
274 char *Input = const_cast<char *>(Source.data());

275 Ret = iconv(ConvDesc, &Input, &InputLength, &Output, &OutputLength);

276 if (Ret != 0) {

277 if (auto EC = HandleError(Ret))

278 return EC;

279 continue;

280 }

281

// Second call with no input but a valid output buffer; its result goes
// through the same error handling as the main conversion call.
282 Ret = iconv(ConvDesc, nullptr, nullptr, &Output, &OutputLength);

283 if (Ret != 0) {

284 if (auto EC = HandleError(Ret))

285 return EC;

286 continue;

287 }

288 break;

289 } while (true);

290

291

293 return std::error_code();

294}

295

296inline void TextEncodingConverterIconv::reset() {

297 iconv(ConvDesc, nullptr, nullptr, nullptr, nullptr);

298}

299

300#endif

301}

302

303ErrorOr

305

306

307 if (CPFrom == CPTo)

308 return std::make_error_code(std::errc::invalid_argument);

309

315 else

316 return std::make_error_code(std::errc::invalid_argument);

317

318 return TextEncodingConverter(

319 std::make_unique(Conversion));

320}

321

324 std::optional FromEncoding = getKnownEncoding(From);

325 std::optional ToEncoding = getKnownEncoding(To);

326 if (FromEncoding && ToEncoding) {

328 create(*FromEncoding, *ToEncoding);

329 if (Converter)

330 return Converter;

331 }

332#if HAVE_ICU

333 UErrorCode EC = U_ZERO_ERROR;

334 UConverterUniquePtr FromConvDesc(ucnv_open(From.str().c_str(), &EC));

335 if (U_FAILURE(EC))

336 return std::make_error_code(std::errc::invalid_argument);

337

338 UConverterUniquePtr ToConvDesc(ucnv_open(To.str().c_str(), &EC));

339 if (U_FAILURE(EC))

340 return std::make_error_code(std::errc::invalid_argument);

341

342 auto Converter = std::make_unique(

343 std::move(FromConvDesc), std::move(ToConvDesc));

344 return TextEncodingConverter(std::move(Converter));

345#elif HAVE_ICONV

346 iconv_t ConvDesc = iconv_open(To.str().c_str(), From.str().c_str());

347 if (ConvDesc == (iconv_t)-1)

348 return std::make_error_code(std::errc::invalid_argument);

349 return TextEncodingConverter(

350 std::make_unique(ConvDesc));

351#else

352 return std::make_error_code(std::errc::invalid_argument);

353#endif

354}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

This file provides utility functions for converting between EBCDIC-1047 and UTF-8.

This file defines the SmallString class.

This file defines the SmallVector class.

static void HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, SmallVectorImpl< char > &Result)

Definition TextEncoding.cpp:57

static std::optional< TextEncoding > getKnownEncoding(StringRef Name)

Definition TextEncoding.cpp:47

static void normalizeCharSetName(StringRef CSName, SmallVectorImpl< char > &Normalized)

Definition TextEncoding.cpp:32

This file provides a utility class to convert between different character set encodings.

Represents either an error or a value T.

SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...

bool equals(StringRef RHS) const

Check for string equality.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

StringRef - Represent a constant reference to a string, i.e.

std::string str() const

str - Get the contents as an std::string.

static LLVM_ABI ErrorOr< TextEncodingConverter > create(TextEncoding From, TextEncoding To)

Creates a TextEncodingConverter instance.

Definition TextEncoding.cpp:304

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

LLVM_ABI std::error_code convertToEBCDIC(StringRef Source, SmallVectorImpl< char > &Result)

LLVM_ABI void convertToUTF8(StringRef Source, SmallVectorImpl< char > &Result)

This is an optimization pass for GlobalISel generic memory operations.

char toLower(char x)

Returns the corresponding lowercase character if x is uppercase.

@ IBM1047

IBM EBCDIC 1047 character set encoding.

@ UTF8

UTF-8 character set encoding.

bool isDigit(char C)

Checks if character C is one of the 10 decimal digits.

bool isAlnum(char C)

Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.