LLVM: lib/Support/TextEncoding.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
20#include <system_error>
21
22#if HAVE_ICU
23#include <unicode/ucnv.h>
24#elif HAVE_ICONV
25#include <iconv.h>
26#endif
27
28using namespace llvm;
29
30
31
34 bool PrevDigit = false;
35 for (auto Ch : CSName) {
38 if (Ch != '0' || PrevDigit) {
41 }
42 }
43 }
44}
45
46
50 if (Normalized.equals("utf8"))
52 if (Normalized.equals("ibm1047"))
54 return std::nullopt;
55}
56
57[[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output,
58 size_t &OutputLength,
60
61
62
63 Capacity =
64 (Capacity < Result.max_size() / 2) ? 2 * Capacity : Result.max_size();
65 Result.resize(0);
66 Result.resize_for_overwrite(Capacity);
67 Output = static_cast<char *>(Result.data());
68 OutputLength = Capacity;
69}
70
71namespace {
/// Direction of the built-in conversion performed by a
/// TextEncodingConverterTable.
enum ConversionType {
  /// Convert from UTF-8 to IBM-1047.
  UTF8ToIBM1047,
  /// Convert from IBM-1047 to UTF-8.
  IBM1047ToUTF8,
};
76
77
78
79
80
81
82class TextEncodingConverterTable final
84 const ConversionType ConvType;
85
86public:
87 TextEncodingConverterTable(ConversionType ConvType) : ConvType(ConvType) {}
88
89 std::error_code convertString(StringRef Source,
90 SmallVectorImpl &Result) override;
91
92 void reset() override {}
93};
94
95std::error_code
96TextEncodingConverterTable::convertString(StringRef Source,
98 switch (ConvType) {
99 case IBM1047ToUTF8:
101 return std::error_code();
102 case UTF8ToIBM1047:
104 }
106 return std::error_code();
107}
108
109#if HAVE_ICU
110struct UConverterDeleter {
111 void operator()(UConverter *Converter) const {
114 }
115};
116using UConverterUniquePtr = std::unique_ptr<UConverter, UConverterDeleter>;
117
118class TextEncodingConverterICU final
119 : public details::TextEncodingConverterImplBase {
120 UConverterUniquePtr FromConvDesc;
121 UConverterUniquePtr ToConvDesc;
122
123public:
124 TextEncodingConverterICU(UConverterUniquePtr FromConverter,
125 UConverterUniquePtr ToConverter)
126 : FromConvDesc(std::move(FromConverter)),
127 ToConvDesc(std::move(ToConverter)) {}
128
129 std::error_code convertString(StringRef Source,
130 SmallVectorImpl &Result) override;
131
132 void reset() override;
133};
134
135
136
137
138
139
140std::error_code
141TextEncodingConverterICU::convertString(StringRef Source,
142 SmallVectorImpl &Result) {
143
144 size_t InputLength = Source.size();
145 const char *In = InputLength ? const_cast<char *>(Source.data()) : "";
146
147
148 size_t Capacity = Result.capacity();
149 size_t OutputLength = Capacity;
150 Result.resize_for_overwrite(Capacity);
151 char *Output;
152 UErrorCode EC = U_ZERO_ERROR;
153
154 ucnv_setToUCallBack(&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
155 &EC);
156 ucnv_setFromUCallBack(&*ToConvDesc, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
157 NULL, &EC);
158 assert(U_SUCCESS(EC));
159
160 do {
161 EC = U_ZERO_ERROR;
162 const char *Input = In;
163
164 Output = static_cast<char *>(Result.data());
165 ucnv_convertEx(&*ToConvDesc, &*FromConvDesc, &Output, Result.end(), &Input,
166 In + InputLength, NULL,
167 NULL, NULL,
168 NULL, true,
169 true, &EC);
170 if (U_FAILURE(EC)) {
171 if (EC == U_BUFFER_OVERFLOW_ERROR) {
172 if (Capacity < Result.max_size()) {
173 HandleOverflow(Capacity, Output, OutputLength, Result);
174 continue;
175 } else {
177 return std::error_code(E2BIG, std::generic_category());
178 }
179 }
180
182 return std::error_code(EILSEQ, std::generic_category());
183 }
184 break;
185 } while (true);
186
188 return std::error_code();
189}
190
191void TextEncodingConverterICU::reset() {
192 ucnv_reset(&*FromConvDesc);
193 ucnv_reset(&*ToConvDesc);
194}
195
196#elif HAVE_ICONV
197class TextEncodingConverterIconv final
198 : public details::TextEncodingConverterImplBase {
199 class UniqueIconvT {
200 iconv_t ConvDesc;
201
202 public:
203 operator iconv_t() const { return ConvDesc; }
204 UniqueIconvT(iconv_t CD) : ConvDesc(CD) {}
205 ~UniqueIconvT() {
206 if (ConvDesc != (iconv_t)-1) {
207 iconv_close(ConvDesc);
208 ConvDesc = (iconv_t)-1;
209 }
210 }
211 UniqueIconvT(UniqueIconvT &&Other) : ConvDesc(Other.ConvDesc) {
212 Other.ConvDesc = (iconv_t)-1;
213 }
214 UniqueIconvT &operator=(UniqueIconvT &&Other) {
215 if (&Other != this) {
216 ConvDesc = Other.ConvDesc;
217 Other.ConvDesc = (iconv_t)-1;
218 }
219 return *this;
220 }
221 };
222 UniqueIconvT ConvDesc;
223
224public:
225 TextEncodingConverterIconv(UniqueIconvT ConvDesc)
226 : ConvDesc(std::move(ConvDesc)) {}
227
228 std::error_code convertString(StringRef Source,
229 SmallVectorImpl &Result) override;
230
231 void reset() override;
232};
233
234
235
236
237
238std::error_code
239TextEncodingConverterIconv::convertString(StringRef Source,
240 SmallVectorImpl &Result) {
241
242 size_t Capacity = Result.capacity();
243 char *Output = static_cast<char *>(Result.data());
244 size_t OutputLength = Capacity;
245 Result.resize_for_overwrite(Capacity);
246
247 size_t Ret;
248
249 auto HandleError = [&Capacity, &Output, &OutputLength, &Result,
250 this](size_t Ret) {
251 if (Ret == static_cast<size_t>(-1)) {
252
253 if (errno == E2BIG && Capacity < Result.max_size()) {
254 HandleOverflow(Capacity, Output, OutputLength, Result);
255
256 reset();
257 return std::error_code();
258 } else {
259
261 return std::error_code(errno, std::generic_category());
262 }
263 } else {
264
265
266
267
268 return std::make_error_code(std::errc::illegal_byte_sequence);
269 }
270 };
271
272 do {
273 size_t InputLength = Source.size();
274 char *Input = const_cast<char *>(Source.data());
275 Ret = iconv(ConvDesc, &Input, &InputLength, &Output, &OutputLength);
276 if (Ret != 0) {
277 if (auto EC = HandleError(Ret))
278 return EC;
279 continue;
280 }
281
282 Ret = iconv(ConvDesc, nullptr, nullptr, &Output, &OutputLength);
283 if (Ret != 0) {
284 if (auto EC = HandleError(Ret))
285 return EC;
286 continue;
287 }
288 break;
289 } while (true);
290
291
293 return std::error_code();
294}
295
296inline void TextEncodingConverterIconv::reset() {
297 iconv(ConvDesc, nullptr, nullptr, nullptr, nullptr);
298}
299
300#endif
301}
302
303ErrorOr
305
306
307 if (CPFrom == CPTo)
308 return std::make_error_code(std::errc::invalid_argument);
309
315 else
316 return std::make_error_code(std::errc::invalid_argument);
317
318 return TextEncodingConverter(
319 std::make_unique(Conversion));
320}
321
324 std::optional FromEncoding = getKnownEncoding(From);
325 std::optional ToEncoding = getKnownEncoding(To);
326 if (FromEncoding && ToEncoding) {
328 create(*FromEncoding, *ToEncoding);
329 if (Converter)
330 return Converter;
331 }
332#if HAVE_ICU
333 UErrorCode EC = U_ZERO_ERROR;
334 UConverterUniquePtr FromConvDesc(ucnv_open(From.str().c_str(), &EC));
335 if (U_FAILURE(EC))
336 return std::make_error_code(std::errc::invalid_argument);
337
338 UConverterUniquePtr ToConvDesc(ucnv_open(To.str().c_str(), &EC));
339 if (U_FAILURE(EC))
340 return std::make_error_code(std::errc::invalid_argument);
341
342 auto Converter = std::make_unique(
343 std::move(FromConvDesc), std::move(ToConvDesc));
344 return TextEncodingConverter(std::move(Converter));
345#elif HAVE_ICONV
346 iconv_t ConvDesc = iconv_open(To.str().c_str(), From.str().c_str());
347 if (ConvDesc == (iconv_t)-1)
348 return std::make_error_code(std::errc::invalid_argument);
349 return TextEncodingConverter(
350 std::make_unique(ConvDesc));
351#else
352 return std::make_error_code(std::errc::invalid_argument);
353#endif
354}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file provides utility functions for converting between EBCDIC-1047 and UTF-8.
This file defines the SmallString class.
This file defines the SmallVector class.
static void HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, SmallVectorImpl< char > &Result)
Definition TextEncoding.cpp:57
static std::optional< TextEncoding > getKnownEncoding(StringRef Name)
Definition TextEncoding.cpp:47
static void normalizeCharSetName(StringRef CSName, SmallVectorImpl< char > &Normalized)
Definition TextEncoding.cpp:32
This file provides a utility class to convert between different character set encodings.
Represents either an error or a value T.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
bool equals(StringRef RHS) const
Check for string equality.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::string str() const
str - Get the contents as an std::string.
static LLVM_ABI ErrorOr< TextEncodingConverter > create(TextEncoding From, TextEncoding To)
Creates a TextEncodingConverter instance.
Definition TextEncoding.cpp:304
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI std::error_code convertToEBCDIC(StringRef Source, SmallVectorImpl< char > &Result)
LLVM_ABI void convertToUTF8(StringRef Source, SmallVectorImpl< char > &Result)
This is an optimization pass for GlobalISel generic memory operations.
char toLower(char x)
Returns the corresponding lowercase character if x is uppercase.
@ IBM1047
IBM EBCDIC 1047 character set encoding.
@ UTF8
UTF-8 character set encoding.
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classified by the "C" locale.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.