bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260) · python/cpython@eb81795 (original) (raw)
`@@ -22,13 +22,23 @@
`
22
22
`else:
`
23
23
`C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
`
24
24
``
25
``
`-
XXX (ncoghlan): The above is probably still wrong for:
`
``
25
`+
Note that the above is probably still wrong in some cases, such as:
`
26
26
`# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
`
27
27
`# * AIX and any other platforms that use latin-1 in the C locale
`
``
28
`+
`
``
29
`+
Options for dealing with this:
`
``
30
`+
* Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
`
``
31
`+
* Fix the test expectations to match the actual platform behaviour
`
28
32
``
29
33
`# In order to get the warning messages to match up as expected, the candidate
`
30
34
`# order here must match the target locale order in Python/pylifecycle.c
`
31
``
`-
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")
`
``
35
`+
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8")
`
``
36
+
``
37
`+
XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
`
``
38
`+
problems encountered on *BSD systems with those test cases
`
``
39
`+
For additional details see:
`
``
40
`+
nl_langinfo CODESET error: https://bugs.python.org/issue30647
`
``
41
`+
locale handling differences: https://bugs.python.org/issue30672
`
32
42
``
33
43
`# There's no reliable cross-platform way of checking locale alias
`
34
44
`# lists, so the only way of knowing which of these locales will work
`
`@@ -40,28 +50,39 @@ def _set_locale_in_subprocess(locale_name):
`
40
50
`result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
`
41
51
`return result.rc == 0
`
42
52
``
43
``
`-
_EncodingDetails = namedtuple("EncodingDetails",
`
44
``
`-
"fsencoding stdin_info stdout_info stderr_info")
`
``
53
`+
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
`
``
54
`+
_EncodingDetails = namedtuple("EncodingDetails", _fields)
`
45
55
``
46
56
`class EncodingDetails(_EncodingDetails):
`
``
57
`+
XXX (ncoghlan): Using JSON for child state reporting may be less fragile
`
47
58
`CHILD_PROCESS_SCRIPT = ";".join([
`
48
``
`-
"import sys",
`
``
59
`+
"import sys, os",
`
49
60
`"print(sys.getfilesystemencoding())",
`
50
61
`"print(sys.stdin.encoding + ':' + sys.stdin.errors)",
`
51
62
`"print(sys.stdout.encoding + ':' + sys.stdout.errors)",
`
52
63
`"print(sys.stderr.encoding + ':' + sys.stderr.errors)",
`
``
64
`+
"print(os.environ.get('LANG', 'not set'))",
`
``
65
`+
"print(os.environ.get('LC_CTYPE', 'not set'))",
`
``
66
`+
"print(os.environ.get('LC_ALL', 'not set'))",
`
53
67
` ])
`
54
68
``
55
69
`@classmethod
`
56
``
`-
def get_expected_details(cls, fs_encoding, stream_encoding):
`
``
70
`+
def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):
`
57
71
`"""Returns expected child process details for a given encoding"""
`
58
72
`_stream = stream_encoding + ":{}"
`
59
73
`# stdin and stdout should use surrogateescape either because the
`
60
74
`# coercion triggered, or because the C locale was detected
`
61
75
`stream_info = 2*[_stream.format("surrogateescape")]
`
62
76
`# stderr should always use backslashreplace
`
63
77
`stream_info.append(_stream.format("backslashreplace"))
`
64
``
`-
return dict(cls(fs_encoding, *stream_info)._asdict())
`
``
78
`+
expected_lang = env_vars.get("LANG", "not set").lower()
`
``
79
`+
if coercion_expected:
`
``
80
`+
expected_lc_ctype = CLI_COERCION_TARGET.lower()
`
``
81
`+
else:
`
``
82
`+
expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower()
`
``
83
`+
expected_lc_all = env_vars.get("LC_ALL", "not set").lower()
`
``
84
`+
env_info = expected_lang, expected_lc_ctype, expected_lc_all
`
``
85
`+
return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())
`
65
86
``
66
87
`@staticmethod
`
67
88
`def _handle_output_variations(data):
`
`@@ -97,64 +118,20 @@ def get_child_details(cls, env_vars):
`
97
118
`result.fail(py_cmd)
`
98
119
`# All subprocess outputs in this test case should be pure ASCII
`
99
120
`adjusted_output = cls._handle_output_variations(result.out)
`
100
``
`-
stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines()
`
``
121
`+
stdout_lines = adjusted_output.decode("ascii").splitlines()
`
101
122
`child_encoding_details = dict(cls(*stdout_lines)._asdict())
`
102
123
`stderr_lines = result.err.decode("ascii").rstrip().splitlines()
`
103
124
`return child_encoding_details, stderr_lines
`
104
125
``
105
126
``
106
``
`-
class _ChildProcessEncodingTestCase(unittest.TestCase):
`
107
``
`-
Base class to check for expected encoding details in a child process
`
108
``
-
109
``
`-
def _check_child_encoding_details(self,
`
110
``
`-
env_vars,
`
111
``
`-
expected_fs_encoding,
`
112
``
`-
expected_stream_encoding,
`
113
``
`-
expected_warning):
`
114
``
`-
"""Check the C locale handling for the given process environment
`
115
``
-
116
``
`-
Parameters:
`
117
``
`-
expected_fs_encoding: expected sys.getfilesystemencoding() result
`
118
``
`-
expected_stream_encoding: expected encoding for standard streams
`
119
``
`-
expected_warning: stderr output to expect (if any)
`
120
``
`-
"""
`
121
``
`-
result = EncodingDetails.get_child_details(env_vars)
`
122
``
`-
encoding_details, stderr_lines = result
`
123
``
`-
self.assertEqual(encoding_details,
`
124
``
`-
EncodingDetails.get_expected_details(
`
125
``
`-
expected_fs_encoding,
`
126
``
`-
expected_stream_encoding))
`
127
``
`-
self.assertEqual(stderr_lines, expected_warning)
`
128
``
-
129
127
`# Details of the shared library warning emitted at runtime
`
130
``
`-
LIBRARY_C_LOCALE_WARNING = (
`
``
128
`+
LEGACY_LOCALE_WARNING = (
`
131
129
`"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
`
132
130
`"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
`
133
131
`"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
`
134
132
`"locales is recommended."
`
135
133
`)
`
136
134
``
137
``
`-
@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"),
`
138
``
`-
"C locale runtime warning disabled at build time")
`
139
``
`-
class LocaleWarningTests(_ChildProcessEncodingTestCase):
`
140
``
`-
Test warning emitted when running in the C locale
`
141
``
-
142
``
`-
def test_library_c_locale_warning(self):
`
143
``
`-
self.maxDiff = None
`
144
``
`-
for locale_to_set in ("C", "POSIX", "invalid.ascii"):
`
145
``
`-
XXX (ncoghlan): Mac OS X doesn't behave as expected in the
`
146
``
`-
POSIX locale, so we skip that for now
`
147
``
`-
if sys.platform == "darwin" and locale_to_set == "POSIX":
`
148
``
`-
continue
`
149
``
`-
var_dict = {
`
150
``
`-
"LC_ALL": locale_to_set
`
151
``
`-
}
`
152
``
`-
with self.subTest(forced_locale=locale_to_set):
`
153
``
`-
self._check_child_encoding_details(var_dict,
`
154
``
`-
C_LOCALE_FS_ENCODING,
`
155
``
`-
C_LOCALE_STREAM_ENCODING,
`
156
``
`-
[LIBRARY_C_LOCALE_WARNING])
`
157
``
-
158
135
`# Details of the CLI locale coercion warning emitted at runtime
`
159
136
`CLI_COERCION_WARNING_FMT = (
`
160
137
`"Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
`
`@@ -163,9 +140,13 @@ def test_library_c_locale_warning(self):
`
163
140
``
164
141
``
165
142
`AVAILABLE_TARGETS = None
`
``
143
`+
CLI_COERCION_TARGET = None
`
``
144
`+
CLI_COERCION_WARNING = None
`
166
145
``
167
146
`def setUpModule():
`
168
147
`global AVAILABLE_TARGETS
`
``
148
`+
global CLI_COERCION_TARGET
`
``
149
`+
global CLI_COERCION_WARNING
`
169
150
``
170
151
`if AVAILABLE_TARGETS is not None:
`
171
152
`# initialization already done
`
`@@ -177,26 +158,57 @@ def setUpModule():
`
177
158
`if _set_locale_in_subprocess(target_locale):
`
178
159
`AVAILABLE_TARGETS.append(target_locale)
`
179
160
``
``
161
`+
if AVAILABLE_TARGETS:
`
``
162
`+
Coercion is expected to use the first available target locale
`
``
163
`+
CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
`
``
164
`+
CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)
`
180
165
``
181
166
``
182
``
`-
class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase):
`
183
``
`-
Base class for test cases that rely on coercion targets being defined
`
``
167
`+
class _LocaleHandlingTestCase(unittest.TestCase):
`
``
168
`+
Base class to check expected locale handling behaviour
`
184
169
``
185
``
`-
@classmethod
`
186
``
`-
def setUpClass(cls):
`
187
``
`-
if not AVAILABLE_TARGETS:
`
188
``
`-
raise unittest.SkipTest("No C-with-UTF-8 locale available")
`
``
170
`+
def _check_child_encoding_details(self,
`
``
171
`+
env_vars,
`
``
172
`+
expected_fs_encoding,
`
``
173
`+
expected_stream_encoding,
`
``
174
`+
expected_warnings,
`
``
175
`+
coercion_expected):
`
``
176
`+
"""Check the C locale handling for the given process environment
`
189
177
``
``
178
`+
Parameters:
`
``
179
`+
expected_fs_encoding: expected sys.getfilesystemencoding() result
`
``
180
`+
expected_stream_encoding: expected encoding for standard streams
`
``
181
`+
expected_warnings: stderr output lines to expect (None means no output)
`
``
182
`+
"""
`
``
183
`+
result = EncodingDetails.get_child_details(env_vars)
`
``
184
`+
encoding_details, stderr_lines = result
`
``
185
`+
expected_details = EncodingDetails.get_expected_details(
`
``
186
`+
coercion_expected,
`
``
187
`+
expected_fs_encoding,
`
``
188
`+
expected_stream_encoding,
`
``
189
`+
env_vars
`
``
190
`+
)
`
``
191
`+
self.assertEqual(encoding_details, expected_details)
`
``
192
`+
if expected_warnings is None:
`
``
193
`+
expected_warnings = []
`
``
194
`+
self.assertEqual(stderr_lines, expected_warnings)
`
190
195
``
191
``
`-
class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
`
``
196
+
``
197
`+
class LocaleConfigurationTests(_LocaleHandlingTestCase):
`
192
198
`# Test explicit external configuration via the process environment
`
193
199
``
``
200
`+
def setUpClass():
`
``
201
`+
This relies on setUpModule() having been run, so it can't be
`
``
202
`+
handled via the @unittest.skipUnless decorator
`
``
203
`+
if not AVAILABLE_TARGETS:
`
``
204
`+
raise unittest.SkipTest("No C-with-UTF-8 locale available")
`
``
205
+
194
206
`def test_external_target_locale_configuration(self):
`
``
207
+
195
208
`# Explicitly setting a target locale should give the same behaviour as
`
196
209
`# is seen when implicitly coercing to that target locale
`
197
210
`self.maxDiff = None
`
198
211
``
199
``
`-
expected_warning = []
`
200
212
`expected_fs_encoding = "utf-8"
`
201
213
`expected_stream_encoding = "utf-8"
`
202
214
``
`@@ -209,6 +221,7 @@ def test_external_target_locale_configuration(self):
`
209
221
`for locale_to_set in AVAILABLE_TARGETS:
`
210
222
`# XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
`
211
223
`# expected, so skip that combination for now
`
``
224
`+
See https://bugs.python.org/issue30672 for discussion
`
212
225
`if env_var == "LANG" and locale_to_set == "UTF-8":
`
213
226
`continue
`
214
227
``
`@@ -219,17 +232,23 @@ def test_external_target_locale_configuration(self):
`
219
232
`self._check_child_encoding_details(var_dict,
`
220
233
`expected_fs_encoding,
`
221
234
`expected_stream_encoding,
`
222
``
`-
expected_warning)
`
``
235
`+
expected_warnings=None,
`
``
236
`+
coercion_expected=False)
`
223
237
``
224
238
``
225
239
``
226
240
`@test.support.cpython_only
`
227
241
`@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
`
228
242
`"C locale coercion disabled at build time")
`
229
``
`-
class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
`
``
243
`+
class LocaleCoercionTests(_LocaleHandlingTestCase):
`
230
244
`# Test implicit reconfiguration of the environment during CLI startup
`
231
245
``
232
``
`-
def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale):
`
``
246
`+
def _check_c_locale_coercion(self,
`
``
247
`+
fs_encoding, stream_encoding,
`
``
248
`+
coerce_c_locale,
`
``
249
`+
expected_warnings=None,
`
``
250
`+
coercion_expected=True,
`
``
251
`+
**extra_vars):
`
233
252
`"""Check the C locale handling for various configurations
`
234
253
``
235
254
` Parameters:
`
`@@ -238,27 +257,31 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale
`
238
257
` coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
`
239
258
` None: don't set the variable at all
`
240
259
` str: the value set in the child's environment
`
``
260
`+
expected_warnings: expected warning lines on stderr
`
``
261
`+
extra_vars: additional environment variables to set in subprocess
`
241
262
` """
`
242
``
-
243
``
`-
Check for expected warning on stderr if C locale is coerced
`
244
263
`self.maxDiff = None
`
245
264
``
246
``
`-
expected_warning = []
`
247
``
`-
if coerce_c_locale != "0":
`
248
``
`-
Expect coercion to use the first available locale
`
249
``
`-
warning_msg = CLI_COERCION_WARNING_FMT.format(AVAILABLE_TARGETS[0])
`
250
``
`-
expected_warning.append(warning_msg)
`
``
265
`+
if not AVAILABLE_TARGETS:
`
``
266
`+
Locale coercion is disabled when there aren't any target locales
`
``
267
`+
fs_encoding = C_LOCALE_FS_ENCODING
`
``
268
`+
stream_encoding = C_LOCALE_STREAM_ENCODING
`
``
269
`+
coercion_expected = False
`
``
270
`+
if expected_warnings:
`
``
271
`+
expected_warnings = [LEGACY_LOCALE_WARNING]
`
251
272
``
252
273
`base_var_dict = {
`
253
274
`"LANG": "",
`
254
275
`"LC_CTYPE": "",
`
255
276
`"LC_ALL": "",
`
256
277
` }
`
``
278
`+
base_var_dict.update(extra_vars)
`
257
279
`for env_var in ("LANG", "LC_CTYPE"):
`
258
280
`for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
`
259
``
`-
XXX (ncoghlan): Mac OS X doesn't behave as expected in the
`
``
281
`+
XXX (ncoghlan): *BSD platforms don't behave as expected in the
`
260
282
`# POSIX locale, so we skip that for now
`
261
``
`-
if sys.platform == "darwin" and locale_to_set == "POSIX":
`
``
283
`+
See https://bugs.python.org/issue30672 for discussion
`
``
284
`+
if locale_to_set == "POSIX":
`
262
285
`continue
`
263
286
`with self.subTest(env_var=env_var,
`
264
287
`nominal_locale=locale_to_set,
`
`@@ -267,33 +290,62 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale
`
267
290
`var_dict[env_var] = locale_to_set
`
268
291
`if coerce_c_locale is not None:
`
269
292
`var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
`
``
293
`+
Check behaviour on successful coercion
`
270
294
`self._check_child_encoding_details(var_dict,
`
271
295
`fs_encoding,
`
272
296
`stream_encoding,
`
273
``
`-
expected_warning)
`
``
297
`+
expected_warnings,
`
``
298
`+
coercion_expected)
`
274
299
``
275
300
`def test_test_PYTHONCOERCECLOCALE_not_set(self):
`
276
301
`# This should coerce to the first available target locale by default
`
277
302
`self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)
`
278
303
``
279
304
`def test_PYTHONCOERCECLOCALE_not_zero(self):
`
280
``
`-
Any string other that "0" is considered "set" for our purposes
`
``
305
`+
Any string other than "0" is considered "set" for our purposes
`
281
306
`# and hence should result in the locale coercion being enabled
`
282
307
`for setting in ("", "1", "true", "false"):
`
283
308
`self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)
`
284
309
``
``
310
`+
def test_PYTHONCOERCECLOCALE_set_to_warn(self):
`
``
311
`+
PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
`
``
312
`+
self._check_c_locale_coercion("utf-8", "utf-8",
`
``
313
`+
coerce_c_locale="warn",
`
``
314
`+
expected_warnings=[CLI_COERCION_WARNING])
`
``
315
+
``
316
+
285
317
`def test_PYTHONCOERCECLOCALE_set_to_zero(self):
`
286
318
`# The setting "0" should result in the locale coercion being disabled
`
287
319
`self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
`
288
320
`C_LOCALE_STREAM_ENCODING,
`
289
``
`-
coerce_c_locale="0")
`
``
321
`+
coerce_c_locale="0",
`
``
322
`+
coercion_expected=False)
`
``
323
`+
Setting LC_ALL=C shouldn't make any difference to the behaviour
`
``
324
`+
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
`
``
325
`+
C_LOCALE_STREAM_ENCODING,
`
``
326
`+
coerce_c_locale="0",
`
``
327
`+
LC_ALL="C",
`
``
328
`+
coercion_expected=False)
`
290
329
``
``
330
`+
def test_LC_ALL_set_to_C(self):
`
``
331
`+
Setting LC_ALL should render the locale coercion ineffective
`
``
332
`+
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
`
``
333
`+
C_LOCALE_STREAM_ENCODING,
`
``
334
`+
coerce_c_locale=None,
`
``
335
`+
LC_ALL="C",
`
``
336
`+
coercion_expected=False)
`
``
337
`+
And result in a warning about a lack of locale compatibility
`
``
338
`+
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
`
``
339
`+
C_LOCALE_STREAM_ENCODING,
`
``
340
`+
coerce_c_locale="warn",
`
``
341
`+
LC_ALL="C",
`
``
342
`+
expected_warnings=[LEGACY_LOCALE_WARNING],
`
``
343
`+
coercion_expected=False)
`
291
344
``
292
345
`def test_main():
`
293
346
`test.support.run_unittest(
`
294
347
`LocaleConfigurationTests,
`
295
``
`-
LocaleCoercionTests,
`
296
``
`-
LocaleWarningTests
`
``
348
`+
LocaleCoercionTests
`
297
349
` )
`
298
350
`test.support.reap_children()
`
299
351
``