bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260) · python/cpython@eb81795 (original) (raw)

`@@ -22,13 +22,23 @@

`

22

22

`else:

`

23

23

`C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING

`

24

24

``

25

``

`-

XXX (ncoghlan): The above is probably still wrong for:

`

``

25

`+

Note that the above is probably still wrong in some cases, such as:

`

26

26

`# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set

`

27

27

`# * AIX and any other platforms that use latin-1 in the C locale

`

``

28

`+

`

``

29

`+

Options for dealing with this:

`

``

30

`+

* Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)

`

``

31

`+

* Fix the test expectations to match the actual platform behaviour

`

28

32

``

29

33

`# In order to get the warning messages to match up as expected, the candidate

`

30

34

`# order here must match the target locale order in Python/pylifecycle.c

`

31

``

`-

_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")

`

``

35

`+

_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8")

`

``

36

+

``

37

`+

XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to

`

``

38

`+

problems encountered on *BSD systems with those test cases

`

``

39

`+

For additional details see:

`

``

40

`+

nl_langinfo CODESET error: https://bugs.python.org/issue30647

`

``

41

`+

locale handling differences: https://bugs.python.org/issue30672

`

32

42

``

33

43

`# There's no reliable cross-platform way of checking locale alias

`

34

44

`# lists, so the only way of knowing which of these locales will work

`

`@@ -40,28 +50,39 @@ def _set_locale_in_subprocess(locale_name):

`

40

50

`result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)

`

41

51

`return result.rc == 0

`

42

52

``

43

``

`-

_EncodingDetails = namedtuple("EncodingDetails",

`

44

``

`-

"fsencoding stdin_info stdout_info stderr_info")

`

``

53

`+

_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"

`

``

54

`+

_EncodingDetails = namedtuple("EncodingDetails", _fields)

`

45

55

``

46

56

`class EncodingDetails(_EncodingDetails):

`

``

57

`+

XXX (ncoghlan): Using JSON for child state reporting may be less fragile

`

47

58

`CHILD_PROCESS_SCRIPT = ";".join([

`

48

``

`-

"import sys",

`

``

59

`+

"import sys, os",

`

49

60

`"print(sys.getfilesystemencoding())",

`

50

61

`"print(sys.stdin.encoding + ':' + sys.stdin.errors)",

`

51

62

`"print(sys.stdout.encoding + ':' + sys.stdout.errors)",

`

52

63

`"print(sys.stderr.encoding + ':' + sys.stderr.errors)",

`

``

64

`+

"print(os.environ.get('LANG', 'not set'))",

`

``

65

`+

"print(os.environ.get('LC_CTYPE', 'not set'))",

`

``

66

`+

"print(os.environ.get('LC_ALL', 'not set'))",

`

53

67

` ])

`

54

68

``

55

69

`@classmethod

`

56

``

`-

def get_expected_details(cls, fs_encoding, stream_encoding):

`

``

70

`+

def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):

`

57

71

`"""Returns expected child process details for a given encoding"""

`

58

72

`_stream = stream_encoding + ":{}"

`

59

73

`# stdin and stdout should use surrogateescape either because the

`

60

74

`# coercion triggered, or because the C locale was detected

`

61

75

`stream_info = 2*[_stream.format("surrogateescape")]

`

62

76

`# stderr should always use backslashreplace

`

63

77

`stream_info.append(_stream.format("backslashreplace"))

`

64

``

`-

return dict(cls(fs_encoding, *stream_info)._asdict())

`

``

78

`+

expected_lang = env_vars.get("LANG", "not set").lower()

`

``

79

`+

if coercion_expected:

`

``

80

`+

expected_lc_ctype = CLI_COERCION_TARGET.lower()

`

``

81

`+

else:

`

``

82

`+

expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower()

`

``

83

`+

expected_lc_all = env_vars.get("LC_ALL", "not set").lower()

`

``

84

`+

env_info = expected_lang, expected_lc_ctype, expected_lc_all

`

``

85

`+

return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())

`

65

86

``

66

87

`@staticmethod

`

67

88

`def _handle_output_variations(data):

`

`@@ -97,64 +118,20 @@ def get_child_details(cls, env_vars):

`

97

118

`result.fail(py_cmd)

`

98

119

`# All subprocess outputs in this test case should be pure ASCII

`

99

120

`adjusted_output = cls._handle_output_variations(result.out)

`

100

``

`-

stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines()

`

``

121

`+

stdout_lines = adjusted_output.decode("ascii").splitlines()

`

101

122

`child_encoding_details = dict(cls(*stdout_lines)._asdict())

`

102

123

`stderr_lines = result.err.decode("ascii").rstrip().splitlines()

`

103

124

`return child_encoding_details, stderr_lines

`

104

125

``

105

126

``

106

``

`-

class _ChildProcessEncodingTestCase(unittest.TestCase):

`

107

``

`-

Base class to check for expected encoding details in a child process

`

108

``

-

109

``

`-

def _check_child_encoding_details(self,

`

110

``

`-

env_vars,

`

111

``

`-

expected_fs_encoding,

`

112

``

`-

expected_stream_encoding,

`

113

``

`-

expected_warning):

`

114

``

`-

"""Check the C locale handling for the given process environment

`

115

``

-

116

``

`-

Parameters:

`

117

``

`-

expected_fs_encoding: expected sys.getfilesystemencoding() result

`

118

``

`-

expected_stream_encoding: expected encoding for standard streams

`

119

``

`-

expected_warning: stderr output to expect (if any)

`

120

``

`-

"""

`

121

``

`-

result = EncodingDetails.get_child_details(env_vars)

`

122

``

`-

encoding_details, stderr_lines = result

`

123

``

`-

self.assertEqual(encoding_details,

`

124

``

`-

EncodingDetails.get_expected_details(

`

125

``

`-

expected_fs_encoding,

`

126

``

`-

expected_stream_encoding))

`

127

``

`-

self.assertEqual(stderr_lines, expected_warning)

`

128

``

-

129

127

`# Details of the shared library warning emitted at runtime

`

130

``

`-

LIBRARY_C_LOCALE_WARNING = (

`

``

128

`+

LEGACY_LOCALE_WARNING = (

`

131

129

`"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "

`

132

130

`"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "

`

133

131

`"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "

`

134

132

`"locales is recommended."

`

135

133

`)

`

136

134

``

137

``

`-

@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"),

`

138

``

`-

"C locale runtime warning disabled at build time")

`

139

``

`-

class LocaleWarningTests(_ChildProcessEncodingTestCase):

`

140

``

`-

Test warning emitted when running in the C locale

`

141

``

-

142

``

`-

def test_library_c_locale_warning(self):

`

143

``

`-

self.maxDiff = None

`

144

``

`-

for locale_to_set in ("C", "POSIX", "invalid.ascii"):

`

145

``

`-

XXX (ncoghlan): Mac OS X doesn't behave as expected in the

`

146

``

`-

POSIX locale, so we skip that for now

`

147

``

`-

if sys.platform == "darwin" and locale_to_set == "POSIX":

`

148

``

`-

continue

`

149

``

`-

var_dict = {

`

150

``

`-

"LC_ALL": locale_to_set

`

151

``

`-

}

`

152

``

`-

with self.subTest(forced_locale=locale_to_set):

`

153

``

`-

self._check_child_encoding_details(var_dict,

`

154

``

`-

C_LOCALE_FS_ENCODING,

`

155

``

`-

C_LOCALE_STREAM_ENCODING,

`

156

``

`-

[LIBRARY_C_LOCALE_WARNING])

`

157

``

-

158

135

`# Details of the CLI locale coercion warning emitted at runtime

`

159

136

`CLI_COERCION_WARNING_FMT = (

`

160

137

`"Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "

`

`@@ -163,9 +140,13 @@ def test_library_c_locale_warning(self):

`

163

140

``

164

141

``

165

142

`AVAILABLE_TARGETS = None

`

``

143

`+

CLI_COERCION_TARGET = None

`

``

144

`+

CLI_COERCION_WARNING = None

`

166

145

``

167

146

`def setUpModule():

`

168

147

`global AVAILABLE_TARGETS

`

``

148

`+

global CLI_COERCION_TARGET

`

``

149

`+

global CLI_COERCION_WARNING

`

169

150

``

170

151

`if AVAILABLE_TARGETS is not None:

`

171

152

`# initialization already done

`

`@@ -177,26 +158,57 @@ def setUpModule():

`

177

158

`if _set_locale_in_subprocess(target_locale):

`

178

159

`AVAILABLE_TARGETS.append(target_locale)

`

179

160

``

``

161

`+

if AVAILABLE_TARGETS:

`

``

162

`+

Coercion is expected to use the first available target locale

`

``

163

`+

CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]

`

``

164

`+

CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)

`

180

165

``

181

166

``

182

``

`-

class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase):

`

183

``

`-

Base class for test cases that rely on coercion targets being defined

`

``

167

`+

class _LocaleHandlingTestCase(unittest.TestCase):

`

``

168

`+

Base class to check expected locale handling behaviour

`

184

169

``

185

``

`-

@classmethod

`

186

``

`-

def setUpClass(cls):

`

187

``

`-

if not AVAILABLE_TARGETS:

`

188

``

`-

raise unittest.SkipTest("No C-with-UTF-8 locale available")

`

``

170

`+

def _check_child_encoding_details(self,

`

``

171

`+

env_vars,

`

``

172

`+

expected_fs_encoding,

`

``

173

`+

expected_stream_encoding,

`

``

174

`+

expected_warnings,

`

``

175

`+

coercion_expected):

`

``

176

`+

"""Check the C locale handling for the given process environment

`

189

177

``

``

178

`+

Parameters:

`

``

179

`+

expected_fs_encoding: expected sys.getfilesystemencoding() result

`

``

180

`+

expected_stream_encoding: expected encoding for standard streams

`

``

181

`+

expected_warnings: list of stderr lines to expect (None means no output)

`

``

182

`+

"""

`

``

183

`+

result = EncodingDetails.get_child_details(env_vars)

`

``

184

`+

encoding_details, stderr_lines = result

`

``

185

`+

expected_details = EncodingDetails.get_expected_details(

`

``

186

`+

coercion_expected,

`

``

187

`+

expected_fs_encoding,

`

``

188

`+

expected_stream_encoding,

`

``

189

`+

env_vars

`

``

190

`+

)

`

``

191

`+

self.assertEqual(encoding_details, expected_details)

`

``

192

`+

if expected_warnings is None:

`

``

193

`+

expected_warnings = []

`

``

194

`+

self.assertEqual(stderr_lines, expected_warnings)

`

190

195

``

191

``

`-

class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):

`

``

196

+

``

197

`+

class LocaleConfigurationTests(_LocaleHandlingTestCase):

`

192

198

`# Test explicit external configuration via the process environment

`

193

199

``

``

200

`+

def setUpClass():

`

``

201

`+

This relies on setUpModule() having been run, so it can't be

`

``

202

`+

handled via the @unittest.skipUnless decorator

`

``

203

`+

if not AVAILABLE_TARGETS:

`

``

204

`+

raise unittest.SkipTest("No C-with-UTF-8 locale available")

`

``

205

+

194

206

`def test_external_target_locale_configuration(self):

`

``

207

+

195

208

`# Explicitly setting a target locale should give the same behaviour as

`

196

209

`# is seen when implicitly coercing to that target locale

`

197

210

`self.maxDiff = None

`

198

211

``

199

``

`-

expected_warning = []

`

200

212

`expected_fs_encoding = "utf-8"

`

201

213

`expected_stream_encoding = "utf-8"

`

202

214

``

`@@ -209,6 +221,7 @@ def test_external_target_locale_configuration(self):

`

209

221

`for locale_to_set in AVAILABLE_TARGETS:

`

210

222

`# XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as

`

211

223

`# expected, so skip that combination for now

`

``

224

`+

See https://bugs.python.org/issue30672 for discussion

`

212

225

`if env_var == "LANG" and locale_to_set == "UTF-8":

`

213

226

`continue

`

214

227

``

`@@ -219,17 +232,23 @@ def test_external_target_locale_configuration(self):

`

219

232

`self._check_child_encoding_details(var_dict,

`

220

233

`expected_fs_encoding,

`

221

234

`expected_stream_encoding,

`

222

``

`-

expected_warning)

`

``

235

`+

expected_warnings=None,

`

``

236

`+

coercion_expected=False)

`

223

237

``

224

238

``

225

239

``

226

240

`@test.support.cpython_only

`

227

241

`@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),

`

228

242

`"C locale coercion disabled at build time")

`

229

``

`-

class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):

`

``

243

`+

class LocaleCoercionTests(_LocaleHandlingTestCase):

`

230

244

`# Test implicit reconfiguration of the environment during CLI startup

`

231

245

``

232

``

`-

def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale):

`

``

246

`+

def _check_c_locale_coercion(self,

`

``

247

`+

fs_encoding, stream_encoding,

`

``

248

`+

coerce_c_locale,

`

``

249

`+

expected_warnings=None,

`

``

250

`+

coercion_expected=True,

`

``

251

`+

**extra_vars):

`

233

252

`"""Check the C locale handling for various configurations

`

234

253

``

235

254

` Parameters:

`

`@@ -238,27 +257,31 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale

`

238

257

` coerce_c_locale: setting to use for PYTHONCOERCECLOCALE

`

239

258

` None: don't set the variable at all

`

240

259

` str: the value set in the child's environment

`

``

260

`+

expected_warnings: expected warning lines on stderr

`

``

261

`+

extra_vars: additional environment variables to set in subprocess

`

241

262

` """

`

242

``

-

243

``

`-

Check for expected warning on stderr if C locale is coerced

`

244

263

`self.maxDiff = None

`

245

264

``

246

``

`-

expected_warning = []

`

247

``

`-

if coerce_c_locale != "0":

`

248

``

`-

Expect coercion to use the first available locale

`

249

``

`-

warning_msg = CLI_COERCION_WARNING_FMT.format(AVAILABLE_TARGETS[0])

`

250

``

`-

expected_warning.append(warning_msg)

`

``

265

`+

if not AVAILABLE_TARGETS:

`

``

266

`+

Locale coercion is disabled when there aren't any target locales

`

``

267

`+

fs_encoding = C_LOCALE_FS_ENCODING

`

``

268

`+

stream_encoding = C_LOCALE_STREAM_ENCODING

`

``

269

`+

coercion_expected = False

`

``

270

`+

if expected_warnings:

`

``

271

`+

expected_warnings = [LEGACY_LOCALE_WARNING]

`

251

272

``

252

273

`base_var_dict = {

`

253

274

`"LANG": "",

`

254

275

`"LC_CTYPE": "",

`

255

276

`"LC_ALL": "",

`

256

277

` }

`

``

278

`+

base_var_dict.update(extra_vars)

`

257

279

`for env_var in ("LANG", "LC_CTYPE"):

`

258

280

`for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):

`

259

``

`-

XXX (ncoghlan): Mac OS X doesn't behave as expected in the

`

``

281

`+

XXX (ncoghlan): *BSD platforms don't behave as expected in the

`

260

282

`# POSIX locale, so we skip that for now

`

261

``

`-

if sys.platform == "darwin" and locale_to_set == "POSIX":

`

``

283

`+

See https://bugs.python.org/issue30672 for discussion

`

``

284

`+

if locale_to_set == "POSIX":

`

262

285

`continue

`

263

286

`with self.subTest(env_var=env_var,

`

264

287

`nominal_locale=locale_to_set,

`

`@@ -267,33 +290,62 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale

`

267

290

`var_dict[env_var] = locale_to_set

`

268

291

`if coerce_c_locale is not None:

`

269

292

`var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale

`

``

293

`+

Check behaviour on successful coercion

`

270

294

`self._check_child_encoding_details(var_dict,

`

271

295

`fs_encoding,

`

272

296

`stream_encoding,

`

273

``

`-

expected_warning)

`

``

297

`+

expected_warnings,

`

``

298

`+

coercion_expected)

`

274

299

``

275

300

`def test_test_PYTHONCOERCECLOCALE_not_set(self):

`

276

301

`# This should coerce to the first available target locale by default

`

277

302

`self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)

`

278

303

``

279

304

`def test_PYTHONCOERCECLOCALE_not_zero(self):

`

280

``

`-

Any string other that "0" is considered "set" for our purposes

`

``

305

`+

Any string other than "0" is considered "set" for our purposes

`

281

306

`# and hence should result in the locale coercion being enabled

`

282

307

`for setting in ("", "1", "true", "false"):

`

283

308

`self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)

`

284

309

``

``

310

`+

def test_PYTHONCOERCECLOCALE_set_to_warn(self):

`

``

311

`+

PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales

`

``

312

`+

self._check_c_locale_coercion("utf-8", "utf-8",

`

``

313

`+

coerce_c_locale="warn",

`

``

314

`+

expected_warnings=[CLI_COERCION_WARNING])

`

``

315

+

``

316

+

285

317

`def test_PYTHONCOERCECLOCALE_set_to_zero(self):

`

286

318

`# The setting "0" should result in the locale coercion being disabled

`

287

319

`self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,

`

288

320

`C_LOCALE_STREAM_ENCODING,

`

289

``

`-

coerce_c_locale="0")

`

``

321

`+

coerce_c_locale="0",

`

``

322

`+

coercion_expected=False)

`

``

323

`+

Setting LC_ALL=C shouldn't make any difference to the behaviour

`

``

324

`+

self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,

`

``

325

`+

C_LOCALE_STREAM_ENCODING,

`

``

326

`+

coerce_c_locale="0",

`

``

327

`+

LC_ALL="C",

`

``

328

`+

coercion_expected=False)

`

290

329

``

``

330

`+

def test_LC_ALL_set_to_C(self):

`

``

331

`+

Setting LC_ALL should render the locale coercion ineffective

`

``

332

`+

self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,

`

``

333

`+

C_LOCALE_STREAM_ENCODING,

`

``

334

`+

coerce_c_locale=None,

`

``

335

`+

LC_ALL="C",

`

``

336

`+

coercion_expected=False)

`

``

337

`+

And result in a warning about a lack of locale compatibility

`

``

338

`+

self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,

`

``

339

`+

C_LOCALE_STREAM_ENCODING,

`

``

340

`+

coerce_c_locale="warn",

`

``

341

`+

LC_ALL="C",

`

``

342

`+

expected_warnings=[LEGACY_LOCALE_WARNING],

`

``

343

`+

coercion_expected=False)

`

291

344

``

292

345

`def test_main():

`

293

346

`test.support.run_unittest(

`

294

347

`LocaleConfigurationTests,

`

295

``

`-

LocaleCoercionTests,

`

296

``

`-

LocaleWarningTests

`

``

348

`+

LocaleCoercionTests

`

297

349

` )

`

298

350

`test.support.reap_children()

`

299

351

``