bpo-32174: Let .chm document display non-ASCII characters properly (G… · python/cpython@6261ae9 (original) (raw)

File tree

3 files changed

lines changed

3 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@
14 14 # ---------------------
15 15
16 16 extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
17 -'pyspecific', 'c_annotations']
17 +'pyspecific', 'c_annotations', 'escape4chm']
18 18
19 19 # General substitutions.
20 20 project = 'Python'
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
1 +"""
2 +Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual
3 +effect on some MBCS Windows systems.
4 +
5 +https://bugs.python.org/issue32174
6 +"""
7 +
8 +import re
9 +from html.entities import codepoint2name
10 +
11 +# escape the characters which codepoint > 0x7F
12 +def _process(string):
13 +def escape(matchobj):
14 +codepoint = ord(matchobj.group(0))
15 +
16 +name = codepoint2name.get(codepoint)
17 +if name is None:
18 +return '&#%d;' % codepoint
19 +else:
20 +return '&%s;' % name
21 +
22 +return re.sub(r'[^\x00-\x7F]', escape, string)
23 +
def escape_for_chm(app, pagename, templatename, context, doctree):
    """Escape the rendered page body to 7-bit ASCII for .chm output.

    Handler with the signature of Sphinx's ``html-page-context`` event.
    Only *app* and *context* are used; *pagename*, *templatename* and
    *doctree* are accepted to match the event signature and are ignored.

    When the active builder is named ``'htmlhelp'`` and *context* holds a
    non-``None`` ``'body'`` entry, that entry is replaced in place with its
    escaped form.  In every other case *context* is left untouched.

    Returns ``None``.
    """
    # Only works for .chm output.  A builder without a ``name`` attribute
    # is treated the same as any builder that is not 'htmlhelp'.
    if getattr(app.builder, 'name', None) != 'htmlhelp':
        return

    # Escape the `body` part to 7-bit ASCII.
    body = context.get('body')
    if body is not None:
        context['body'] = _process(body)
33 +
def setup(app):
    """Sphinx extension entry point: hook up the .chm escaping handler.

    Connects ``escape_for_chm`` to the ``html-page-context`` event on *app*
    and returns the extension metadata dictionary (extension version and
    the ``parallel_read_safe`` flag).
    """
    # `html-page-context` event emitted when the HTML builder has
    # created a context dictionary to render a template with.
    app.connect('html-page-context', escape_for_chm)

    return {'version': '1.0', 'parallel_read_safe': True}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1 +The .chm document now displays non-ASCII characters properly on some MBCS
2 +Windows systems.