bpo-32174: Let .chm document display non-ASCII characters properly (GH-9758)

Let .chm document display non-ASCII characters properly

Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual effect on some MBCS Windows systems.
This commit is contained in:
animalize 2018-10-08 16:20:54 -05:00 committed by Steve Dower
parent 60d230c78f
commit 6261ae9b01
3 changed files with 42 additions and 1 deletions

View file

@ -14,7 +14,7 @@
# ---------------------
extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
'pyspecific', 'c_annotations']
'pyspecific', 'c_annotations', 'escape4chm']
# General substitutions.
project = 'Python'

View file

@ -0,0 +1,39 @@
"""
Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual
effect on some MBCS Windows systems.
https://bugs.python.org/issue32174
"""
import re
from html.entities import codepoint2name
# Escape every character whose code point is greater than 0x7F.
def _process(string):
def escape(matchobj):
codepoint = ord(matchobj.group(0))
name = codepoint2name.get(codepoint)
if name is None:
return '&#%d;' % codepoint
else:
return '&%s;' % name
return re.sub(r'[^\x00-\x7F]', escape, string)
def escape_for_chm(app, pagename, templatename, context, doctree):
    """Escape the page ``body`` to 7-bit ASCII when building .chm output.

    Connected to the ``html-page-context`` event.  Nothing happens unless
    ``app.builder.name`` is ``'htmlhelp'``; in that case the ``'body'`` entry
    of *context*, if present and not ``None``, is replaced in place by its
    escaped form.  The other parameters are accepted but not used.
    """
    # A builder lacking a ``name`` attribute is treated like any non-CHM builder.
    builder_name = getattr(app.builder, 'name', None)
    if builder_name == 'htmlhelp':
        page_body = context.get('body')
        if page_body is not None:
            context['body'] = _process(page_body)
def setup(app):
    """Register the extension with Sphinx and return its metadata.

    Hooks ``escape_for_chm`` onto the ``html-page-context`` event, which is
    emitted when the HTML builder has created a context dictionary to render
    a template with.
    """
    app.connect('html-page-context', escape_for_chm)
    metadata = {'version': '1.0', 'parallel_read_safe': True}
    return metadata

View file

@ -0,0 +1,2 @@
chm document displays non-ASCII characters properly on some MBCS Windows
systems.