Optimize escape_ascii · qinheping/verify-rust-std@86c7526 (original) (raw)
`@@ -18,38 +18,106 @@ const fn backslash(a: ascii::Char) -> ([ascii::Char; N], Range<u
`
18
18
`(output, 0..2)
`
19
19
`}
`
20
20
``
``
21
`+
#[inline]
`
``
22
`+
const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
`
``
23
`+
const { assert!(N >= 4) };
`
``
24
+
``
25
`+
let mut output = [ascii::Char::Null; N];
`
``
26
+
``
27
`+
let hi = HEX_DIGITS[(byte >> 4) as usize];
`
``
28
`+
let lo = HEX_DIGITS[(byte & 0xf) as usize];
`
``
29
+
``
30
`+
output[0] = ascii::Char::ReverseSolidus;
`
``
31
`+
output[1] = ascii::Char::SmallX;
`
``
32
`+
output[2] = hi;
`
``
33
`+
output[3] = lo;
`
``
34
+
``
35
`+
(output, 0..4)
`
``
36
`+
}
`
``
37
+
``
38
`+
#[inline]
`
``
39
`+
const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
`
``
40
`+
const { assert!(N >= 1) };
`
``
41
+
``
42
`+
let mut output = [ascii::Char::Null; N];
`
``
43
+
``
44
`+
output[0] = a;
`
``
45
+
``
46
`+
(output, 0..1)
`
``
47
`+
}
`
``
48
+
21
49
`/// Escapes an ASCII character.
`
22
50
`///
`
23
51
`/// Returns a buffer and the range within it that holds the escaped representation.
`
24
52
`const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
`
25
53
`const { assert!(N >= 4) };
`
26
54
``
27
``
`-
match byte {
`
28
``
`-
b'\t' => backslash(ascii::Char::SmallT),
`
29
``
`-
b'\r' => backslash(ascii::Char::SmallR),
`
30
``
`-
b'\n' => backslash(ascii::Char::SmallN),
`
31
``
`-
b'\\' => backslash(ascii::Char::ReverseSolidus),
`
32
``
`-
b'\'' => backslash(ascii::Char::Apostrophe),
`
33
``
`-
b'"' => backslash(ascii::Char::QuotationMark),
`
34
``
`-
byte => {
`
35
``
`-
let mut output = [ascii::Char::Null; N];
`
36
``
-
37
``
`-
if let Some(c) = byte.as_ascii()
`
38
``
`-
&& !byte.is_ascii_control()
`
39
``
`-
{
`
40
``
`-
output[0] = c;
`
41
``
`-
(output, 0..1)
`
42
``
`-
} else {
`
43
``
`-
let hi = HEX_DIGITS[(byte >> 4) as usize];
`
44
``
`-
let lo = HEX_DIGITS[(byte & 0xf) as usize];
`
``
55
`+
#[cfg(feature = "optimize_for_size")]
`
``
56
`+
{
`
``
57
`+
match byte {
`
``
58
`+
b'\t' => backslash(ascii::Char::SmallT),
`
``
59
`+
b'\r' => backslash(ascii::Char::SmallR),
`
``
60
`+
b'\n' => backslash(ascii::Char::SmallN),
`
``
61
`+
b'\\' => backslash(ascii::Char::ReverseSolidus),
`
``
62
`+
b'\'' => backslash(ascii::Char::Apostrophe),
`
``
63
`+
b'"' => backslash(ascii::Char::QuotationMark),
`
``
64
`+
0x00..=0x1F | 0x7F => hex_escape(byte),
`
``
65
`+
_ => match ascii::Char::from_u8(byte) {
`
``
66
`+
Some(a) => verbatim(a),
`
``
67
`+
None => hex_escape(byte),
`
``
68
`+
},
`
``
69
`+
}
`
``
70
`+
}
`
``
71
+
``
72
`+
#[cfg(not(feature = "optimize_for_size"))]
`
``
73
`+
{
`
``
74
`+
/// Lookup table helps us determine how to display character.
`
``
75
`+
///
`
``
76
`+
/// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
`
``
77
`+
/// indicate whether the result is escaped or unescaped.
`
``
78
`+
///
`
``
79
`+
/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
`
``
80
`+
/// escaped NUL will not occur.
`
``
81
`+
const LOOKUP: [u8; 256] = {
`
``
82
`+
let mut arr = [0; 256];
`
``
83
`+
let mut idx = 0;
`
``
84
`+
while idx <= 255 {
`
``
85
`+
arr[idx] = match idx as u8 {
`
``
86
`+
// use 8th bit to indicate escaped
`
``
87
`+
b'\t' => 0x80 | b't',
`
``
88
`+
b'\r' => 0x80 | b'r',
`
``
89
`+
b'\n' => 0x80 | b'n',
`
``
90
`+
b'\\' => 0x80 | b'\\',
`
``
91
`+
b'\'' => 0x80 | b'\'',
`
``
92
`+
b'"' => 0x80 | b'"',
`
``
93
+
``
94
`+
// use NUL to indicate hex-escaped
`
``
95
`+
0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',
`
``
96
+
``
97
`+
idx => idx,
`
``
98
`+
};
`
``
99
`+
idx += 1;
`
``
100
`+
}
`
``
101
`+
arr
`
``
102
`+
};
`
45
103
``
46
``
`-
output[0] = ascii::Char::ReverseSolidus;
`
47
``
`-
output[1] = ascii::Char::SmallX;
`
48
``
`-
output[2] = hi;
`
49
``
`-
output[3] = lo;
`
``
104
`+
let lookup = LOOKUP[byte as usize];
`
50
105
``
51
``
`-
(output, 0..4)
`
``
106
`+
// 8th bit indicates escape
`
``
107
`+
let lookup_escaped = lookup & 0x80 != 0;
`
``
108
+
``
109
`+
// SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.
`
``
110
`+
let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };
`
``
111
+
``
112
`+
if lookup_escaped {
`
``
113
`+
// NUL indicates hex-escaped
`
``
114
`+
if matches!(lookup_ascii, ascii::Char::Null) {
`
``
115
`+
hex_escape(byte)
`
``
116
`+
} else {
`
``
117
`+
backslash(lookup_ascii)
`
52
118
`}
`
``
119
`+
} else {
`
``
120
`+
verbatim(lookup_ascii)
`
53
121
`}
`
54
122
`}
`
55
123
`}
`