Auto merge of #125679 - clarfonthey:escape_ascii, r=joboet · qinheping/verify-rust-std@9c1e162 (original) (raw)

`@@ -18,38 +18,106 @@ const fn backslash(a: ascii::Char) -> ([ascii::Char; N], Range<u

18

`(output, 0..2)

19

20

21

#[inline]

22

const fn hex_escape(byte: u8) -> ([ascii::Char; N], Range) {

23

const { assert!(N >= 4) };

24

+

25

let mut output = [ascii::Char::Null; N];

26

+

27

let hi = HEX_DIGITS[(byte >> 4) as usize];

28

let lo = HEX_DIGITS[(byte & 0xf) as usize];

29

+

30

output[0] = ascii::Char::ReverseSolidus;

31

output[1] = ascii::Char::SmallX;

32

output[2] = hi;

33

output[3] = lo;

34

+

35

(output, 0..4)

36

}

37

+

38

#[inline]

39

const fn verbatim(a: ascii::Char) -> ([ascii::Char; N], Range) {

40

const { assert!(N >= 1) };

41

+

42

let mut output = [ascii::Char::Null; N];

43

+

44

output[0] = a;

45

+

46

(output, 0..1)

47

}

48

+

21

49

`/// Escapes an ASCII character.

22

50

`///

23

51

`/// Returns a buffer and the length of the escaped representation.

24

52

`const fn escape_ascii(byte: u8) -> ([ascii::Char; N], Range) {

25

53

`const { assert!(N >= 4) };

26

54

27

match byte {

28

b'\t' => backslash(ascii::Char::SmallT),

29

b'\r' => backslash(ascii::Char::SmallR),

30

b'\n' => backslash(ascii::Char::SmallN),

31

b'\' => backslash(ascii::Char::ReverseSolidus),

32

b''' => backslash(ascii::Char::Apostrophe),

33

b'"' => backslash(ascii::Char::QuotationMark),

34

byte => {

35

let mut output = [ascii::Char::Null; N];

36

-

37

if let Some(c) = byte.as_ascii()

38

&& !byte.is_ascii_control()

39

{

40

output[0] = c;

41

(output, 0..1)

42

} else {

43

let hi = HEX_DIGITS[(byte >> 4) as usize];

44

let lo = HEX_DIGITS[(byte & 0xf) as usize];

55

#[cfg(feature = "optimize_for_size")]

56

{

57

match byte {

58

b'\t' => backslash(ascii::Char::SmallT),

59

b'\r' => backslash(ascii::Char::SmallR),

60

b'\n' => backslash(ascii::Char::SmallN),

61

b'\' => backslash(ascii::Char::ReverseSolidus),

62

b''' => backslash(ascii::Char::Apostrophe),

63

b'"' => backslash(ascii::Char::QuotationMark),

64

0x00..=0x1F | 0x7F => hex_escape(byte),

65

_ => match ascii::Char::from_u8(byte) {

66

Some(a) => verbatim(a),

67

None => hex_escape(byte),

68

69

}

70

}

71

+

72

#[cfg(not(feature = "optimize_for_size"))]

73

{

74

/// Lookup table helps us determine how to display character.

75

///

76

/// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to

77

/// indicate whether the result is escaped or unescaped.

78

///

79

/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since

80

/// escaped NUL will not occur.

81

const LOOKUP: [u8; 256] = {

82

let mut arr = [0; 256];

83

let mut idx = 0;

84

while idx <= 255 {

85

arr[idx] = match idx as u8 {

86

// use 8th bit to indicate escaped

87

b'\t' => 0x80 | b't',

88

b'\r' => 0x80 | b'r',

89

b'\n' => 0x80 | b'n',

90

b'\' => 0x80 | b'\',

91

b''' => 0x80 | b''',

92

b'"' => 0x80 | b'"',

93

+

94

// use NUL to indicate hex-escaped

95

0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',

96

+

97

idx => idx,

98

};

99

idx += 1;

100

}

101

arr

102

};

45

103

46

output[0] = ascii::Char::ReverseSolidus;

47

output[1] = ascii::Char::SmallX;

48

output[2] = hi;

49

output[3] = lo;

104

let lookup = LOOKUP[byte as usize];

50

105

51

(output, 0..4)

106

// 8th bit indicates escape

107

let lookup_escaped = lookup & 0x80 != 0;

108

+

109

// SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.

110

let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };

111

+

112

if lookup_escaped {

113

// NUL indicates hex-escaped

114

if matches!(lookup_ascii, ascii::Char::Null) {

115

hex_escape(byte)

116

} else {

117

backslash(lookup_ascii)

52

118

119

} else {

120

verbatim(lookup_ascii)

53

121

54

122

55

123