Auto merge of #125679 - clarfonthey:escape_ascii, r=joboet · qinheping/verify-rust-std@9c1e162 (original) (raw)

`@@ -18,38 +18,106 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u

`

18

18

`(output, 0..2)

`

19

19

`}

`

20

20

``

``

21

`+

#[inline]

`

``

22

`+

const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {

`

``

23

`+

const { assert!(N >= 4) };

`

``

24

+

``

25

`+

let mut output = [ascii::Char::Null; N];

`

``

26

+

``

27

`+

let hi = HEX_DIGITS[(byte >> 4) as usize];

`

``

28

`+

let lo = HEX_DIGITS[(byte & 0xf) as usize];

`

``

29

+

``

30

`+

output[0] = ascii::Char::ReverseSolidus;

`

``

31

`+

output[1] = ascii::Char::SmallX;

`

``

32

`+

output[2] = hi;

`

``

33

`+

output[3] = lo;

`

``

34

+

``

35

`+

(output, 0..4)

`

``

36

`+

}

`

``

37

+

``

38

`+

#[inline]

`

``

39

`+

const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {

`

``

40

`+

const { assert!(N >= 1) };

`

``

41

+

``

42

`+

let mut output = [ascii::Char::Null; N];

`

``

43

+

``

44

`+

output[0] = a;

`

``

45

+

``

46

`+

(output, 0..1)

`

``

47

`+

}

`

``

48

+

21

49

`/// Escapes an ASCII character.

`

22

50

`///

`

23

51

`/// Returns a buffer and the length of the escaped representation.

`

24

52

`const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {

`

25

53

`const { assert!(N >= 4) };

`

26

54

``

27

``

`-

match byte {

`

28

``

`-

b'\t' => backslash(ascii::Char::SmallT),

`

29

``

`-

b'\r' => backslash(ascii::Char::SmallR),

`

30

``

`-

b'\n' => backslash(ascii::Char::SmallN),

`

31

``

`-

b'\\' => backslash(ascii::Char::ReverseSolidus),

`

32

``

`-

b'\'' => backslash(ascii::Char::Apostrophe),

`

33

``

`-

b'"' => backslash(ascii::Char::QuotationMark),

`

34

``

`-

byte => {

`

35

``

`-

let mut output = [ascii::Char::Null; N];

`

36

``

-

37

``

`-

if let Some(c) = byte.as_ascii()

`

38

``

`-

&& !byte.is_ascii_control()

`

39

``

`-

{

`

40

``

`-

output[0] = c;

`

41

``

`-

(output, 0..1)

`

42

``

`-

} else {

`

43

``

`-

let hi = HEX_DIGITS[(byte >> 4) as usize];

`

44

``

`-

let lo = HEX_DIGITS[(byte & 0xf) as usize];

`

``

55

`+

#[cfg(feature = "optimize_for_size")]

`

``

56

`+

{

`

``

57

`+

match byte {

`

``

58

`+

b'\t' => backslash(ascii::Char::SmallT),

`

``

59

`+

b'\r' => backslash(ascii::Char::SmallR),

`

``

60

`+

b'\n' => backslash(ascii::Char::SmallN),

`

``

61

`+

b'\\' => backslash(ascii::Char::ReverseSolidus),

`

``

62

`+

b'\'' => backslash(ascii::Char::Apostrophe),

`

``

63

`+

b'"' => backslash(ascii::Char::QuotationMark),

`

``

64

`+

0x00..=0x1F | 0x7F => hex_escape(byte),

`

``

65

`+

_ => match ascii::Char::from_u8(byte) {

`

``

66

`+

Some(a) => verbatim(a),

`

``

67

`+

None => hex_escape(byte),

`

``

68

`+

},

`

``

69

`+

}

`

``

70

`+

}

`

``

71

+

``

72

`+

#[cfg(not(feature = "optimize_for_size"))]

`

``

73

`+

{

`

``

74

`+

/// Lookup table helps us determine how to display character.

`

``

75

`+

///

`

``

76

`+

/// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to

`

``

77

`+

/// indicate whether the result is escaped or unescaped.

`

``

78

`+

///

`

``

79

`+

/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since

`

``

80

`+

/// escaped NUL will not occur.

`

``

81

`+

const LOOKUP: [u8; 256] = {

`

``

82

`+

let mut arr = [0; 256];

`

``

83

`+

let mut idx = 0;

`

``

84

`+

while idx <= 255 {

`

``

85

`+

arr[idx] = match idx as u8 {

`

``

86

`+

// use 8th bit to indicate escaped

`

``

87

`+

b'\t' => 0x80 | b't',

`

``

88

`+

b'\r' => 0x80 | b'r',

`

``

89

`+

b'\n' => 0x80 | b'n',

`

``

90

`+

b'\\' => 0x80 | b'\\',

`

``

91

`+

b'\'' => 0x80 | b'\'',

`

``

92

`+

b'"' => 0x80 | b'"',

`

``

93

+

``

94

`+

// use NUL to indicate hex-escaped

`

``

95

`+

0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',

`

``

96

+

``

97

`+

idx => idx,

`

``

98

`+

};

`

``

99

`+

idx += 1;

`

``

100

`+

}

`

``

101

`+

arr

`

``

102

`+

};

`

45

103

``

46

``

`-

output[0] = ascii::Char::ReverseSolidus;

`

47

``

`-

output[1] = ascii::Char::SmallX;

`

48

``

`-

output[2] = hi;

`

49

``

`-

output[3] = lo;

`

``

104

`+

let lookup = LOOKUP[byte as usize];

`

50

105

``

51

``

`-

(output, 0..4)

`

``

106

`+

// 8th bit indicates escape

`

``

107

`+

let lookup_escaped = lookup & 0x80 != 0;

`

``

108

+

``

109

`+

// SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.

`

``

110

`+

let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };

`

``

111

+

``

112

`+

if lookup_escaped {

`

``

113

`+

// NUL indicates hex-escaped

`

``

114

`+

if matches!(lookup_ascii, ascii::Char::Null) {

`

``

115

`+

hex_escape(byte)

`

``

116

`+

} else {

`

``

117

`+

backslash(lookup_ascii)

`

52

118

`}

`

``

119

`+

} else {

`

``

120

`+

verbatim(lookup_ascii)

`

53

121

`}

`

54

122

`}

`

55

123

`}

`