Rollup merge of #130659 - bjoernager:const-char-encode-utf16, r=dtolnay · qinheping/verify-rust-std@337c634 (original) (raw)
`@@ -638,8 +638,7 @@ impl char {
`
638
638
`#[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
`
639
639
`#[inline]
`
640
640
`pub const fn len_utf16(self) -> usize {
`
641
``
`-
let ch = self as u32;
`
642
``
`-
if (ch & 0xFFFF) == ch { 1 } else { 2 }
`
``
641
`+
len_utf16(self as u32)
`
643
642
`}
`
644
643
``
645
644
`/// Encodes this character as UTF-8 into the provided byte buffer,
`
`@@ -709,8 +708,9 @@ impl char {
`
709
708
`/// '𝕊'.encode_utf16(&mut b);
`
710
709
```` /// ```
````
711
710
`#[stable(feature = "unicode_encode_char", since = "1.15.0")]
`
``
711
`+
#[rustc_const_unstable(feature = "const_char_encode_utf16", issue = "130660")]
`
712
712
`#[inline]
`
713
``
`-
pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
`
``
713
`+
pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
`
714
714
`encode_utf16_raw(self as u32, dst)
`
715
715
`}
`
716
716
``
`@@ -1747,7 +1747,12 @@ const fn len_utf8(code: u32) -> usize {
`
1747
1747
`}
`
1748
1748
`}
`
1749
1749
``
1750
``
`-
/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
`
``
1750
`+
#[inline]
`
``
1751
`+
const fn len_utf16(code: u32) -> usize {
`
``
1752
`+
if (code & 0xFFFF) == code { 1 } else { 2 }
`
``
1753
`+
}
`
``
1754
+
``
1755
`` +
/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
``
1751
1756
`/// and then returns the subslice of the buffer that contains the encoded character.
`
1752
1757
`///
`
1753
1758
`` /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
``
`@@ -1801,7 +1806,7 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
`
1801
1806
`unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
`
1802
1807
`}
`
1803
1808
``
1804
``
`` -
/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
``
``
1809
`` +
/// Encodes a raw `u32` value as UTF-16 into the provided `u16` buffer,
``
1805
1810
`/// and then returns the subslice of the buffer that contains the encoded character.
`
1806
1811
`///
`
1807
1812
`` /// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
``
`@@ -1812,28 +1817,33 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
`
1812
1817
`/// Panics if the buffer is not large enough.
`
1813
1818
`` /// A buffer of length 2 is large enough to encode any `char`.
``
1814
1819
`#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
`
``
1820
`+
#[rustc_const_unstable(feature = "const_char_encode_utf16", issue = "130660")]
`
1815
1821
`#[doc(hidden)]
`
1816
1822
`#[inline]
`
1817
``
`-
pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
`
1818
``
`-
// SAFETY: each arm checks whether there are enough bits to write into
`
1819
``
`-
unsafe {
`
1820
``
`-
if (code & 0xFFFF) == code && !dst.is_empty() {
`
1821
``
`-
// The BMP falls through
`
1822
``
`-
*dst.get_unchecked_mut(0) = code as u16;
`
1823
``
`-
slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
`
1824
``
`-
} else if dst.len() >= 2 {
`
1825
``
`-
// Supplementary planes break into surrogates.
`
``
1823
`+
pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
`
``
1824
`+
const fn panic_at_const(_code: u32, _len: usize, _dst_len: usize) {
`
``
1825
`+
// Note that we cannot format in constant expressions.
`
``
1826
`+
panic!("encode_utf16: buffer does not have enough bytes to encode code point");
`
``
1827
`+
}
`
``
1828
`+
fn panic_at_rt(code: u32, len: usize, dst_len: usize) {
`
``
1829
`+
panic!(
`
``
1830
`+
"encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
`
``
1831
`+
);
`
``
1832
`+
}
`
``
1833
`+
let len = len_utf16(code);
`
``
1834
`+
match (len, &mut *dst) {
`
``
1835
`+
(1, [a, ..]) => {
`
``
1836
`+
*a = code as u16;
`
``
1837
`+
}
`
``
1838
`+
(2, [a, b, ..]) => {
`
1826
1839
` code -= 0x1_0000;
`
1827
``
`-
*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
`
1828
``
`-
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
`
1829
``
`-
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
`
1830
``
`-
} else {
`
1831
``
`-
panic!(
`
1832
``
`-
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
`
1833
``
`-
char::from_u32_unchecked(code).len_utf16(),
`
1834
``
`-
code,
`
1835
``
`-
dst.len(),
`
1836
``
`-
)
`
``
1840
+
``
1841
`+
*a = (code >> 10) as u16 | 0xD800;
`
``
1842
`+
*b = (code & 0x3FF) as u16 | 0xDC00;
`
1837
1843
`}
`
1838
``
`-
}
`
``
1844
`+
// FIXME(const-hack): We would prefer to have streamlined panics when formatters become const-friendly.
`
``
1845
`+
_ => const_eval_select((code, len, dst.len()), panic_at_const, panic_at_rt),
`
``
1846
`+
};
`
``
1847
`` +
// SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
``
``
1848
`+
unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
`
1839
1849
`}
`