Rollup merge of #130659 - bjoernager:const-char-encode-utf16, r=dtolnay · qinheping/verify-rust-std@337c634 (original) (raw)

`@@ -638,8 +638,7 @@ impl char {

638

`#[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]

639

`#[inline]

640

`pub const fn len_utf16(self) -> usize {

641

let ch = self as u32;

642

if (ch & 0xFFFF) == ch { 1 } else { 2 }

641

len_utf16(self as u32)

643

642

644

643

645

644

`/// Encodes this character as UTF-8 into the provided byte buffer,

`@@ -709,8 +708,9 @@ impl char {

709

708

`/// '𝕊'.encode_utf16(&mut b);

710

709

```` /// ```

````

711

710

`#[stable(feature = "unicode_encode_char", since = "1.15.0")]

711

#[rustc_const_unstable(feature = "const_char_encode_utf16", issue = "130660")]

712

`#[inline]

713

pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {

713

pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {

714

`encode_utf16_raw(self as u32, dst)

715

716

`@@ -1747,7 +1747,12 @@ const fn len_utf8(code: u32) -> usize {

1747

1748

1749

1750

/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,

1750

#[inline]

1751

const fn len_utf16(code: u32) -> usize {

1752

if (code & 0xFFFF) == code { 1 } else { 2 }

1753

}

1754

+

1755

`` +

/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,

1751

1756

`/// and then returns the subslice of the buffer that contains the encoded character.

1752

1757

`///

1753

1758

`` /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.

`@@ -1801,7 +1806,7 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {

1801

1806

`unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }

1802

1807

1803

1808

1804

`` -

/// Encodes a raw `u32` value as UTF-16 into the provided `u16` buffer,

1809

`` +

/// Encodes a raw `u32` value as UTF-16 into the provided `u16` buffer,

1805

1810

`/// and then returns the subslice of the buffer that contains the encoded character.

1806

1811

`///

1807

1812

`` /// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.

`@@ -1812,28 +1817,33 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {

1812

1817

`/// Panics if the buffer is not large enough.

1813

1818

`` /// A buffer of length 2 is large enough to encode any `char`.

1814

1819

`#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]

1820

#[rustc_const_unstable(feature = "const_char_encode_utf16", issue = "130660")]

1815

1821

`#[doc(hidden)]

1816

1822

`#[inline]

1817

pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {

1818

// SAFETY: each arm checks whether there are enough bits to write into

1819

unsafe {

1820

if (code & 0xFFFF) == code && !dst.is_empty() {

1821

// The BMP falls through

1822

*dst.get_unchecked_mut(0) = code as u16;

1823

slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)

1824

} else if dst.len() >= 2 {

1825

// Supplementary planes break into surrogates.

1823

pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {

1824

const fn panic_at_const(_code: u32, _len: usize, _dst_len: usize) {

1825

// Note that we cannot format in constant expressions.

1826

panic!("encode_utf16: buffer does not have enough bytes to encode code point");

1827

}

1828

fn panic_at_rt(code: u32, len: usize, dst_len: usize) {

1829

panic!(

1830

"encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",

1831

);

1832

}

1833

let len = len_utf16(code);

1834

match (len, &mut *dst) {

1835

(1, [a, ..]) => {

1836

*a = code as u16;

1837

}

1838

(2, [a, b, ..]) => {

1826

1839

` code -= 0x1_0000;

1827

*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);

1828

*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);

1829

slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)

1830

} else {

1831

panic!(

1832

"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",

1833

char::from_u32_unchecked(code).len_utf16(),

1834

code,

1835

dst.len(),

1836

)

1840

+

1841

*a = (code >> 10) as u16 | 0xD800;

1842

*b = (code & 0x3FF) as u16 | 0xDC00;

1837

1843

1838

}

1844

// FIXME(const-hack): We would prefer to have streamlined panics when formatters become const-friendly.

1845

_ => const_eval_select((code, len, dst.len()), panic_at_const, panic_at_rt),

1846

};

1847

`` +

// SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.

1848

unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }

1839

1849