Rollup merge of #130659 - bjoernager:const-char-encode-utf16, r=dtolnay · qinheping/verify-rust-std@337c634 (original) (raw)

`@@ -638,8 +638,7 @@ impl char {

`

638

638

`#[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]

`

639

639

`#[inline]

`

640

640

`pub const fn len_utf16(self) -> usize {

`

641

``

`-

let ch = self as u32;

`

642

``

`-

if (ch & 0xFFFF) == ch { 1 } else { 2 }

`

``

641

`+

len_utf16(self as u32)

`

643

642

`}

`

644

643

``

645

644

`/// Encodes this character as UTF-8 into the provided byte buffer,

`

`@@ -709,8 +708,9 @@ impl char {

`

709

708

`/// '𝕊'.encode_utf16(&mut b);

`

710

709

```` /// ```

````

711

710

`#[stable(feature = "unicode_encode_char", since = "1.15.0")]

`

``

711

`+

#[rustc_const_unstable(feature = "const_char_encode_utf16", issue = "130660")]

`

712

712

`#[inline]

`

713

``

`-

pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {

`

``

713

`+

pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {

`

714

714

`encode_utf16_raw(self as u32, dst)

`

715

715

`}

`

716

716

``

`@@ -1747,7 +1747,12 @@ const fn len_utf8(code: u32) -> usize {

`

1747

1747

`}

`

1748

1748

`}

`

1749

1749

``

1750

``

`-

/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,

`

``

1750

`+

#[inline]

`

``

1751

`+

const fn len_utf16(code: u32) -> usize {

`

``

1752

`+

if (code & 0xFFFF) == code { 1 } else { 2 }

`

``

1753

`+

}

`

``

1754

+

``

1755

`` +

/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,

``

1751

1756

`/// and then returns the subslice of the buffer that contains the encoded character.

`

1752

1757

`///

`

1753

1758

`` /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.

``

`@@ -1801,7 +1806,7 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {

`

1801

1806

`unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }

`

1802

1807

`}

`

1803

1808

``

1804

``

`` -

/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,

``

``

1809

`` +

/// Encodes a raw `u32` value as UTF-16 into the provided `u16` buffer,

``

1805

1810

`/// and then returns the subslice of the buffer that contains the encoded character.

`

1806

1811

`///

`

1807

1812

`` /// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.

``

`@@ -1812,28 +1817,33 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {

`

1812

1817

`/// Panics if the buffer is not large enough.

`

1813

1818

`` /// A buffer of length 2 is large enough to encode any `char`.

``

1814

1819

`#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]

`

``

1820

`+

#[rustc_const_unstable(feature = "const_char_encode_utf16", issue = "130660")]

`

1815

1821

`#[doc(hidden)]

`

1816

1822

`#[inline]

`

1817

``

`-

pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {

`

1818

``

`-

// SAFETY: each arm checks whether there are enough bits to write into

`

1819

``

`-

unsafe {

`

1820

``

`-

if (code & 0xFFFF) == code && !dst.is_empty() {

`

1821

``

`-

// The BMP falls through

`

1822

``

`-

*dst.get_unchecked_mut(0) = code as u16;

`

1823

``

`-

slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)

`

1824

``

`-

} else if dst.len() >= 2 {

`

1825

``

`-

// Supplementary planes break into surrogates.

`

``

1823

`+

pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {

`

``

1824

`+

const fn panic_at_const(_code: u32, _len: usize, _dst_len: usize) {

`

``

1825

`+

// Note that we cannot format in constant expressions.

`

``

1826

`+

panic!("encode_utf16: buffer does not have enough bytes to encode code point");

`

``

1827

`+

}

`

``

1828

`+

fn panic_at_rt(code: u32, len: usize, dst_len: usize) {

`

``

1829

`+

panic!(

`

``

1830

`+

"encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",

`

``

1831

`+

);

`

``

1832

`+

}

`

``

1833

`+

let len = len_utf16(code);

`

``

1834

`+

match (len, &mut *dst) {

`

``

1835

`+

(1, [a, ..]) => {

`

``

1836

`+

*a = code as u16;

`

``

1837

`+

}

`

``

1838

`+

(2, [a, b, ..]) => {

`

1826

1839

` code -= 0x1_0000;

`

1827

``

`-

*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);

`

1828

``

`-

*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);

`

1829

``

`-

slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)

`

1830

``

`-

} else {

`

1831

``

`-

panic!(

`

1832

``

`-

"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",

`

1833

``

`-

char::from_u32_unchecked(code).len_utf16(),

`

1834

``

`-

code,

`

1835

``

`-

dst.len(),

`

1836

``

`-

)

`

``

1840

+

``

1841

`+

*a = (code >> 10) as u16 | 0xD800;

`

``

1842

`+

*b = (code & 0x3FF) as u16 | 0xDC00;

`

1837

1843

`}

`

1838

``

`-

}

`

``

1844

`+

// FIXME(const-hack): We would prefer to have streamlined panics when formatters become const-friendly.

`

``

1845

`+

_ => const_eval_select((code, len, dst.len()), panic_at_const, panic_at_rt),

`

``

1846

`+

};

`

``

1847

`` +

// SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.

``

``

1848

`+

unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }

`

1839

1849

`}

`