Fix reading comments with UTF chars (fixes #238) (#240) · codehaus-plexus/plexus-utils@32b72a6 (original) (raw)

`@@ -2981,8 +2981,8 @@ private void parseComment()

`

2981

2981

`// implements XML 1.0 Section 2.5 Comments

`

2982

2982

``

2983

2983

`// ASSUMPTION: seen <!-

`

2984

``

`-

char ch = more();

`

2985

``

`-

if ( ch != '-' )

`

``

2984

`+

char cch = more();

`

``

2985

`+

if ( cch != '-' )

`

2986

2986

`throw new XmlPullParserException( "expected <!-- for comment start", this, null );

`

2987

2987

`if ( tokenize )

`

2988

2988

`posStart = pos;

`

`@@ -2999,7 +2999,19 @@ private void parseComment()

`

2999

2999

`while ( true )

`

3000

3000

` {

`

3001

3001

`// scan until it hits -->

`

3002

``

`-

ch = more();

`

``

3002

`+

cch = more();

`

``

3003

`+

int ch;

`

``

3004

`+

char cch2;

`

``

3005

`+

if ( Character.isHighSurrogate( cch ) )

`

``

3006

`+

{

`

``

3007

`+

cch2 = more();

`

``

3008

`+

ch = Character.toCodePoint( cch, cch2 );

`

``

3009

`+

}

`

``

3010

`+

else

`

``

3011

`+

{

`

``

3012

`+

cch2 = 0;

`

``

3013

`+

ch = cch;

`

``

3014

`+

}

`

3003

3015

`if ( seenDashDash && ch != '>' )

`

3004

3016

` {

`

3005

3017

`throw new XmlPullParserException( "in comment after two dashes (--) next character must be >"

`

`@@ -3074,7 +3086,11 @@ else if ( ch == '\n' )

`

3074

3086

` {

`

3075

3087

`if ( pcEnd >= pc.length )

`

3076

3088

`ensurePC( pcEnd );

`

3077

``

`-

pc[pcEnd++] = ch;

`

``

3089

`+

pc[pcEnd++] = cch;

`

``

3090

`+

if ( cch2 != 0 )

`

``

3091

`+

{

`

``

3092

`+

pc[pcEnd++] = cch2;

`

``

3093

`+

}

`

3078

3094

` }

`

3079

3095

`normalizedCR = false;

`

3080

3096

` }

`

`@@ -4153,7 +4169,7 @@ private static boolean isS( char ch )

`

4153

4169

`// ch != '\u0000' ch < '\uFFFE'

`

4154

4170

``

4155

4171

`// private char printable(char ch) { return ch; }

`

4156

``

`-

private static String printable( char ch )

`

``

4172

`+

private static String printable( int ch )

`

4157

4173

` {

`

4158

4174

`if ( ch == '\n' )

`

4159

4175

` {

`

`@@ -4175,18 +4191,25 @@ else if ( ch == '\'' )

`

4175

4191

` {

`

4176

4192

`return "\\u" + Integer.toHexString( ch );

`

4177

4193

` }

`

4178

``

`-

return "" + ch;

`

``

4194

`+

if ( Character.isBmpCodePoint( ch ) )

`

``

4195

`+

{

`

``

4196

`+

return Character.toString( ( char ) ch );

`

``

4197

`+

}

`

``

4198

`+

else

`

``

4199

`+

{

`

``

4200

`+

return new String( new char[] { Character.highSurrogate( ch ), Character.lowSurrogate( ch ) } );

`

``

4201

`+

}

`

4179

4202

` }

`

4180

4203

``

4181

4204

`private static String printable( String s )

`

4182

4205

` {

`

4183

4206

`if ( s == null )

`

4184

4207

`return null;

`

4185

``

`-

final int sLen = s.length();

`

``

4208

`+

final int sLen = s.codePointCount(0, s.length());

`

4186

4209

`StringBuilder buf = new StringBuilder( sLen + 10 );

`

4187

4210

`for ( int i = 0; i < sLen; ++i )

`

4188

4211

` {

`

4189

``

`-

buf.append( printable( s.charAt( i ) ) );

`

``

4212

`+

buf.append( printable( s.codePointAt( i ) ) );

`

4190

4213

` }

`

4191

4214

`s = buf.toString();

`

4192

4215

`return s;

`