mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-14 18:03:44 +00:00
8357289: Break down the String constructor into smaller methods
Reviewed-by: liach, rriggs
This commit is contained in:
parent
ecd2d83096
commit
839cede1a4
@ -553,135 +553,125 @@ public final class String
|
||||
* disambiguate it against other similar methods of this class.
|
||||
*/
|
||||
private String(Charset charset, byte[] bytes, int offset, int length) {
|
||||
String str;
|
||||
if (length == 0) {
|
||||
this.value = "".value;
|
||||
this.coder = "".coder;
|
||||
str = "";
|
||||
} else if (charset == UTF_8.INSTANCE) {
|
||||
if (COMPACT_STRINGS) {
|
||||
int dp = StringCoding.countPositives(bytes, offset, length);
|
||||
if (dp == length) {
|
||||
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
|
||||
this.coder = LATIN1;
|
||||
return;
|
||||
str = utf8(bytes, offset, length);
|
||||
} else if (charset == ISO_8859_1.INSTANCE) {
|
||||
str = iso88591(bytes, offset, length);
|
||||
} else if (charset == US_ASCII.INSTANCE) {
|
||||
str = ascii(bytes, offset, length);
|
||||
} else {
|
||||
str = decode(charset, bytes, offset, length);
|
||||
}
|
||||
this(str);
|
||||
}
|
||||
|
||||
private static String utf8(byte[] bytes, int offset, int length) {
|
||||
if (COMPACT_STRINGS) {
|
||||
int dp = StringCoding.countPositives(bytes, offset, length);
|
||||
if (dp == length) {
|
||||
return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
|
||||
}
|
||||
// Decode with a stable copy, to be the result if the decoded length is the same
|
||||
byte[] latin1 = Arrays.copyOfRange(bytes, offset, offset + length);
|
||||
int sp = dp; // first dp bytes are already in the copy
|
||||
while (sp < length) {
|
||||
int b1 = latin1[sp++];
|
||||
if (b1 >= 0) {
|
||||
latin1[dp++] = (byte) b1;
|
||||
continue;
|
||||
}
|
||||
// Decode with a stable copy, to be the result if the decoded length is the same
|
||||
byte[] latin1 = Arrays.copyOfRange(bytes, offset, offset + length);
|
||||
int sp = dp; // first dp bytes are already in the copy
|
||||
while (sp < length) {
|
||||
int b1 = latin1[sp++];
|
||||
if (b1 >= 0) {
|
||||
latin1[dp++] = (byte)b1;
|
||||
if ((b1 & 0xfe) == 0xc2 && sp < length) { // b1 either 0xc2 or 0xc3
|
||||
int b2 = latin1[sp];
|
||||
if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
|
||||
latin1[dp++] = (byte) decode2(b1, b2);
|
||||
sp++;
|
||||
continue;
|
||||
}
|
||||
if ((b1 & 0xfe) == 0xc2 && sp < length) { // b1 either 0xc2 or 0xc3
|
||||
int b2 = latin1[sp];
|
||||
if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
|
||||
latin1[dp++] = (byte)decode2(b1, b2);
|
||||
sp++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// anything not a latin1, including the REPL
|
||||
// we have to go with the utf16
|
||||
sp--;
|
||||
break;
|
||||
}
|
||||
if (sp == length) {
|
||||
if (dp != latin1.length) {
|
||||
latin1 = Arrays.copyOf(latin1, dp);
|
||||
}
|
||||
this.value = latin1;
|
||||
this.coder = LATIN1;
|
||||
return;
|
||||
}
|
||||
byte[] utf16 = StringUTF16.newBytesFor(length);
|
||||
StringLatin1.inflate(latin1, 0, utf16, 0, dp);
|
||||
dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true);
|
||||
if (dp != length) {
|
||||
utf16 = Arrays.copyOf(utf16, dp << 1);
|
||||
}
|
||||
this.value = utf16;
|
||||
this.coder = UTF16;
|
||||
} else { // !COMPACT_STRINGS
|
||||
byte[] dst = StringUTF16.newBytesFor(length);
|
||||
int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
|
||||
if (dp != length) {
|
||||
dst = Arrays.copyOf(dst, dp << 1);
|
||||
}
|
||||
this.value = dst;
|
||||
this.coder = UTF16;
|
||||
// anything not a latin1, including the REPL
|
||||
// we have to go with the utf16
|
||||
sp--;
|
||||
break;
|
||||
}
|
||||
} else if (charset == ISO_8859_1.INSTANCE) {
|
||||
if (COMPACT_STRINGS) {
|
||||
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
|
||||
this.coder = LATIN1;
|
||||
} else {
|
||||
this.value = StringLatin1.inflate(bytes, offset, length);
|
||||
this.coder = UTF16;
|
||||
}
|
||||
} else if (charset == US_ASCII.INSTANCE) {
|
||||
if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
|
||||
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
|
||||
this.coder = LATIN1;
|
||||
} else {
|
||||
byte[] dst = StringUTF16.newBytesFor(length);
|
||||
int dp = 0;
|
||||
while (dp < length) {
|
||||
int b = bytes[offset++];
|
||||
StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
|
||||
if (sp == length) {
|
||||
if (dp != latin1.length) {
|
||||
latin1 = Arrays.copyOf(latin1, dp);
|
||||
}
|
||||
this.value = dst;
|
||||
this.coder = UTF16;
|
||||
return new String(latin1, LATIN1);
|
||||
}
|
||||
} else {
|
||||
// (1)We never cache the "external" cs, the only benefit of creating
|
||||
// an additional StringDe/Encoder object to wrap it is to share the
|
||||
// de/encode() method. These SD/E objects are short-lived, the young-gen
|
||||
// gc should be able to take care of them well. But the best approach
|
||||
// is still not to generate them if not really necessary.
|
||||
// (2)The defensive copy of the input byte/char[] has a big performance
|
||||
// impact, as well as the outgoing result byte/char[]. Need to do the
|
||||
// optimization check of (sm==null && classLoader0==null) for both.
|
||||
CharsetDecoder cd = charset.newDecoder();
|
||||
// ArrayDecoder fastpaths
|
||||
if (cd instanceof ArrayDecoder ad) {
|
||||
// ascii
|
||||
if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
|
||||
if (COMPACT_STRINGS) {
|
||||
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
|
||||
this.coder = LATIN1;
|
||||
return;
|
||||
}
|
||||
this.value = StringLatin1.inflate(bytes, offset, length);
|
||||
this.coder = UTF16;
|
||||
return;
|
||||
}
|
||||
byte[] utf16 = StringUTF16.newBytesFor(length);
|
||||
StringLatin1.inflate(latin1, 0, utf16, 0, dp);
|
||||
dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true);
|
||||
if (dp != length) {
|
||||
utf16 = Arrays.copyOf(utf16, dp << 1);
|
||||
}
|
||||
return new String(utf16, UTF16);
|
||||
} else { // !COMPACT_STRINGS
|
||||
byte[] dst = StringUTF16.newBytesFor(length);
|
||||
int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
|
||||
if (dp != length) {
|
||||
dst = Arrays.copyOf(dst, dp << 1);
|
||||
}
|
||||
return new String(dst, UTF16);
|
||||
}
|
||||
}
|
||||
|
||||
private static String iso88591(byte[] bytes, int offset, int length) {
|
||||
if (COMPACT_STRINGS) {
|
||||
return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
|
||||
} else {
|
||||
return new String(StringLatin1.inflate(bytes, offset, length), UTF16);
|
||||
}
|
||||
}
|
||||
|
||||
private static String ascii(byte[] bytes, int offset, int length) {
|
||||
if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
|
||||
return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
|
||||
} else {
|
||||
byte[] dst = StringUTF16.newBytesFor(length);
|
||||
int dp = 0;
|
||||
while (dp < length) {
|
||||
int b = bytes[offset++];
|
||||
StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
|
||||
}
|
||||
return new String(dst, UTF16);
|
||||
}
|
||||
}
|
||||
|
||||
private static String decode(Charset charset, byte[] bytes, int offset, int length) {
|
||||
// (1)We never cache the "external" cs, the only benefit of creating
|
||||
// an additional StringDe/Encoder object to wrap it is to share the
|
||||
// de/encode() method. These SD/E objects are short-lived, the young-gen
|
||||
// gc should be able to take care of them well. But the best approach
|
||||
// is still not to generate them if not really necessary.
|
||||
// (2)The defensive copy of the input byte/char[] has a big performance
|
||||
// impact, as well as the outgoing result byte/char[]. Need to do the
|
||||
// optimization check of (sm==null && classLoader0==null) for both.
|
||||
CharsetDecoder cd = charset.newDecoder();
|
||||
// ArrayDecoder fastpaths
|
||||
if (cd instanceof ArrayDecoder ad) {
|
||||
// ascii
|
||||
if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
|
||||
return iso88591(bytes, offset, length);
|
||||
} else {
|
||||
// fastpath for always Latin1 decodable single byte
|
||||
if (COMPACT_STRINGS && ad.isLatin1Decodable()) {
|
||||
byte[] dst = new byte[length];
|
||||
ad.decodeToLatin1(bytes, offset, length, dst);
|
||||
this.value = dst;
|
||||
this.coder = LATIN1;
|
||||
return;
|
||||
return new String(dst, LATIN1);
|
||||
} else {
|
||||
int en = scale(length, cd.maxCharsPerByte());
|
||||
cd.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||
char[] ca = new char[en];
|
||||
int clen = ad.decode(bytes, offset, length, ca);
|
||||
return new String(ca, 0, clen, null);
|
||||
}
|
||||
|
||||
int en = scale(length, cd.maxCharsPerByte());
|
||||
cd.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||
char[] ca = new char[en];
|
||||
int clen = ad.decode(bytes, offset, length, ca);
|
||||
if (COMPACT_STRINGS) {
|
||||
byte[] val = StringUTF16.compress(ca, 0, clen);;
|
||||
this.coder = StringUTF16.coderFromArrayLen(val, clen);
|
||||
this.value = val;
|
||||
return;
|
||||
}
|
||||
coder = UTF16;
|
||||
value = StringUTF16.toBytes(ca, 0, clen);
|
||||
return;
|
||||
}
|
||||
|
||||
} else {
|
||||
// decode using CharsetDecoder
|
||||
int en = scale(length, cd.maxCharsPerByte());
|
||||
cd.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
@ -694,14 +684,7 @@ public final class String
|
||||
// Substitution is enabled, so this shouldn't happen
|
||||
throw new Error(x);
|
||||
}
|
||||
if (COMPACT_STRINGS) {
|
||||
byte[] val = StringUTF16.compress(ca, 0, caLen);
|
||||
this.coder = StringUTF16.coderFromArrayLen(val, caLen);
|
||||
this.value = val;
|
||||
return;
|
||||
}
|
||||
coder = UTF16;
|
||||
value = StringUTF16.toBytes(ca, 0, caLen);
|
||||
return new String(ca, 0, caLen, null);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user