8023650: Regexp m flag does not recognize CRNL or CR

Reviewed-by: jlaskey, lagergren
This commit is contained in:
Hannes Wallnöfer 2013-08-26 15:59:41 +02:00
parent 0863203de8
commit c10f9ffaeb
7 changed files with 126 additions and 30 deletions

View File

@ -26,7 +26,6 @@ import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion;
import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isCrnl;
import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine;
import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
@ -500,7 +499,7 @@ class ByteCodeMachine extends StackMachine {
private void opAnyChar() {
if (s >= range) {opFail(); return;}
if (chars[s] == EncodingHelper.NEW_LINE) {opFail(); return;}
if (isNewLine(chars[s])) {opFail(); return;}
s++;
sprev = sbegin; // break;
}
@ -538,7 +537,7 @@ class ByteCodeMachine extends StackMachine {
while (s < range) {
char b = chars[s];
if (c == b) pushAlt(ip + 1, s, sprev);
if (b == EncodingHelper.NEW_LINE) {opFail(); return;}
if (isNewLine(b)) {opFail(); return;}
sprev = s;
s++;
}
@ -617,7 +616,7 @@ class ByteCodeMachine extends StackMachine {
if (s == str) {
if (isNotBol(msaOptions)) opFail();
return;
} else if (EncodingHelper.isNewLine(chars, sprev, end) && s != end) {
} else if (isNewLine(chars, sprev, end) && s != end) {
return;
}
opFail();
@ -626,7 +625,7 @@ class ByteCodeMachine extends StackMachine {
private void opEndLine() {
if (s == end) {
if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
if (str == end || !EncodingHelper.isNewLine(chars, sprev, end)) {
if (str == end || !isNewLine(chars, sprev, end)) {
if (isNotEol(msaOptions)) opFail();
}
return;
@ -634,7 +633,7 @@ class ByteCodeMachine extends StackMachine {
if (isNotEol(msaOptions)) opFail();
return;
}
} else if (isNewLine(chars, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end))) {
} else if (isNewLine(chars, s, end)) {
return;
}
opFail();
@ -653,9 +652,6 @@ class ByteCodeMachine extends StackMachine {
}
} else if (isNewLine(chars, s, end) && s + 1 == end) {
return;
} else if (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end)) {
int ss = s + 2;
if (ss == end) return;
}
opFail();
}

View File

@ -29,7 +29,6 @@ public interface Config {
final int INTERNAL_ENC_CASE_FOLD_MULTI_CHAR = (1<<30);
final int ENC_CASE_FOLD_MIN = INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
final int ENC_CASE_FOLD_DEFAULT = ENC_CASE_FOLD_MIN;
final boolean USE_CRNL_AS_LINE_TERMINATOR = false;
final boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */
final boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true; /* /\n$/ =~ "\n" */

View File

@ -24,10 +24,12 @@ import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
import java.util.Arrays;
public class EncodingHelper {
public final class EncodingHelper {
public final static char NEW_LINE = 0xa;
public final static char RETURN = 0xd;
final static int NEW_LINE = 0x000a;
final static int RETURN = 0x000d;
final static int LINE_SEPARATOR = 0x2028;
final static int PARAGRAPH_SEPARATOR = 0x2029;
final static char[] EMPTYCHARS = new char[0];
final static int[][] codeRanges = new int[15][];
@ -64,15 +66,11 @@ public class EncodingHelper {
}
public static boolean isNewLine(int code) {
return code == NEW_LINE;
return code == NEW_LINE || code == RETURN || code == LINE_SEPARATOR || code == PARAGRAPH_SEPARATOR;
}
public static boolean isNewLine(char[] chars, int p, int end) {
return p < end && chars[p] == NEW_LINE;
}
public static boolean isCrnl(char[] chars, int p, int end) {
return p + 1 < end && chars[p] == RETURN && chars[p + 1] == NEW_LINE;
return p < end && isNewLine(chars[p]);
}
// Encoding.prevCharHead
@ -194,7 +192,7 @@ public class EncodingHelper {
int type;
switch (ctype) {
case CharacterType.NEWLINE:
return code == EncodingHelper.NEW_LINE;
return isNewLine(code);
case CharacterType.ALPHA:
return (1 << Character.getType(code) & CharacterType.ALPHA_MASK) != 0;
case CharacterType.BLANK:

View File

@ -732,7 +732,7 @@ class Lexer extends ScannerSupport {
if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
break;
case '$':
if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE);
if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE);
break;
case '[':
if (syntax.opBracketCC()) token.type = TokenType.CC_CC_OPEN;

View File

@ -141,7 +141,7 @@ public abstract class Matcher extends IntHolder {
continue retry;
}
}
} else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
} else if (!EncodingHelper.isNewLine(chars, p, end)) {
//if () break;
// goto retry_gate;
pprev = p;
@ -226,7 +226,7 @@ public abstract class Matcher extends IntHolder {
continue retry;
}
}
} else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
} else if (!EncodingHelper.isNewLine(chars, p, end)) {
p = EncodingHelper.prevCharHead(adjrange, p);
if (p == -1) return false;
continue retry;
@ -330,12 +330,6 @@ public abstract class Matcher extends IntHolder {
maxSemiEnd = end;
if (EncodingHelper.isNewLine(chars, preEnd, end)) {
minSemiEnd = preEnd;
if (Config.USE_CRNL_AS_LINE_TERMINATOR) {
preEnd = EncodingHelper.stepBack(str, preEnd, 1);
if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) {
minSemiEnd = preEnd;
}
}
if (minSemiEnd > str && start <= minSemiEnd) {
// !goto end_buf;!
if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;

View File

@ -445,7 +445,7 @@ public class Shell {
continue;
}
if (res != null && res != ScriptRuntime.UNDEFINED) {
if (res != ScriptRuntime.UNDEFINED) {
err.println(JSType.toString(res));
}
}

View File

@ -0,0 +1,109 @@
/*
* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* JDK-8023650: Regexp m flag does not recognize CRNL or CR
*
* @test
* @run
*/
// Regression checks for JDK-8023650. Every ECMAScript line terminator
// (LF, CR, U+2028, U+2029) and the CR LF pair must be honoured by:
//   * '^' and '$' in multiline mode (they anchor next to a terminator),
//   * '.' in any mode (it never consumes a terminator),
//   * '$' in non-multiline mode (it matches only at the very end of input).
// Wrapped in a function so the helpers and loop variables stay out of the
// global scope of the test run.
(function () {
    // 'label' is the escaped, printable form used in failure messages;
    // 'text' is the actual character sequence fed to the regexp.
    var terminators = [
        { label: "\\r\\n",  text: "\r\n" },
        { label: "\\n",     text: "\n" },
        { label: "\\r",     text: "\r" },
        { label: "\\u2028", text: "\u2028" },
        { label: "\\u2029", text: "\u2029" }
    ];

    // Abort the test with a message that identifies the failing check.
    function fail(message) {
        throw new Error("JDK-8023650: " + message);
    }

    // '.*' must stop before the terminator, leaving an empty capture group.
    function checkDotStopsAtTerminator(regex, terminator) {
        var result = regex.exec("a" + terminator.text);
        if (!result || result[0] !== "a" || result[1] !== "") {
            fail(regex + " on \"a" + terminator.label + "\" should match only \"a\""
                + " with an empty group, got " + JSON.stringify(result));
        }
    }

    for (var i = 0; i < terminators.length; i++) {
        var terminator = terminators[i];
        var padding = terminator.text + terminator.text;

        // Multiline anchors must recognise the terminator on both sides of the line.
        if (!/^Connection: close$/m.test(padding + "Connection: close" + padding)) {
            fail("/^Connection: close$/m should match a line delimited by "
                + terminator.label);
        }

        // Checked with and without the m flag: the flag must not affect '.'.
        checkDotStopsAtTerminator(/a(.*)/, terminator);
        checkDotStopsAtTerminator(/a(.*)/m, terminator);

        // Without the m flag a trailing terminator is ordinary input, so '$'
        // must not match in front of it.
        if (/a$/.test("a" + terminator.text)) {
            fail("/a$/ should not match \"a" + terminator.label + "\"");
        }
    }
}());