mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 20:18:48 +00:00
8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
Reviewed-by: jlaskey, lagergren
This commit is contained in:
parent
35f9ab2054
commit
bc7905ab3a
@ -26,11 +26,10 @@
|
||||
package jdk.nashorn.internal.runtime.regexp;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import jdk.nashorn.internal.parser.Lexer;
|
||||
@ -58,7 +57,7 @@ final class RegExpScanner extends Scanner {
|
||||
private final List<Capture> caps = new LinkedList<>();
|
||||
|
||||
/** Forward references to capturing parentheses, to be resolved later. */
|
||||
private final Set<Integer> forwardReferences = new LinkedHashSet<>();
|
||||
private final LinkedList<Integer> forwardReferences = new LinkedList<>();
|
||||
|
||||
/** Current level of zero-width negative lookahead assertions. */
|
||||
private int negativeLookaheadLevel;
|
||||
@ -104,10 +103,20 @@ final class RegExpScanner extends Scanner {
|
||||
return;
|
||||
}
|
||||
|
||||
for (final Integer ref : forwardReferences) {
|
||||
if (ref.intValue() > caps.size()) {
|
||||
neverMatches = true;
|
||||
break;
|
||||
Iterator<Integer> iterator = forwardReferences.descendingIterator();
|
||||
while (iterator.hasNext()) {
|
||||
final int pos = iterator.next();
|
||||
final int num = iterator.next();
|
||||
if (num > caps.size()) {
|
||||
// A reference to a non-existing group should never match; if the number is smaller than 8,
|
||||
// insert the equivalent \x0N escape instead, to be compatible with other engines' octal escapes.
|
||||
if (num < 8) {
|
||||
String escape = "\\x0" + num;
|
||||
sb.insert(pos, escape);
|
||||
} else {
|
||||
neverMatches = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -402,6 +411,10 @@ final class RegExpScanner extends Scanner {
|
||||
if (ch0 == '}') {
|
||||
pop('}');
|
||||
commit(1);
|
||||
} else {
|
||||
// Bad quantifier should be rejected but is accepted by all major engines
|
||||
restart(startIn, startOut);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -637,7 +650,16 @@ final class RegExpScanner extends Scanner {
|
||||
throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException
|
||||
}
|
||||
// ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here.
|
||||
if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
|
||||
if (ch0 == 'c') {
|
||||
// Ignore invalid control letter escape if within a character class
|
||||
if (inCharClass && ch1 != ']') {
|
||||
sb.setLength(sb.length() - 1);
|
||||
skip(2);
|
||||
return true;
|
||||
} else {
|
||||
sb.append('\\'); // Treat invalid \c control sequence as \\c
|
||||
}
|
||||
} else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
|
||||
sb.setLength(sb.length() - 1);
|
||||
}
|
||||
return commit(1);
|
||||
@ -677,8 +699,9 @@ final class RegExpScanner extends Scanner {
|
||||
// Forward reference to a capture group. Forward references are always undefined so we
|
||||
// can omit it from the output buffer. Additionally, if the capture group does not exist
|
||||
// the reference either becomes an octal-style escape or the regexp never matches, so register it for later processing.
|
||||
forwardReferences.add(num);
|
||||
sb.setLength(sb.length() - 1);
|
||||
forwardReferences.add(num);
|
||||
forwardReferences.add(sb.length());
|
||||
skip(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
93
nashorn/test/script/basic/JDK-8009230.js
Normal file
93
nashorn/test/script/basic/JDK-8009230.js
Normal file
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* JDK-8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
|
||||
*
|
||||
* @test
|
||||
* @run
|
||||
*/
|
||||
|
||||
|
||||
// Invalid ControlEscape/IdentityEscape character treated as literal.
|
||||
print(/\z/.exec("z")); // Invalid escape, same as /z/
|
||||
// Incomplete/Invalid ControlEscape treated as "\\c"
|
||||
print(/\c/.exec("\\c")); // same as /\\c/
|
||||
print(/\c2/.exec("\\c2")); // same as /\\c2/
|
||||
print(/\C/.exec("C")); // same as /C/
|
||||
print(/\C2/.exec("C2")); // same as /C2/
|
||||
// Incomplete HexEscapeSequence escape treated as "x".
|
||||
print(/\x/.exec("x")); // incomplete x-escape
|
||||
print(/\x1/.exec("x1")); // incomplete x-escape
|
||||
print(/\x1z/.exec("x1z")); // incomplete x-escape
|
||||
// Incomplete UnicodeEscapeSequence escape treated as "u".
|
||||
print(/\u/.exec("u")); // incomplete u-escape
|
||||
print(/\uz/.exec("uz")); // incomplete u-escape
|
||||
print(/\u1/.exec("u1")); // incomplete u-escape
|
||||
print(/\u1z/.exec("u1z")); // incomplete u-escape
|
||||
print(/\u12/.exec("u12")); // incomplete u-escape
|
||||
print(/\u12z/.exec("u12z")); // incomplete u-escape
|
||||
print(/\u123/.exec("u123")); // incomplete u-escape
|
||||
print(/\u123z/.exec("u123z")); // incomplete u-escape
|
||||
// Bad quantifier range:
|
||||
print(/x{z/.exec("x{z")); // same as /x\{z/
|
||||
print(/x{1z/.exec("x{1z")); // same as /x\{1z/
|
||||
print(/x{1,z/.exec("x{1,z")); // same as /x\{1,z/
|
||||
print(/x{1,2z/.exec("x{1,2z")); // same as /x\{1,2z/
|
||||
print(/x{10000,20000z/.exec("x{10000,20000z")); // same as /x\{10000,20000z/
|
||||
// Note: determining that such a quantifier is invalid requires arbitrary lookahead,
|
||||
// except in Mozilla, which limits the numbers.
|
||||
|
||||
// Octal escapes with a leading zero.
|
||||
/\012/; // same as /\x0a/
|
||||
|
||||
// Nonexisting back-references smaller than 8 treated as octal escapes:
|
||||
print(/\5/.exec("\u0005")); // same as /\x05/
|
||||
print(/\7/.exec("\u0007")); // same as /\x07/
|
||||
print(/\8/.exec("\u0008")); // does not match
|
||||
|
||||
// Invalid PatternCharacter accepted unescaped
|
||||
print(/]/.exec("]"));
|
||||
print(/{/.exec("{"));
|
||||
print(/}/.exec("}"));
|
||||
|
||||
// Bad escapes also inside CharacterClass.
|
||||
print(/[\z]/.exec("z"));
|
||||
print(/[\c]/.exec("c"));
|
||||
print(/[\c2]/.exec("c"));
|
||||
print(/[\x]/.exec("x"));
|
||||
print(/[\x1]/.exec("x1"));
|
||||
print(/[\x1z]/.exec("x1z"));
|
||||
print(/[\u]/.exec("u"));
|
||||
print(/[\uz]/.exec("u"));
|
||||
print(/[\u1]/.exec("u"));
|
||||
print(/[\u1z]/.exec("u"));
|
||||
print(/[\u12]/.exec("u"));
|
||||
print(/[\u12z]/.exec("u"));
|
||||
print(/[\u123]/.exec("u"));
|
||||
print(/[\u123z]/.exec("u"));
|
||||
print(/[\012]/.exec("0"));
|
||||
print(/[\5]/.exec("5"));
|
||||
// And in addition:
|
||||
print(/[\B]/.exec("B"));
|
||||
print(/()()[\2]/.exec("")); // \2 would be a valid backreference outside the class, but is not one inside it.
|
||||
45
nashorn/test/script/basic/JDK-8009230.js.EXPECTED
Normal file
45
nashorn/test/script/basic/JDK-8009230.js.EXPECTED
Normal file
@ -0,0 +1,45 @@
|
||||
z
|
||||
\c
|
||||
\c2
|
||||
C
|
||||
C2
|
||||
x
|
||||
x1
|
||||
x1z
|
||||
u
|
||||
uz
|
||||
u1
|
||||
u1z
|
||||
u12
|
||||
u12z
|
||||
u123
|
||||
u123z
|
||||
x{z
|
||||
x{1z
|
||||
x{1,z
|
||||
x{1,2z
|
||||
x{10000,20000z
|
||||
|
||||
|
||||
null
|
||||
]
|
||||
{
|
||||
}
|
||||
z
|
||||
c
|
||||
null
|
||||
x
|
||||
x
|
||||
x
|
||||
u
|
||||
u
|
||||
u
|
||||
u
|
||||
u
|
||||
u
|
||||
u
|
||||
u
|
||||
null
|
||||
null
|
||||
B
|
||||
null
|
||||
Loading…
x
Reference in New Issue
Block a user