8011756: Wrong characters supported in RegExp \c escape

Reviewed-by: lagergren, attila
This commit is contained in:
Hannes Wallnöfer 2013-04-10 14:08:00 +02:00
parent 3c60983c02
commit ad7e2cd8de
3 changed files with 86 additions and 11 deletions

View File

@ -596,13 +596,14 @@ final class RegExpScanner extends Scanner {
* ABCDEFGHIJKLMNOPQRSTUVWXYZ
*/
private boolean controlLetter() {
final char c = Character.toUpperCase(ch0);
if (c >= 'A' && c <= 'Z') {
// To match other engines we also accept '0'..'9' and '_' as control letters inside a character class.
if ((ch0 >= 'A' && ch0 <= 'Z') || (ch0 >= 'a' && ch0 <= 'z')
|| (inCharClass && (isDecimalDigit(ch0) || ch0 == '_'))) {
// For some reason Java regexps don't like control characters of the
// form "\\ca".match([string with ascii 1 at char0]). Translating
// them to unicode does it though.
sb.setLength(sb.length() - 1);
unicode(c - 'A' + 1, sb);
unicode(ch0 % 32, sb);
skip(1);
return true;
}
@ -621,14 +622,7 @@ final class RegExpScanner extends Scanner {
}
// ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here.
if (ch0 == 'c') {
// Ignore invalid control letter escape if within a character class
if (inCharClass && ch1 != ']') {
sb.setLength(sb.length() - 1);
skip(2);
return true;
} else {
sb.append('\\'); // Treat invalid \c control sequence as \\c
}
sb.append('\\'); // Treat invalid \c control sequence as \\c
} else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
sb.setLength(sb.length() - 1);
}

View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* JDK-8011756: Wrong characters supported in RegExp \c escape
*
* @test
* @run
*/
// Invalid control letters should be escaped: when the character after "\c" is
// not an accepted control letter, the sequence is matched literally as a
// backslash, a 'c' and that character. Each print() below emits one boolean.
//
// 'ı' (U+0131, dotless i) and 'ſ' (U+017F, long s) are not ASCII letters, so
// "\cı" and "\cſ" must NOT behave like "\cI" (\x09) and "\cS" (\x13).
// NOTE(review): presumably chosen because these characters upper-case to 'I'
// and 'S' -- confirm against the scanner implementation.
print(/\cı/.test("\x09"));
print(/\cı/.test("\\cı"));
print(/\cſ/.test("\x13"));
print(/\cſ/.test("\\cſ"));
// Inside a character class an invalid "\c" escape contributes the backslash,
// the 'c' and the following character as three separate class members.
print(/[\cſ]/.test("\x13"));
print(/[\cſ]/.test("\\"));
print(/[\cſ]/.test("c"));
print(/[\cſ]/.test("ſ"));
// Same expectation for an ASCII character that is not a control letter ('#').
print(/[\c#]/.test("\\"));
print(/[\c#]/.test("c"));
print(/[\c#]/.test("#"));
// The characters that are supported by other engines are '0'-'9', '_':
// within a character class they act as control letters, and the matched
// control character is the character code modulo 32
// ('0' = 0x30 -> \x10, ..., '9' = 0x39 -> \x19, '_' = 0x5F -> \x1F).
print(/[\c0]/.test("\x10"));
print(/[\c1]/.test("\x11"));
print(/[\c2]/.test("\x12"));
print(/[\c3]/.test("\x13"));
print(/[\c4]/.test("\x14"));
print(/[\c5]/.test("\x15"));
print(/[\c6]/.test("\x16"));
print(/[\c7]/.test("\x17"));
print(/[\c8]/.test("\x18"));
print(/[\c9]/.test("\x19"));
print(/[\c_]/.test("\x1F"));

View File

@ -0,0 +1,22 @@
false
true
false
true
false
true
true
true
true
true
true
true
true
true
true
true
true
true
true
true
true
true