From e2a461bddeade1666fe15fb17cba8c9f4c5e7dab Mon Sep 17 00:00:00 2001 From: Nizar Benalla Date: Wed, 26 Mar 2025 12:59:25 +0000 Subject: [PATCH] 8351332: Line breaks in search tag descriptions corrupt JSON search index Reviewed-by: hannesw, liach --- .../formats/html/HtmlDocletWriter.java | 2 +- .../formats/html/taglets/IndexTaglet.java | 4 +- .../formats/html/taglets/SpecTaglet.java | 2 +- .../internal/doclets/toolkit/util/Utils.java | 11 ++- .../TestIndexLineBreaks.java | 83 +++++++++++++++++++ 5 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 test/langtools/jdk/javadoc/doclet/testIndexLineBreaks/TestIndexLineBreaks.java diff --git a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDocletWriter.java b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDocletWriter.java index b83a60bbd81..448a68fd5a1 100644 --- a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDocletWriter.java +++ b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDocletWriter.java @@ -1890,7 +1890,7 @@ public abstract class HtmlDocletWriter { } // Generate index item if (!headingContent.isEmpty() && configuration.indexBuilder != null) { - String tagText = headingContent.replaceAll("\\s+", " "); + String tagText = utils.normalizeWhitespace(headingContent); IndexItem item = IndexItem.of(element, node, tagText, getTagletWriterInstance(context).getHolderName(element), "", diff --git a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/taglets/IndexTaglet.java b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/taglets/IndexTaglet.java index 1ce2384067c..9e7c9acc7bb 100644 --- a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/taglets/IndexTaglet.java +++ b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/taglets/IndexTaglet.java @@ -58,10 +58,10 @@ public class IndexTaglet extends 
BaseTaglet { if (tagText.charAt(0) == '"' && tagText.charAt(tagText.length() - 1) == '"') { tagText = tagText.substring(1, tagText.length() - 1); } - tagText = tagText.replaceAll("\\s+", " "); + tagText = utils.normalizeWhitespace(tagText); Content desc = tagletWriter.htmlWriter.commentTagsToContent(element, indexTree.getDescription(), context.within(indexTree)); - String descText = extractText(desc); + String descText = utils.normalizeWhitespace(extractText(desc)); return tagletWriter.createAnchorAndSearchIndex(element, tagText, descText, tag); } diff --git a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/taglets/SpecTaglet.java b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/taglets/SpecTaglet.java index 6baca04b667..82d37cfad8a 100644 --- a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/taglets/SpecTaglet.java +++ b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/taglets/SpecTaglet.java @@ -123,7 +123,7 @@ public class SpecTaglet extends BaseTaglet implements InheritableTaglet { List specTreeLabel = specTree.getTitle(); Content label = htmlWriter.commentTagsToContent(holder, specTreeLabel, tagletWriter.context.isFirstSentence); return getExternalSpecContent(holder, specTree, specTreeURL, - textOf(label).replaceAll("\\s+", " "), label); + utils.normalizeWhitespace(textOf(label)), label); } // this is here, for now, but might be a useful addition elsewhere, diff --git a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/toolkit/util/Utils.java b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/toolkit/util/Utils.java index 0cdf04fb779..f82c6e5c0d6 100644 --- a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/toolkit/util/Utils.java +++ b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/toolkit/util/Utils.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1132,6 +1132,15 @@ public class Utils { return result.toString(); } + /** + * Replaces each run of one or more whitespace characters, i.e. each match of the regular expression {@code \s+} (which includes line breaks), with a single space character. + * @param s the string to be normalized; must not be {@code null} + * @return the normalized string + */ + public String normalizeWhitespace(String s) { + return s.replaceAll("\\s+", " "); + } + /** * Returns a locale independent lower cased String. That is, it * always uses US locale, this is a clone of the one in StringUtils. diff --git a/test/langtools/jdk/javadoc/doclet/testIndexLineBreaks/TestIndexLineBreaks.java b/test/langtools/jdk/javadoc/doclet/testIndexLineBreaks/TestIndexLineBreaks.java new file mode 100644 index 00000000000..1f9bbc5bc8e --- /dev/null +++ b/test/langtools/jdk/javadoc/doclet/testIndexLineBreaks/TestIndexLineBreaks.java @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8351332 + * @summary Line breaks in the description of `{@index}` tags may corrupt JSON search index + * @library /tools/lib ../../lib + * @modules jdk.javadoc/jdk.javadoc.internal.tool + * @build toolbox.ToolBox javadoc.tester.* + * @run main TestIndexLineBreaks + */ + +import java.io.IOException; +import java.nio.file.Path; + +import javadoc.tester.JavadocTester; +import toolbox.ToolBox; + +public class TestIndexLineBreaks extends JavadocTester { /* Regression test for bug 8351332: an {@index} tag whose phrase and description are broken across source lines must produce a single-line entry in tag-search-index.js. */ + + public static void main(String... args) throws Exception { + var tester = new TestIndexLineBreaks (); + tester.runTests(); + } + + ToolBox tb = new ToolBox(); + + @Test + public void test() throws IOException { + Path src = Path.of("src"); + tb.writeJavaFiles(src, /* source under test: interface p.I with one {@index} tag whose quoted phrase and whose description each continue on a following comment line */ + """ + package p; + public interface I { + /** + * + * The {@index "phrase1 + * phrase2" description1 + * description2 } + */ + int a(); + } + """); + + javadoc("-d", + "out", + "-sourcepath", + src.toString(), + "p"); + + checkExit(Exit.OK); + + checkOutput("tag-search-index.js", true, /* must be present: the index entry with phrase and description each joined by a single space */ + """ + {"l":"phrase1 phrase2","h":"p.I.a()","d":"description1 description2 ","u":"p/I.html#phrase1phrase2"},{"l":"Search Tags","h":"","k":"18","u":"search-tags.html"}"""); + + checkOutput("tag-search-index.js", false, /* must be absent: the same entry with the description still split across two lines */ + """ + {"l":"phrase1 phrase2","h":"p.I.a()","d":"description1 + description2 ","u":"p/I.html#phrase1phrase2"},{"l":"Search Tags","h":"","k":"18","u":"search-tags.html"}"""); + } +}