From 0b6355794101bda9de623016ce88f8abbb314f63 Mon Sep 17 00:00:00 2001 From: Naoto Sato Date: Thu, 2 Mar 2023 18:31:53 +0000 Subject: [PATCH] 8303039: Utilize `coverageLevels.txt` Reviewed-by: iris, joehw --- make/CompileToolsJdk.gmk | 9 +- .../cldr/common/properties/coverageLevels.txt | 141 ++++++++++++++++++ .../tools/cldrconverter/CLDRConverter.java | 32 +++- .../OtherCommonLocales.properties | 140 +++++++++++++++++ 4 files changed, 318 insertions(+), 4 deletions(-) create mode 100644 make/data/cldr/common/properties/coverageLevels.txt create mode 100644 make/jdk/src/classes/build/tools/cldrconverter/OtherCommonLocales.properties diff --git a/make/CompileToolsJdk.gmk b/make/CompileToolsJdk.gmk index d937cddb294..dceca481451 100644 --- a/make/CompileToolsJdk.gmk +++ b/make/CompileToolsJdk.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -69,6 +69,13 @@ $(eval $(call SetupCopyFiles,COPY_NIMBUS_TEMPLATES, \ TARGETS += $(COPY_NIMBUS_TEMPLATES) +$(eval $(call SetupCopyFiles,COPY_CLDRCONVERTER_PROPERTIES, \ + SRC := $(TOPDIR)/make/jdk/src/classes/build/tools/cldrconverter, \ + DEST := $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes/build/tools/cldrconverter, \ + FILES := $(wildcard $(TOPDIR)/make/jdk/src/classes/build/tools/cldrconverter/*.properties))) + +TARGETS += $(COPY_CLDRCONVERTER_PROPERTIES) + ################################################################################ $(eval $(call SetupJavaCompilation, COMPILE_DEPEND, \ diff --git a/make/data/cldr/common/properties/coverageLevels.txt b/make/data/cldr/common/properties/coverageLevels.txt new file mode 100644 index 00000000000..8191212d4eb --- /dev/null +++ b/make/data/cldr/common/properties/coverageLevels.txt @@ -0,0 +1,141 @@ +# coverageLevels.txt +# Copyright © 2022 Unicode, Inc. +# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) +# For terms of use, see http://www.unicode.org/copyright.html +# +# Provides the Coverage Level of locales at Basic or Above. +# For more info,see the Locale Coverage Chart for this version. +# Generated by ShowLocaleCoverage. 
+# +#Locale ; Level +af ; modern +am ; modern +ar ; modern +as ; modern +ast ; basic +az ; modern +be ; modern +bg ; modern +bgc ; basic +bho ; basic +bn ; modern +br ; moderate +brx ; basic +bs ; modern +bs_Cyrl ; basic +ca ; modern +ceb ; moderate +chr ; modern +cs ; modern +cv ; moderate +cy ; modern +da ; modern +de ; modern +doi ; basic +dsb ; modern +el ; modern +en ; modern +es ; modern +et ; modern +eu ; modern +fa ; modern +ff_Adlm ; basic +fi ; modern +fil ; modern +fo ; basic +fr ; modern +ga ; modern +gd ; modern +gl ; modern +gu ; modern +ha ; modern +he ; modern +hi ; modern +hi_Latn ; modern +hr ; modern +hsb ; modern +hu ; modern +hy ; modern +ia ; basic +id ; modern +ig ; modern +is ; modern +it ; modern +ja ; modern +jv ; modern +ka ; modern +kea ; basic +kgp ; basic +kk ; modern +km ; modern +kn ; modern +ko ; modern +kok ; modern +ks ; basic +ks_Deva ; basic +ky ; modern +lo ; modern +lt ; modern +lv ; modern +mai ; basic +mi ; basic +mk ; modern +ml ; modern +mn ; modern +mni ; basic +mr ; modern +ms ; modern +my ; modern +ne ; modern +nl ; modern +nn ; modern +no ; modern +or ; modern +pa ; modern +pcm ; modern +pl ; modern +ps ; modern +pt ; modern +qu ; moderate +raj ; basic +rm ; basic +ro ; modern +ru ; modern +sa ; basic +sat ; basic +sc ; moderate +sd ; modern +sd_Deva ; basic +si ; modern +sk ; modern +sl ; modern +so ; modern +sq ; modern +sr ; modern +su ; basic +sv ; modern +sw ; modern +ta ; modern +te ; modern +tg ; basic +th ; modern +ti ; basic +tk ; modern +to ; basic +tr ; modern +tt ; basic +uk ; modern +ur ; modern +uz ; modern +uz_Cyrl ; basic +vi ; modern +wo ; basic +xh ; moderate +yo ; modern +yrl ; basic +yue ; modern +yue_Hans ; modern +zh ; modern +zh_Hant ; modern +zu ; modern +#EOF diff --git a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java index d6c502252e0..17ecb754529 100644 --- 
a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java +++ b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java @@ -69,6 +69,7 @@ public class CLDRConverter { private static String WINZONES_SOURCE_FILE; private static String PLURALS_SOURCE_FILE; private static String DAYPERIODRULE_SOURCE_FILE; + private static String COVERAGELEVELS_FILE; static String DESTINATION_DIR = "build/gensrc"; static final String LOCALE_NAME_PREFIX = "locale.displayname."; @@ -258,6 +259,7 @@ public class CLDRConverter { WINZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/windowsZones.xml"; PLURALS_SOURCE_FILE = CLDR_BASE + "/supplemental/plurals.xml"; DAYPERIODRULE_SOURCE_FILE = CLDR_BASE + "/supplemental/dayPeriods.xml"; + COVERAGELEVELS_FILE = CLDR_BASE + "/properties/coverageLevels.txt"; if (BASE_LOCALES.isEmpty()) { setupBaseLocales("en-US"); @@ -359,13 +361,18 @@ public class CLDRConverter { private static List readBundleList() throws Exception { List retList = new ArrayList<>(); Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR); + var coverageMap = coverageLevelsMap(); try (DirectoryStream dirStr = Files.newDirectoryStream(path)) { for (Path entry : dirStr) { String fileName = entry.getFileName().toString(); if (fileName.endsWith(".xml")) { String id = fileName.substring(0, fileName.indexOf('.')); Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id)); - StringBuilder sb = getCandLocales(cldrLoc); + List candList = getCandidateLocales(cldrLoc); + if (!"root".equals(id) && candList.stream().noneMatch(coverageMap::containsKey)) { + continue; + } + StringBuilder sb = getCandLocales(candList); if (sb.indexOf("root") == -1) { sb.append("root"); } @@ -510,8 +517,7 @@ public class CLDRConverter { parser.parse(srcfile, handler); } - private static StringBuilder getCandLocales(Locale cldrLoc) { - List candList = getCandidateLocales(cldrLoc); + private static StringBuilder getCandLocales(List candList) { StringBuilder sb = new StringBuilder(); for 
(Locale loc : candList) {
             if (!loc.equals(Locale.ROOT)) {
@@ -1195,6 +1201,44 @@ public class CLDRConverter {
             }));
     }
 
+    /**
+     * Builds the locale-to-coverage-level lookup.
+     *
+     * <p>Each data line of {@code coverageLevels.txt} has the form
+     * {@code locale ; level}. Entries whose level is {@code basic},
+     * {@code moderate}, {@code modern} or {@code comprehensive} are mapped to
+     * that level; every key of the {@code OtherCommonLocales} bundle is then
+     * added with an empty level.
+     *
+     * @return a mutable map from locale to coverage level ({@code ""} for the
+     *         locales that come from {@code OtherCommonLocales})
+     * @throws Exception if {@code coverageLevels.txt} cannot be read
+     */
+    private static Map<Locale, String> coverageLevelsMap() throws Exception {
+        // First, parse `coverageLevels.txt` file
+        var covMap = Files.readAllLines(Path.of(COVERAGELEVELS_FILE)).stream()
+            // Strip first so indented comments and trailing blanks are tolerated
+            .map(String::strip)
+            .filter(line -> !line.isEmpty() && !line.startsWith("#"))
+            .map(line -> line.split("\\s*;\\s*", 3))
+            // A line without `;` splits into a single element; skip it rather
+            // than fail with ArrayIndexOutOfBoundsException on a[1]
+            .filter(a -> a.length > 1
+                && a[1].matches("basic|moderate|modern|comprehensive"))
+            .collect(Collectors.toMap(
+                a -> Locale.forLanguageTag(a[0].replace('_', '-')),
+                a -> a[1],
+                (v1, v2) -> v2, // should never happen
+                HashMap::new));
+
+        // Add other common (non-seed) locales (below `basic` coverage level) as of v42
+        ResourceBundle.getBundle(CLDRConverter.class.getPackageName() + ".OtherCommonLocales")
+            .keySet()
+            .forEach(k -> covMap.put(Locale.forLanguageTag(k), ""));
+
+        return covMap;
+    }
+
     // for debug
     static void dumpMap(Map map) {
         map.entrySet().stream()
diff --git a/make/jdk/src/classes/build/tools/cldrconverter/OtherCommonLocales.properties b/make/jdk/src/classes/build/tools/cldrconverter/OtherCommonLocales.properties
new file mode 100644
index 00000000000..b7bb73388a4
--- /dev/null
+++ b/make/jdk/src/classes/build/tools/cldrconverter/OtherCommonLocales.properties
@@ -0,0 +1,140 @@
+#
+# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation. Oracle designates this
+# particular file as subject to the "Classpath" exception as provided
+# by Oracle in the LICENSE file that accompanied this code.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +# Other common (non-seed) locales that are below `Basic` coverage level +# as of CLDR v42 (Extracted from +# https://github.com/unicode-org/cldr-staging/blob/main/docs/charts/42/tsv/locale-coverage.tsv) + +agq=Aghem +ak=Akan +ann=Obolo +asa=Asu +az-Cyrl=Azerbaijani (Cyrillic) +bas=Basaa +bem=Bemba +bez=Bena +bm=Bambara +bo=Tibetan +ccp=Chakma +ce=Chechen +cgg=Chiga +ckb=Central Kurdish +dav=Taita +dje=Zarma +dua=Duala +dyo=Jola-Fonyi +dz=Dzongkha +ebu=Embu +ee=Ewe +eo=Esperanto +ewo=Ewondo +ff=Fula +frr=Northern Frisian +fur=Friulian +fy=Western Frisian +gsw=Swiss German +guz=Gusii +gv=Manx +haw=Hawaiian +ii=Sichuan Yi +jgo=Ngomba +jmc=Machame +kab=Kabyle +kam=Kamba +kde=Makonde +khq=Koyra Chiini +ki=Kikuyu +kkj=Kako +kl=Kalaallisut +kln=Kalenjin +ksb=Shambala +ksf=Bafia +ksh=Colognian +ku=Kurdish +kw=Cornish +lag=Langi +lb=Luxembourgish +lg=Ganda +lkt=Lakota +ln=Lingala +lrc=Northern Luri +lu=Luba-Katanga +luo=Luo +luy=Luyia +mas=Masai +mdf=Moksha +mer=Meru +mfe=Morisyen +mg=Malagasy +mgh=Makhuwa-Meetto +mgo=Meta º +mt=Maltese +mua=Mundang +mzn=Mazanderani +naq=Nama +nd=North Ndebele +nds=Low German +nmg=Kwasio +nnh=Ngiemboon +nus=Nuer +nyn=Nyankole +oc=Occitan +om=Oromo +os=Ossetic +pa-Arab=Punjabi (Arabic) +pis=Pijin +rn=Rundi +rof=Rombo +rw=Kinyarwanda +rwk=Rwa +sah=Yakut +saq=Samburu +sbp=Sangu +se=Northern Sami +seh=Sena +ses=Koyraboro Senni +sg=Sango +shi=Tachelhit +shi-Latn=Tachelhit (Latin) +smn=Inari Sami +sms=Skolt Sami +sn=Shona 
+teo=Teso +tok=Toki Pona +twq=Tasawaq +tzm=Central Atlas Tamazight +ug=Uyghur +uz-Arab=Uzbek (Arabic) +vai=Vai +vai-Latn=Vai (Latin) +vun=Vunjo +wae=Walser +xog=Soga +yav=Yangben +yi=Yiddish +zgh=Standard Moroccan Tamazight + +# Not listed, but existed +sr-Latn=Serbian (Latin)