/* * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. * */ #include "cgroupSubsystem_linux.hpp" #include "cgroupUtil_linux.hpp" #include "cgroupV1Subsystem_linux.hpp" #include "cgroupV2Subsystem_linux.hpp" #include "logging/log.hpp" #include "memory/allocation.hpp" #include "os_linux.hpp" #include "runtime/globals.hpp" #include "runtime/os.hpp" #include "utilities/globalDefinitions.hpp" #include #include #include #include // Inlined from for portability. #ifndef CGROUP2_SUPER_MAGIC # define CGROUP2_SUPER_MAGIC 0x63677270 #endif // controller names have to match the *_IDX indices static const char* cg_controller_name[] = { "cpuset", "cpu", "cpuacct", "memory", "pids" }; static inline int cg_v2_controller_index(const char* name) { if (strcmp(name, "cpuset") == 0) { return CPUSET_IDX; } else if (strcmp(name, "cpu") == 0) { return CPU_IDX; } else if (strcmp(name, "memory") == 0) { return MEMORY_IDX; } else if (strcmp(name, "pids") == 0) { return PIDS_IDX; } else { return -1; } } CgroupSubsystem* CgroupSubsystemFactory::create() { CgroupV1MemoryController* memory = nullptr; CgroupV1Controller* cpuset = nullptr; CgroupV1CpuController* cpu = nullptr; CgroupV1CpuacctController* cpuacct = nullptr; CgroupV1Controller* pids = nullptr; CgroupInfo cg_infos[CG_INFO_LENGTH]; u1 cg_type_flags = INVALID_CGROUPS_GENERIC; const char* proc_cgroups = "/proc/cgroups"; const char* sys_fs_cgroup_cgroup_controllers = "/sys/fs/cgroup/cgroup.controllers"; const char* controllers_file = proc_cgroups; const char* proc_self_cgroup = "/proc/self/cgroup"; const char* proc_self_mountinfo = "/proc/self/mountinfo"; const char* sys_fs_cgroup = "/sys/fs/cgroup"; struct statfs fsstat = {}; bool cgroups_v2_enabled = false; // Assume cgroups v2 is usable by the JDK iff /sys/fs/cgroup has the cgroup v2 // file system magic. If it does not then heuristics are required to determine // if cgroups v1 is usable or not. if (statfs(sys_fs_cgroup, &fsstat) != -1) { cgroups_v2_enabled = (fsstat.f_type == CGROUP2_SUPER_MAGIC); if (cgroups_v2_enabled) { controllers_file = sys_fs_cgroup_cgroup_controllers; } } bool valid_cgroup = determine_type(cg_infos, cgroups_v2_enabled, controllers_file, proc_self_cgroup, proc_self_mountinfo, &cg_type_flags); if (!valid_cgroup) { // Could not detect cgroup type return nullptr; } assert(is_valid_cgroup(&cg_type_flags), "Expected valid cgroup type"); if (is_cgroup_v2(&cg_type_flags)) { // Cgroups v2 case, we have all the info we need. // Construct the subsystem, free resources and return // Note: We use the memory for non-cpu non-memory controller look-ups. // Perhaps we ought to have separate controllers for all. CgroupV2Controller mem_other = CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path, cg_infos[MEMORY_IDX]._cgroup_path, cg_infos[MEMORY_IDX]._read_only); CgroupV2MemoryController* memory = new CgroupV2MemoryController(mem_other); CgroupV2CpuController* cpu = new CgroupV2CpuController(CgroupV2Controller(cg_infos[CPU_IDX]._mount_path, cg_infos[CPU_IDX]._cgroup_path, cg_infos[CPU_IDX]._read_only)); CgroupV2CpuacctController* cpuacct = new CgroupV2CpuacctController(cpu); log_debug(os, container)("Detected cgroups v2 unified hierarchy"); cleanup(cg_infos); return new CgroupV2Subsystem(memory, cpu, cpuacct, mem_other); } /* * Cgroup v1 case: * * Use info gathered previously from /proc/self/cgroup * and map host mount point to * local one via /proc/self/mountinfo content above * * Docker example: * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 * * Host example: * 5:memory:/user.slice * * Construct a path to the process specific memory and cpuset * cgroup directory. * * For a container running under Docker from memory example above * the paths would be: * * /sys/fs/cgroup/memory * * For a Host from memory example above the path would be: * * /sys/fs/cgroup/memory/user.slice * */ assert(is_cgroup_v1(&cg_type_flags), "Cgroup v1 expected"); for (int i = 0; i < CG_INFO_LENGTH; i++) { CgroupInfo info = cg_infos[i]; if (info._data_complete) { // pids controller might have incomplete data if (strcmp(info._name, "memory") == 0) { memory = new CgroupV1MemoryController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only)); memory->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpuset") == 0) { cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only); cpuset->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpu") == 0) { cpu = new CgroupV1CpuController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only)); cpu->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpuacct") == 0) { cpuacct = new CgroupV1CpuacctController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only)); cpuacct->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "pids") == 0) { pids = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only); pids->set_subsystem_path(info._cgroup_path); } } else { log_debug(os, container)("CgroupInfo for %s not complete", cg_controller_name[i]); } } cleanup(cg_infos); return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory); } void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, int controller, const char* name, char* mount_path, char* root_path, bool read_only) { if (cg_infos[controller]._mount_path != nullptr) { // On some systems duplicate controllers get mounted in addition to // the main cgroup controllers most likely under /sys/fs/cgroup. In that // case pick the one under /sys/fs/cgroup and discard others. if (strstr(cg_infos[controller]._mount_path, "/sys/fs/cgroup") != cg_infos[controller]._mount_path) { log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.", name, mount_path, cg_infos[controller]._mount_path); os::free(cg_infos[controller]._mount_path); os::free(cg_infos[controller]._root_mount_path); cg_infos[controller]._mount_path = os::strdup(mount_path); cg_infos[controller]._root_mount_path = os::strdup(root_path); cg_infos[controller]._read_only = read_only; } else { log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.", name, cg_infos[controller]._mount_path, mount_path); } } else { cg_infos[controller]._mount_path = os::strdup(mount_path); cg_infos[controller]._root_mount_path = os::strdup(root_path); cg_infos[controller]._read_only = read_only; } } /* * Determine whether or not the mount options, which are comma separated, * contain the 'ro' string. */ static bool find_ro_opt(char* mount_opts) { char* token; char* mo_ptr = mount_opts; // mount options are comma-separated (man proc). while ((token = strsep(&mo_ptr, ",")) != nullptr) { if (strcmp(token, "ro") == 0) { return true; } } return false; } /* * Read values of a /proc/self/mountinfo line into variables. For cgroups v1 * super options are needed. On cgroups v2 super options are not used. * * The scanning of a single mountinfo line entry is as follows: * * 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue * (1) (2) (3):(4) (5) (6) (7) (8) (9) (10) (11) (12) * * The numbers in parentheses are labels for the descriptions below: * * (1) mount ID: matched with '%*d' and discarded * (2) parent ID: matched with '%*d' and discarded * (3) major: ---,---> major, minor separated by ':'. matched with '%*d:%*d' and discarded * (4) minor: ---' * (5) root: matched with '%s' and captured in 'tmproot'. Must be non-empty. * (6) mount point: matched with '%s' and captured in 'tmpmount'. Must be non-empty. * (7) mount options: matched with '%s' and captured in 'mount_opts'. Must be non-empty. * (8) optional fields: ---,---> matched with '%*[^-]-'. Anything not a hyphen, followed by a hyphen * (9) separator: ---' and discarded. Note: The discarded match is space characters if there * are no optionals. Otherwise it includes the optional fields as well. * (10) filesystem type: matched with '%s' and captured in 'tmp_fs_type' * (11) mount source: matched with '%*s' and discarded * (12) super options: matched with '%s' and captured in 'tmpcgroups' */ static inline bool match_mount_info_line(char* line, char* tmproot, char* tmpmount, char* mount_opts, char* tmp_fs_type, char* tmpcgroups) { return sscanf(line, "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %s", tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups) == 5; } bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, bool cgroups_v2_enabled, const char* controllers_file, const char* proc_self_cgroup, const char* proc_self_mountinfo, u1* flags) { FILE *mntinfo = nullptr; FILE* controllers = nullptr; FILE *cgroup = nullptr; char buf[MAXPATHLEN+1]; char *p; // true iff all required controllers, memory, cpu, cpuacct are enabled // at the kernel level. // pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1) // cpuset might not be enabled on newer Linux distros (Fedora 41) bool all_required_controllers_enabled = true; // If cgroups v2 is enabled, open /sys/fs/cgroup/cgroup.controllers. If not, open /proc/cgroups. controllers = os::fopen(controllers_file, "r"); if (controllers == nullptr) { log_debug(os, container)("Can't open %s, %s", controllers_file, os::strerror(errno)); *flags = INVALID_CGROUPS_GENERIC; return false; } if (cgroups_v2_enabled) { /* * cgroups v2 is enabled. For cgroups v2 (unified hierarchy), the cpu and memory * controllers must be enabled. */ if ((p = fgets(buf, MAXPATHLEN, controllers)) != nullptr) { char* controller = nullptr; #define ISSPACE_CHARS " \n\t\r\f\v" while ((controller = strsep(&p, ISSPACE_CHARS)) != nullptr) { int i; if ((i = cg_v2_controller_index(controller)) != -1) { cg_infos[i]._name = os::strdup(controller); cg_infos[i]._enabled = true; if (i == PIDS_IDX || i == CPUSET_IDX) { log_debug(os, container)("Detected optional %s controller entry in %s", controller, controllers_file); } } } #undef ISSPACE_CHARS } else { log_debug(os, container)("Can't read %s, %s", controllers_file, os::strerror(errno)); *flags = INVALID_CGROUPS_V2; fclose(controllers); return false; } for (int i = 0; i < CG_INFO_LENGTH; i++) { // cgroups v2 does not have cpuacct. if (i == CPUACCT_IDX) { continue; } // For cgroups v2, cpuacct is rolled into cpu, and the pids and cpuset controllers // are optional; the remaining controllers, cpu and memory, are required. if (i == CPU_IDX || i == MEMORY_IDX) { all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled; } if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) { log_debug(os, container)("controller %s is not enabled", cg_controller_name[i]); } } } else { /* * The /sys/fs/cgroup filesystem magic hint suggests we have cg v1. Read /proc/cgroups; for * cgroups v1 hierarchy (hybrid or legacy), cpu, cpuacct, cpuset, and memory controllers must * have non-zero for the hierarchy ID field and relevant controllers mounted. */ while ((p = fgets(buf, MAXPATHLEN, controllers)) != nullptr) { char name[MAXPATHLEN+1]; int hierarchy_id; int enabled; // Format of /proc/cgroups documented via man 7 cgroups if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) { continue; } if (strcmp(name, "memory") == 0) { cg_infos[MEMORY_IDX]._name = os::strdup(name); cg_infos[MEMORY_IDX]._hierarchy_id = hierarchy_id; cg_infos[MEMORY_IDX]._enabled = (enabled == 1); } else if (strcmp(name, "cpuset") == 0) { cg_infos[CPUSET_IDX]._name = os::strdup(name); cg_infos[CPUSET_IDX]._hierarchy_id = hierarchy_id; cg_infos[CPUSET_IDX]._enabled = (enabled == 1); } else if (strcmp(name, "cpu") == 0) { cg_infos[CPU_IDX]._name = os::strdup(name); cg_infos[CPU_IDX]._hierarchy_id = hierarchy_id; cg_infos[CPU_IDX]._enabled = (enabled == 1); } else if (strcmp(name, "cpuacct") == 0) { cg_infos[CPUACCT_IDX]._name = os::strdup(name); cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id; cg_infos[CPUACCT_IDX]._enabled = (enabled == 1); } else if (strcmp(name, "pids") == 0) { log_debug(os, container)("Detected optional pids controller entry in %s", controllers_file); cg_infos[PIDS_IDX]._name = os::strdup(name); cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id; cg_infos[PIDS_IDX]._enabled = (enabled == 1); } } for (int i = 0; i < CG_INFO_LENGTH; i++) { // pids controller is optional. All other controllers are required if (i != PIDS_IDX) { all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled; } if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) { log_debug(os, container)("controller %s is not enabled", cg_controller_name[i]); } } } fclose(controllers); if (!all_required_controllers_enabled) { // one or more required controllers disabled, disable container support log_debug(os, container)("One or more required controllers disabled at kernel level."); cleanup(cg_infos); *flags = INVALID_CGROUPS_GENERIC; return false; } /* * Read /proc/self/cgroup and determine: * - the cgroup path for cgroups v2 or * - on a cgroups v1 system, collect info for mapping * the host mount point to the local one via /proc/self/mountinfo below. */ cgroup = os::fopen(proc_self_cgroup, "r"); if (cgroup == nullptr) { log_debug(os, container)("Can't open %s, %s", proc_self_cgroup, os::strerror(errno)); cleanup(cg_infos); *flags = INVALID_CGROUPS_GENERIC; return false; } while ((p = fgets(buf, MAXPATHLEN, cgroup)) != nullptr) { char *controllers; char *token; char *hierarchy_id_str; int hierarchy_id; char *cgroup_path; hierarchy_id_str = strsep(&p, ":"); hierarchy_id = atoi(hierarchy_id_str); /* Get controllers and base */ controllers = strsep(&p, ":"); cgroup_path = strsep(&p, "\n"); if (controllers == nullptr) { continue; } while (!cgroups_v2_enabled && (token = strsep(&controllers, ",")) != nullptr) { if (strcmp(token, "memory") == 0) { assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory"); cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path); } else if (strcmp(token, "cpuset") == 0) { assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuset"); cg_infos[CPUSET_IDX]._cgroup_path = os::strdup(cgroup_path); } else if (strcmp(token, "cpu") == 0) { assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu"); cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path); } else if (strcmp(token, "cpuacct") == 0) { assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacct"); cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path); } else if (strcmp(token, "pids") == 0) { assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids", cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id); cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path); } } if (cgroups_v2_enabled) { // On some systems we have mixed cgroups v1 and cgroups v2 controllers (e.g. freezer on cg1 and // all relevant controllers on cg2). Only set the cgroup path when we see a hierarchy id of 0. if (hierarchy_id != 0) { continue; } for (int i = 0; i < CG_INFO_LENGTH; i++) { assert(cg_infos[i]._cgroup_path == nullptr, "cgroup path must only be set once"); cg_infos[i]._cgroup_path = os::strdup(cgroup_path); } } } fclose(cgroup); // Find various mount points by reading /proc/self/mountinfo // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt mntinfo = os::fopen(proc_self_mountinfo, "r"); if (mntinfo == nullptr) { log_debug(os, container)("Can't open %s, %s", proc_self_mountinfo, os::strerror(errno)); cleanup(cg_infos); *flags = INVALID_CGROUPS_GENERIC; return false; } bool cgroupv2_mount_point_found = false; bool any_cgroup_mounts_found = false; while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != nullptr) { char tmp_fs_type[MAXPATHLEN+1]; char tmproot[MAXPATHLEN+1]; char tmpmount[MAXPATHLEN+1]; char tmpcgroups[MAXPATHLEN+1]; char mount_opts[MAXPATHLEN+1]; char *cptr = tmpcgroups; char *token; /* Cgroup v2 relevant info. We only look for the _mount_path iff cgroups_v2_enabled so * as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1 * block in the hybrid case. * * We collect the read only mount option in the cgroup infos so as to have that * info ready when determining is_containerized(). */ if (cgroups_v2_enabled && match_mount_info_line(p, tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups /* unused */)) { // we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype if (strcmp("cgroup2", tmp_fs_type) == 0) { cgroupv2_mount_point_found = true; any_cgroup_mounts_found = true; // For unified we only have a single line with cgroup2 fs type. // Therefore use that option for all CG info structs. bool ro_option = find_ro_opt(mount_opts); for (int i = 0; i < CG_INFO_LENGTH; i++) { set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot, ro_option); } } } /* Cgroup v1 relevant info * * Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids. For each controller * determine whether or not they show up as mounted read only or not. * * Example for docker: * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory * * Example for host: * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory * * 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids * */ if (match_mount_info_line(p, tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups)) { if (strcmp("cgroup", tmp_fs_type) != 0) { // Skip cgroup2 fs lines on hybrid or unified hierarchy. continue; } while ((token = strsep(&cptr, ",")) != nullptr) { if (strcmp(token, "memory") == 0) { any_cgroup_mounts_found = true; bool ro_option = find_ro_opt(mount_opts); set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot, ro_option); cg_infos[MEMORY_IDX]._data_complete = true; } else if (strcmp(token, "cpuset") == 0) { any_cgroup_mounts_found = true; bool ro_option = find_ro_opt(mount_opts); set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot, ro_option); cg_infos[CPUSET_IDX]._data_complete = true; } else if (strcmp(token, "cpu") == 0) { any_cgroup_mounts_found = true; bool ro_option = find_ro_opt(mount_opts); set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot, ro_option); cg_infos[CPU_IDX]._data_complete = true; } else if (strcmp(token, "cpuacct") == 0) { any_cgroup_mounts_found = true; bool ro_option = find_ro_opt(mount_opts); set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot, ro_option); cg_infos[CPUACCT_IDX]._data_complete = true; } else if (strcmp(token, "pids") == 0) { any_cgroup_mounts_found = true; bool ro_option = find_ro_opt(mount_opts); set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot, ro_option); cg_infos[PIDS_IDX]._data_complete = true; } } } } fclose(mntinfo); // Neither cgroup2 nor cgroup filesystems mounted via /proc/self/mountinfo // No point in continuing. if (!any_cgroup_mounts_found) { log_trace(os, container)("No relevant cgroup controllers mounted."); cleanup(cg_infos); *flags = INVALID_CGROUPS_NO_MOUNT; return false; } if (cgroups_v2_enabled) { if (!cgroupv2_mount_point_found) { log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo"); cleanup(cg_infos); *flags = INVALID_CGROUPS_V2; return false; } // Cgroups v2 case, we have all the info we need. *flags = CGROUPS_V2; return true; } // What follows is cgroups v1 log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers"); if (!cg_infos[MEMORY_IDX]._data_complete) { log_debug(os, container)("Required cgroup v1 memory subsystem not found"); cleanup(cg_infos); *flags = INVALID_CGROUPS_V1; return false; } if (!cg_infos[CPUSET_IDX]._data_complete) { log_debug(os, container)("Required cgroup v1 cpuset subsystem not found"); cleanup(cg_infos); *flags = INVALID_CGROUPS_V1; return false; } if (!cg_infos[CPU_IDX]._data_complete) { log_debug(os, container)("Required cgroup v1 cpu subsystem not found"); cleanup(cg_infos); *flags = INVALID_CGROUPS_V1; return false; } if (!cg_infos[CPUACCT_IDX]._data_complete) { log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found"); cleanup(cg_infos); *flags = INVALID_CGROUPS_V1; return false; } if (log_is_enabled(Debug, os, container) && !cg_infos[PIDS_IDX]._data_complete) { log_debug(os, container)("Optional cgroup v1 pids subsystem not found"); // keep the other controller info, pids is optional } // Cgroups v1 case, we have all the info we need. *flags = CGROUPS_V1; return true; }; void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) { assert(cg_infos != nullptr, "Invariant"); for (int i = 0; i < CG_INFO_LENGTH; i++) { os::free(cg_infos[i]._name); os::free(cg_infos[i]._cgroup_path); os::free(cg_infos[i]._root_mount_path); os::free(cg_infos[i]._mount_path); } } /* active_processor_count * * Calculate an appropriate number of active processors for the * VM to use based on these three inputs. * * cpu affinity * cgroup cpu quota & cpu period * * Algorithm: * * Determine the number of available CPUs from sched_getaffinity * * If user specified a quota (quota != -1), calculate the number of * required CPUs by dividing quota by period. * * All results of division are rounded up to the next whole number. * * If quotas have not been specified, sets the result reference to * the number of active processors in the system. * * If quotas have been specified, the number set in the result * reference will never exceed the number of active processors. * * return: * true if there were no errors. false otherwise. */ bool CgroupSubsystem::active_processor_count(double& value) { // We use a cache with a timeout to avoid performing expensive // computations in the event this function is called frequently. // [See 8227006]. CachingCgroupController* contrl = cpu_controller(); CachedMetric* cpu_limit = contrl->metrics_cache(); if (!cpu_limit->should_check_metric()) { value = cpu_limit->value(); log_trace(os, container)("CgroupSubsystem::active_processor_count (cached): %.2f", value); return true; } int cpu_count = os::Linux::active_processor_count(); double result = -1; if (!CgroupUtil::processor_count(contrl->controller(), cpu_count, result)) { return false; } assert(result > 0 && result <= cpu_count, "must be"); // Update cached metric to avoid re-reading container settings too often cpu_limit->set_value(result, OSCONTAINER_CACHE_TIMEOUT); value = result; return true; } /* memory_limit_in_bytes * * Return the limit of available memory for this process in the provided * physical_memory_size_type reference. If there was no limit value set in the underlying * interface files 'value_unlimited' is returned. * * return: * false if retrieving the value failed * true if retrieving the value was successfull and the value was * set in the 'value' reference. */ bool CgroupSubsystem::memory_limit_in_bytes(physical_memory_size_type upper_bound, physical_memory_size_type& value) { CachingCgroupController* contrl = memory_controller(); CachedMetric* memory_limit = contrl->metrics_cache(); if (!memory_limit->should_check_metric()) { value = memory_limit->value(); return true; } physical_memory_size_type mem_limit = 0; if (!contrl->controller()->read_memory_limit_in_bytes(upper_bound, mem_limit)) { return false; } // Update cached metric to avoid re-reading container settings too often memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT); value = mem_limit; return true; } bool CgroupController::read_string(const char* filename, char* buf, size_t buf_size) { assert(buf != nullptr, "buffer must not be null"); assert(filename != nullptr, "filename must be given"); const char* s_path = subsystem_path(); if (s_path == nullptr) { log_debug(os, container)("read_string: subsystem path is null"); return false; } stringStream file_path; file_path.print_raw(s_path); file_path.print_raw(filename); if (file_path.size() > MAXPATHLEN) { log_debug(os, container)("File path too long %s, %s", file_path.base(), filename); return false; } const char* absolute_path = file_path.freeze(); log_trace(os, container)("Path to %s is %s", filename, absolute_path); FILE* fp = os::fopen(absolute_path, "r"); if (fp == nullptr) { log_debug(os, container)("Open of file %s failed, %s", absolute_path, os::strerror(errno)); return false; } // Read a single line into the provided buffer. // At most buf_size - 1 characters. char* line = fgets(buf, buf_size, fp); fclose(fp); if (line == nullptr) { log_debug(os, container)("Empty file %s", absolute_path); return false; } size_t len = strlen(line); assert(len <= buf_size - 1, "At most buf_size - 1 bytes can be read"); if (line[len - 1] == '\n') { line[len - 1] = '\0'; // trim trailing new line } return true; } bool CgroupController::read_number(const char* filename, uint64_t& result) { char buf[1024]; bool is_ok = read_string(filename, buf, 1024); if (!is_ok) { return false; } int matched = sscanf(buf, UINT64_FORMAT, &result); if (matched == 1) { return true; } return false; } bool CgroupController::read_number_handle_max(const char* filename, uint64_t& result) { char buf[1024]; bool is_ok = read_string(filename, buf, 1024); if (!is_ok) { return false; } uint64_t val = 0; if (!limit_from_str(buf, val)) { return false; } result = val; return true; } bool CgroupController::read_numerical_key_value(const char* filename, const char* key, uint64_t& result) { assert(key != nullptr, "key must be given"); assert(filename != nullptr, "file to search in must be given"); const char* s_path = subsystem_path(); if (s_path == nullptr) { log_debug(os, container)("read_numerical_key_value: subsystem path is null"); return false; } stringStream file_path; file_path.print_raw(s_path); file_path.print_raw(filename); if (file_path.size() > MAXPATHLEN) { log_debug(os, container)("File path too long %s, %s", file_path.base(), filename); return false; } const char* absolute_path = file_path.freeze(); log_trace(os, container)("Path to %s is %s", filename, absolute_path); FILE* fp = os::fopen(absolute_path, "r"); if (fp == nullptr) { log_debug(os, container)("Open of file %s failed, %s", absolute_path, os::strerror(errno)); return false; } const int buf_len = MAXPATHLEN+1; char buf[buf_len]; char* line = fgets(buf, buf_len, fp); bool found_match = false; // File consists of multiple lines in a "key value" // fashion, we have to find the key. const size_t key_len = strlen(key); for (; line != nullptr; line = fgets(buf, buf_len, fp)) { char after_key = line[key_len]; if (strncmp(line, key, key_len) == 0 && isspace((unsigned char) after_key) != 0 && after_key != '\n') { // Skip key, skip space const char* value_substr = line + key_len + 1; int matched = sscanf(value_substr, UINT64_FORMAT, &result); found_match = matched == 1; if (found_match) { break; } } } fclose(fp); if (found_match) { return true; } log_debug(os, container)("Type %s (key == %s) not found in file %s", UINT64_FORMAT, key, absolute_path); return false; } bool CgroupController::read_numerical_tuple_value(const char* filename, bool use_first, uint64_t& result) { char buf[1024]; bool is_ok = read_string(filename, buf, 1024); if (!is_ok) { return false; } char token[1024]; const int matched = sscanf(buf, (use_first ? "%1023s %*s" : "%*s %1023s"), token); if (matched != 1) { return false; } uint64_t val = 0; if (!limit_from_str(token, val)) { return false; } result = val; return true; } bool CgroupController::limit_from_str(char* limit_str, uint64_t& value) { if (limit_str == nullptr) { return false; } // Unlimited memory in cgroups is the literal string 'max' for // some controllers, for example the pids controller. if (strcmp("max", limit_str) == 0) { value = value_unlimited; return true; } uint64_t limit; if (sscanf(limit_str, UINT64_FORMAT, &limit) != 1) { return false; } value = limit; return true; } // CgroupSubsystem implementations bool CgroupSubsystem::memory_and_swap_limit_in_bytes(physical_memory_size_type upper_mem_bound, physical_memory_size_type upper_swap_bound, physical_memory_size_type& value) { return memory_controller()->controller()->memory_and_swap_limit_in_bytes(upper_mem_bound, upper_swap_bound, value); } bool CgroupSubsystem::memory_and_swap_usage_in_bytes(physical_memory_size_type upper_mem_bound, physical_memory_size_type upper_swap_bound, physical_memory_size_type& value) { return memory_controller()->controller()->memory_and_swap_usage_in_bytes(upper_mem_bound, upper_swap_bound, value); } bool CgroupSubsystem::memory_soft_limit_in_bytes(physical_memory_size_type upper_bound, physical_memory_size_type& value) { return memory_controller()->controller()->memory_soft_limit_in_bytes(upper_bound, value); } bool CgroupSubsystem::memory_throttle_limit_in_bytes(physical_memory_size_type& value) { return memory_controller()->controller()->memory_throttle_limit_in_bytes(value); } bool CgroupSubsystem::memory_usage_in_bytes(physical_memory_size_type& value) { return memory_controller()->controller()->memory_usage_in_bytes(value); } bool CgroupSubsystem::memory_max_usage_in_bytes(physical_memory_size_type& value) { return memory_controller()->controller()->memory_max_usage_in_bytes(value); } bool CgroupSubsystem::rss_usage_in_bytes(physical_memory_size_type& value) { return memory_controller()->controller()->rss_usage_in_bytes(value); } bool CgroupSubsystem::cache_usage_in_bytes(physical_memory_size_type& value) { return memory_controller()->controller()->cache_usage_in_bytes(value); } bool CgroupSubsystem::cpu_quota(int& value) { return cpu_controller()->controller()->cpu_quota(value); } bool CgroupSubsystem::cpu_period(int& value) { return cpu_controller()->controller()->cpu_period(value); } bool CgroupSubsystem::cpu_shares(int& value) { return cpu_controller()->controller()->cpu_shares(value); } bool CgroupSubsystem::cpu_usage_in_micros(uint64_t& value) { return cpuacct_controller()->cpu_usage_in_micros(value); } void CgroupSubsystem::print_version_specific_info(outputStream* st, physical_memory_size_type upper_mem_bound) { memory_controller()->controller()->print_version_specific_info(st, upper_mem_bound); }