mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
910 lines
36 KiB
C++
910 lines
36 KiB
C++
/*
|
|
* Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
* accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU General Public License version
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
* questions.
|
|
*
|
|
*/
|
|
|
|
#include "cgroupSubsystem_linux.hpp"
|
|
#include "cgroupUtil_linux.hpp"
|
|
#include "cgroupV1Subsystem_linux.hpp"
|
|
#include "cgroupV2Subsystem_linux.hpp"
|
|
#include "logging/log.hpp"
|
|
#include "memory/allocation.hpp"
|
|
#include "os_linux.hpp"
|
|
#include "runtime/globals.hpp"
|
|
#include "runtime/os.hpp"
|
|
#include "utilities/globalDefinitions.hpp"
|
|
|
|
#include <errno.h>
|
|
#include <math.h>
|
|
#include <string.h>
|
|
#include <sys/vfs.h>
|
|
|
|
// Inlined from <linux/magic.h> for portability.
|
|
#ifndef CGROUP2_SUPER_MAGIC
|
|
# define CGROUP2_SUPER_MAGIC 0x63677270
|
|
#endif
|
|
|
|
// Printable controller names used in log messages. The table is indexed with
// the *_IDX constants, so the order of the entries has to match those indices.
static const char* cg_controller_name[] = {
  "cpuset",
  "cpu",
  "cpuacct",
  "memory",
  "pids"
};
|
|
static inline int cg_v2_controller_index(const char* name) {
|
|
if (strcmp(name, "cpuset") == 0) {
|
|
return CPUSET_IDX;
|
|
} else if (strcmp(name, "cpu") == 0) {
|
|
return CPU_IDX;
|
|
} else if (strcmp(name, "memory") == 0) {
|
|
return MEMORY_IDX;
|
|
} else if (strcmp(name, "pids") == 0) {
|
|
return PIDS_IDX;
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/*
 * Detect which cgroup flavour is in use and construct the matching
 * CgroupSubsystem.
 *
 * The file system magic of /sys/fs/cgroup decides whether the cgroups v2
 * controllers file or /proc/cgroups is handed to determine_type(), which then
 * fills the CgroupInfo array from that file, /proc/self/cgroup and
 * /proc/self/mountinfo.
 *
 * return:
 *    a newly allocated CgroupV2Subsystem or CgroupV1Subsystem, or nullptr if
 *    determine_type() could not establish a valid cgroup configuration.
 */
CgroupSubsystem* CgroupSubsystemFactory::create() {
  CgroupInfo cg_infos[CG_INFO_LENGTH];
  u1 cg_type_flags = INVALID_CGROUPS_GENERIC;
  const char* proc_cgroups = "/proc/cgroups";
  const char* sys_fs_cgroup_cgroup_controllers = "/sys/fs/cgroup/cgroup.controllers";
  const char* controllers_file = proc_cgroups;
  const char* proc_self_cgroup = "/proc/self/cgroup";
  const char* proc_self_mountinfo = "/proc/self/mountinfo";
  const char* sys_fs_cgroup = "/sys/fs/cgroup";
  struct statfs fsstat = {};
  bool cgroups_v2_enabled = false;

  // Assume cgroups v2 is usable by the JDK iff /sys/fs/cgroup has the cgroup v2
  // file system magic. If it does not then heuristics are required to determine
  // if cgroups v1 is usable or not.
  if (statfs(sys_fs_cgroup, &fsstat) != -1) {
    cgroups_v2_enabled = (fsstat.f_type == CGROUP2_SUPER_MAGIC);
    if (cgroups_v2_enabled) {
      controllers_file = sys_fs_cgroup_cgroup_controllers;
    }
  }

  bool valid_cgroup = determine_type(cg_infos, cgroups_v2_enabled, controllers_file,
                                     proc_self_cgroup, proc_self_mountinfo, &cg_type_flags);

  if (!valid_cgroup) {
    // Could not detect cgroup type
    return nullptr;
  }
  assert(is_valid_cgroup(&cg_type_flags), "Expected valid cgroup type");

  if (is_cgroup_v2(&cg_type_flags)) {
    // Cgroups v2 case, we have all the info we need.
    // Construct the subsystem, free resources and return
    // Note: We use the memory for non-cpu non-memory controller look-ups.
    //       Perhaps we ought to have separate controllers for all.
    CgroupV2Controller mem_other = CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path,
                                                      cg_infos[MEMORY_IDX]._cgroup_path,
                                                      cg_infos[MEMORY_IDX]._read_only);
    CgroupV2MemoryController* v2_memory = new CgroupV2MemoryController(mem_other);
    CgroupV2CpuController* v2_cpu = new CgroupV2CpuController(CgroupV2Controller(cg_infos[CPU_IDX]._mount_path,
                                                                                 cg_infos[CPU_IDX]._cgroup_path,
                                                                                 cg_infos[CPU_IDX]._read_only));
    CgroupV2CpuacctController* v2_cpuacct = new CgroupV2CpuacctController(v2_cpu);
    log_debug(os, container)("Detected cgroups v2 unified hierarchy");
    cleanup(cg_infos);
    return new CgroupV2Subsystem(v2_memory, v2_cpu, v2_cpuacct, mem_other);
  }

  /*
   * Cgroup v1 case:
   *
   * Use info gathered previously from /proc/self/cgroup
   * and map host mount point to
   * local one via /proc/self/mountinfo content above
   *
   * Docker example:
   * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
   *
   * Host example:
   * 5:memory:/user.slice
   *
   * Construct a path to the process specific memory and cpuset
   * cgroup directory.
   *
   * For a container running under Docker from memory example above
   * the paths would be:
   *
   * /sys/fs/cgroup/memory
   *
   * For a Host from memory example above the path would be:
   *
   * /sys/fs/cgroup/memory/user.slice
   *
   */
  assert(is_cgroup_v1(&cg_type_flags), "Cgroup v1 expected");

  CgroupV1MemoryController* memory = nullptr;
  CgroupV1Controller* cpuset = nullptr;
  CgroupV1CpuController* cpu = nullptr;
  CgroupV1CpuacctController* cpuacct = nullptr;
  CgroupV1Controller* pids = nullptr;

  for (int i = 0; i < CG_INFO_LENGTH; i++) {
    const CgroupInfo& info = cg_infos[i];
    if (!info._data_complete) { // pids controller might have incomplete data
      log_debug(os, container)("CgroupInfo for %s not complete", cg_controller_name[i]);
      continue;
    }
    // Dispatch on the slot index rather than on info._name: the slot already
    // identifies the controller, and _name is only populated when the
    // controller was listed in the controllers file, so it may be null here.
    if (i == MEMORY_IDX) {
      memory = new CgroupV1MemoryController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only));
      memory->set_subsystem_path(info._cgroup_path);
    } else if (i == CPUSET_IDX) {
      cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only);
      cpuset->set_subsystem_path(info._cgroup_path);
    } else if (i == CPU_IDX) {
      cpu = new CgroupV1CpuController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only));
      cpu->set_subsystem_path(info._cgroup_path);
    } else if (i == CPUACCT_IDX) {
      cpuacct = new CgroupV1CpuacctController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only));
      cpuacct->set_subsystem_path(info._cgroup_path);
    } else if (i == PIDS_IDX) {
      pids = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only);
      pids->set_subsystem_path(info._cgroup_path);
    }
  }
  cleanup(cg_infos);
  return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory);
}
|
|
|
|
/*
 * Record mount point, root mount path and read-only flag for one controller.
 *
 * cg_infos:   array of controller infos; entry 'controller' is updated
 * controller: index of the entry to update
 * name:       controller name, used for logging only
 * mount_path: mount point to record (duplicated with os::strdup)
 * root_path:  root of the mount to record (duplicated with os::strdup)
 * read_only:  whether the mount is read only
 *
 * If a mount point is already recorded for the controller, the recorded one
 * is kept when it starts with /sys/fs/cgroup; otherwise it is freed and
 * replaced by the new values.
 */
void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos,
                                                  int controller,
                                                  const char* name,
                                                  char* mount_path,
                                                  char* root_path,
                                                  bool read_only) {
  CgroupInfo& info = cg_infos[controller];

  if (info._mount_path != nullptr) {
    // On some systems duplicate controllers get mounted in addition to
    // the main cgroup controllers most likely under /sys/fs/cgroup. In that
    // case pick the one under /sys/fs/cgroup and discard others.
    const char* const preferred_prefix = "/sys/fs/cgroup";
    const bool recorded_is_preferred =
        strncmp(info._mount_path, preferred_prefix, strlen(preferred_prefix)) == 0;
    if (recorded_is_preferred) {
      log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.",
                               name, info._mount_path, mount_path);
      return;
    }
    log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.",
                             name, mount_path, info._mount_path);
    os::free(info._mount_path);
    os::free(info._root_mount_path);
  }

  info._mount_path = os::strdup(mount_path);
  info._root_mount_path = os::strdup(root_path);
  info._read_only = read_only;
}
|
|
|
|
/*
 * Determine whether or not the mount options, which are comma separated
 * (man proc), contain the 'ro' option.
 *
 * mount_opts: NUL-terminated option list, e.g. "ro,nosuid,nodev"; may be null.
 *
 * The string is only inspected, never modified, so the same buffer can be
 * checked repeatedly (as is done once per controller of a co-mounted
 * hierarchy) and every call sees the complete option list.
 *
 * return:
 *    true iff one of the options is exactly "ro"; false otherwise, including
 *    for a null or empty option list.
 */
static bool find_ro_opt(char* mount_opts) {
  if (mount_opts == nullptr) {
    return false;
  }
  const char* option = mount_opts;
  while (true) {
    const char* comma = strchr(option, ',');
    const size_t option_len = (comma == nullptr) ? strlen(option)
                                                 : static_cast<size_t>(comma - option);
    // Exact match only: "rootfs" or "errors=remount-ro" must not count.
    if (option_len == 2 && strncmp(option, "ro", 2) == 0) {
      return true;
    }
    if (comma == nullptr) {
      return false; // last option inspected
    }
    option = comma + 1;
  }
}
|
|
|
|
/*
 * Split one /proc/self/mountinfo line into the fields needed for cgroup
 * detection. For cgroups v1 the super options are needed. On cgroups v2 super
 * options are not used.
 *
 * A mountinfo line is scanned as follows:
 *
 *   36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
 *   (1) (2) (3):(4) (5)   (6)      (7)      (8) (9) (10)   (11)         (12)
 *
 * with the labelled fields handled like this:
 *
 *  (1)   mount ID:        matched with '%*d' and discarded
 *  (2)   parent ID:       matched with '%*d' and discarded
 *  (3,4) major:minor:     matched with '%*d:%*d' and discarded
 *  (5)   root:            matched with '%s', stored in 'tmproot'. Must be non-empty.
 *  (6)   mount point:     matched with '%s', stored in 'tmpmount'. Must be non-empty.
 *  (7)   mount options:   matched with '%s', stored in 'mount_opts'. Must be non-empty.
 *  (8,9) optional fields
 *        and separator:   matched with '%*[^-]-' (anything that is not a hyphen,
 *                         followed by a hyphen) and discarded. Without optional
 *                         fields the discarded match consists of space characters
 *                         only; otherwise it contains the optional fields as well.
 *  (10)  filesystem type: matched with '%s', stored in 'tmp_fs_type'
 *  (11)  mount source:    matched with '%*s' and discarded
 *  (12)  super options:   matched with '%s', stored in 'tmpcgroups'
 *
 * return:
 *    true iff all five captured fields were assigned.
 */
static inline bool match_mount_info_line(char* line,
                                         char* tmproot,
                                         char* tmpmount,
                                         char* mount_opts,
                                         char* tmp_fs_type,
                                         char* tmpcgroups) {
  const int expected_fields = 5;
  const int assigned_fields = sscanf(line,
                                     "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %s",
                                     tmproot,
                                     tmpmount,
                                     mount_opts,
                                     tmp_fs_type,
                                     tmpcgroups);
  return assigned_fields == expected_fields;
}
|
|
|
|
bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
|
|
bool cgroups_v2_enabled,
|
|
const char* controllers_file,
|
|
const char* proc_self_cgroup,
|
|
const char* proc_self_mountinfo,
|
|
u1* flags) {
|
|
FILE *mntinfo = nullptr;
|
|
FILE* controllers = nullptr;
|
|
FILE *cgroup = nullptr;
|
|
char buf[MAXPATHLEN+1];
|
|
char *p;
|
|
// true iff all required controllers, memory, cpu, cpuacct are enabled
|
|
// at the kernel level.
|
|
// pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1)
|
|
// cpuset might not be enabled on newer Linux distros (Fedora 41)
|
|
bool all_required_controllers_enabled = true;
|
|
|
|
// If cgroups v2 is enabled, open /sys/fs/cgroup/cgroup.controllers. If not, open /proc/cgroups.
|
|
controllers = os::fopen(controllers_file, "r");
|
|
if (controllers == nullptr) {
|
|
log_debug(os, container)("Can't open %s, %s", controllers_file, os::strerror(errno));
|
|
*flags = INVALID_CGROUPS_GENERIC;
|
|
return false;
|
|
}
|
|
|
|
if (cgroups_v2_enabled) {
|
|
/*
|
|
* cgroups v2 is enabled. For cgroups v2 (unified hierarchy), the cpu and memory
|
|
* controllers must be enabled.
|
|
*/
|
|
if ((p = fgets(buf, MAXPATHLEN, controllers)) != nullptr) {
|
|
char* controller = nullptr;
|
|
#define ISSPACE_CHARS " \n\t\r\f\v"
|
|
while ((controller = strsep(&p, ISSPACE_CHARS)) != nullptr) {
|
|
int i;
|
|
if ((i = cg_v2_controller_index(controller)) != -1) {
|
|
cg_infos[i]._name = os::strdup(controller);
|
|
cg_infos[i]._enabled = true;
|
|
if (i == PIDS_IDX || i == CPUSET_IDX) {
|
|
log_debug(os, container)("Detected optional %s controller entry in %s",
|
|
controller, controllers_file);
|
|
}
|
|
}
|
|
}
|
|
#undef ISSPACE_CHARS
|
|
} else {
|
|
log_debug(os, container)("Can't read %s, %s", controllers_file, os::strerror(errno));
|
|
*flags = INVALID_CGROUPS_V2;
|
|
fclose(controllers);
|
|
return false;
|
|
}
|
|
for (int i = 0; i < CG_INFO_LENGTH; i++) {
|
|
// cgroups v2 does not have cpuacct.
|
|
if (i == CPUACCT_IDX) {
|
|
continue;
|
|
}
|
|
// For cgroups v2, cpuacct is rolled into cpu, and the pids and cpuset controllers
|
|
// are optional; the remaining controllers, cpu and memory, are required.
|
|
if (i == CPU_IDX || i == MEMORY_IDX) {
|
|
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
|
|
}
|
|
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
|
|
log_debug(os, container)("controller %s is not enabled", cg_controller_name[i]);
|
|
}
|
|
}
|
|
} else {
|
|
/*
|
|
* The /sys/fs/cgroup filesystem magic hint suggests we have cg v1. Read /proc/cgroups; for
|
|
* cgroups v1 hierarchy (hybrid or legacy), cpu, cpuacct, cpuset, and memory controllers must
|
|
* have non-zero for the hierarchy ID field and relevant controllers mounted.
|
|
*/
|
|
while ((p = fgets(buf, MAXPATHLEN, controllers)) != nullptr) {
|
|
char name[MAXPATHLEN+1];
|
|
int hierarchy_id;
|
|
int enabled;
|
|
|
|
// Format of /proc/cgroups documented via man 7 cgroups
|
|
if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
|
|
continue;
|
|
}
|
|
if (strcmp(name, "memory") == 0) {
|
|
cg_infos[MEMORY_IDX]._name = os::strdup(name);
|
|
cg_infos[MEMORY_IDX]._hierarchy_id = hierarchy_id;
|
|
cg_infos[MEMORY_IDX]._enabled = (enabled == 1);
|
|
} else if (strcmp(name, "cpuset") == 0) {
|
|
cg_infos[CPUSET_IDX]._name = os::strdup(name);
|
|
cg_infos[CPUSET_IDX]._hierarchy_id = hierarchy_id;
|
|
cg_infos[CPUSET_IDX]._enabled = (enabled == 1);
|
|
} else if (strcmp(name, "cpu") == 0) {
|
|
cg_infos[CPU_IDX]._name = os::strdup(name);
|
|
cg_infos[CPU_IDX]._hierarchy_id = hierarchy_id;
|
|
cg_infos[CPU_IDX]._enabled = (enabled == 1);
|
|
} else if (strcmp(name, "cpuacct") == 0) {
|
|
cg_infos[CPUACCT_IDX]._name = os::strdup(name);
|
|
cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id;
|
|
cg_infos[CPUACCT_IDX]._enabled = (enabled == 1);
|
|
} else if (strcmp(name, "pids") == 0) {
|
|
log_debug(os, container)("Detected optional pids controller entry in %s", controllers_file);
|
|
cg_infos[PIDS_IDX]._name = os::strdup(name);
|
|
cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id;
|
|
cg_infos[PIDS_IDX]._enabled = (enabled == 1);
|
|
}
|
|
}
|
|
for (int i = 0; i < CG_INFO_LENGTH; i++) {
|
|
// pids controller is optional. All other controllers are required
|
|
if (i != PIDS_IDX) {
|
|
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
|
|
}
|
|
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
|
|
log_debug(os, container)("controller %s is not enabled", cg_controller_name[i]);
|
|
}
|
|
}
|
|
}
|
|
fclose(controllers);
|
|
|
|
if (!all_required_controllers_enabled) {
|
|
// one or more required controllers disabled, disable container support
|
|
log_debug(os, container)("One or more required controllers disabled at kernel level.");
|
|
cleanup(cg_infos);
|
|
*flags = INVALID_CGROUPS_GENERIC;
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Read /proc/self/cgroup and determine:
|
|
* - the cgroup path for cgroups v2 or
|
|
* - on a cgroups v1 system, collect info for mapping
|
|
* the host mount point to the local one via /proc/self/mountinfo below.
|
|
*/
|
|
cgroup = os::fopen(proc_self_cgroup, "r");
|
|
if (cgroup == nullptr) {
|
|
log_debug(os, container)("Can't open %s, %s",
|
|
proc_self_cgroup, os::strerror(errno));
|
|
cleanup(cg_infos);
|
|
*flags = INVALID_CGROUPS_GENERIC;
|
|
return false;
|
|
}
|
|
|
|
while ((p = fgets(buf, MAXPATHLEN, cgroup)) != nullptr) {
|
|
char *controllers;
|
|
char *token;
|
|
char *hierarchy_id_str;
|
|
int hierarchy_id;
|
|
char *cgroup_path;
|
|
|
|
hierarchy_id_str = strsep(&p, ":");
|
|
hierarchy_id = atoi(hierarchy_id_str);
|
|
/* Get controllers and base */
|
|
controllers = strsep(&p, ":");
|
|
cgroup_path = strsep(&p, "\n");
|
|
|
|
if (controllers == nullptr) {
|
|
continue;
|
|
}
|
|
|
|
while (!cgroups_v2_enabled && (token = strsep(&controllers, ",")) != nullptr) {
|
|
if (strcmp(token, "memory") == 0) {
|
|
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory");
|
|
cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path);
|
|
} else if (strcmp(token, "cpuset") == 0) {
|
|
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuset");
|
|
cg_infos[CPUSET_IDX]._cgroup_path = os::strdup(cgroup_path);
|
|
} else if (strcmp(token, "cpu") == 0) {
|
|
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu");
|
|
cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path);
|
|
} else if (strcmp(token, "cpuacct") == 0) {
|
|
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacct");
|
|
cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path);
|
|
} else if (strcmp(token, "pids") == 0) {
|
|
assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids",
|
|
cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id);
|
|
cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path);
|
|
}
|
|
}
|
|
if (cgroups_v2_enabled) {
|
|
// On some systems we have mixed cgroups v1 and cgroups v2 controllers (e.g. freezer on cg1 and
|
|
// all relevant controllers on cg2). Only set the cgroup path when we see a hierarchy id of 0.
|
|
if (hierarchy_id != 0) {
|
|
continue;
|
|
}
|
|
for (int i = 0; i < CG_INFO_LENGTH; i++) {
|
|
assert(cg_infos[i]._cgroup_path == nullptr, "cgroup path must only be set once");
|
|
cg_infos[i]._cgroup_path = os::strdup(cgroup_path);
|
|
}
|
|
}
|
|
}
|
|
fclose(cgroup);
|
|
|
|
// Find various mount points by reading /proc/self/mountinfo
|
|
// mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
|
|
mntinfo = os::fopen(proc_self_mountinfo, "r");
|
|
if (mntinfo == nullptr) {
|
|
log_debug(os, container)("Can't open %s, %s",
|
|
proc_self_mountinfo, os::strerror(errno));
|
|
cleanup(cg_infos);
|
|
*flags = INVALID_CGROUPS_GENERIC;
|
|
return false;
|
|
}
|
|
|
|
bool cgroupv2_mount_point_found = false;
|
|
bool any_cgroup_mounts_found = false;
|
|
while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != nullptr) {
|
|
char tmp_fs_type[MAXPATHLEN+1];
|
|
char tmproot[MAXPATHLEN+1];
|
|
char tmpmount[MAXPATHLEN+1];
|
|
char tmpcgroups[MAXPATHLEN+1];
|
|
char mount_opts[MAXPATHLEN+1];
|
|
char *cptr = tmpcgroups;
|
|
char *token;
|
|
|
|
/* Cgroup v2 relevant info. We only look for the _mount_path iff cgroups_v2_enabled so
|
|
* as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1
|
|
* block in the hybrid case.
|
|
*
|
|
* We collect the read only mount option in the cgroup infos so as to have that
|
|
* info ready when determining is_containerized().
|
|
*/
|
|
if (cgroups_v2_enabled && match_mount_info_line(p,
|
|
tmproot,
|
|
tmpmount,
|
|
mount_opts,
|
|
tmp_fs_type,
|
|
tmpcgroups /* unused */)) {
|
|
// we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype
|
|
if (strcmp("cgroup2", tmp_fs_type) == 0) {
|
|
cgroupv2_mount_point_found = true;
|
|
any_cgroup_mounts_found = true;
|
|
// For unified we only have a single line with cgroup2 fs type.
|
|
// Therefore use that option for all CG info structs.
|
|
bool ro_option = find_ro_opt(mount_opts);
|
|
for (int i = 0; i < CG_INFO_LENGTH; i++) {
|
|
set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot, ro_option);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Cgroup v1 relevant info
|
|
*
|
|
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids. For each controller
|
|
* determine whether or not they show up as mounted read only or not.
|
|
*
|
|
* Example for docker:
|
|
* 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
|
|
*
|
|
* Example for host:
|
|
* 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
|
|
*
|
|
* 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids
|
|
*
|
|
*/
|
|
if (match_mount_info_line(p, tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups)) {
|
|
if (strcmp("cgroup", tmp_fs_type) != 0) {
|
|
// Skip cgroup2 fs lines on hybrid or unified hierarchy.
|
|
continue;
|
|
}
|
|
while ((token = strsep(&cptr, ",")) != nullptr) {
|
|
if (strcmp(token, "memory") == 0) {
|
|
any_cgroup_mounts_found = true;
|
|
bool ro_option = find_ro_opt(mount_opts);
|
|
set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot, ro_option);
|
|
cg_infos[MEMORY_IDX]._data_complete = true;
|
|
} else if (strcmp(token, "cpuset") == 0) {
|
|
any_cgroup_mounts_found = true;
|
|
bool ro_option = find_ro_opt(mount_opts);
|
|
set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot, ro_option);
|
|
cg_infos[CPUSET_IDX]._data_complete = true;
|
|
} else if (strcmp(token, "cpu") == 0) {
|
|
any_cgroup_mounts_found = true;
|
|
bool ro_option = find_ro_opt(mount_opts);
|
|
set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot, ro_option);
|
|
cg_infos[CPU_IDX]._data_complete = true;
|
|
} else if (strcmp(token, "cpuacct") == 0) {
|
|
any_cgroup_mounts_found = true;
|
|
bool ro_option = find_ro_opt(mount_opts);
|
|
set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot, ro_option);
|
|
cg_infos[CPUACCT_IDX]._data_complete = true;
|
|
} else if (strcmp(token, "pids") == 0) {
|
|
any_cgroup_mounts_found = true;
|
|
bool ro_option = find_ro_opt(mount_opts);
|
|
set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot, ro_option);
|
|
cg_infos[PIDS_IDX]._data_complete = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
fclose(mntinfo);
|
|
|
|
// Neither cgroup2 nor cgroup filesystems mounted via /proc/self/mountinfo
|
|
// No point in continuing.
|
|
if (!any_cgroup_mounts_found) {
|
|
log_trace(os, container)("No relevant cgroup controllers mounted.");
|
|
cleanup(cg_infos);
|
|
*flags = INVALID_CGROUPS_NO_MOUNT;
|
|
return false;
|
|
}
|
|
|
|
if (cgroups_v2_enabled) {
|
|
if (!cgroupv2_mount_point_found) {
|
|
log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo");
|
|
cleanup(cg_infos);
|
|
*flags = INVALID_CGROUPS_V2;
|
|
return false;
|
|
}
|
|
// Cgroups v2 case, we have all the info we need.
|
|
*flags = CGROUPS_V2;
|
|
return true;
|
|
}
|
|
|
|
// What follows is cgroups v1
|
|
log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers");
|
|
|
|
if (!cg_infos[MEMORY_IDX]._data_complete) {
|
|
log_debug(os, container)("Required cgroup v1 memory subsystem not found");
|
|
cleanup(cg_infos);
|
|
*flags = INVALID_CGROUPS_V1;
|
|
return false;
|
|
}
|
|
if (!cg_infos[CPUSET_IDX]._data_complete) {
|
|
log_debug(os, container)("Required cgroup v1 cpuset subsystem not found");
|
|
cleanup(cg_infos);
|
|
*flags = INVALID_CGROUPS_V1;
|
|
return false;
|
|
}
|
|
if (!cg_infos[CPU_IDX]._data_complete) {
|
|
log_debug(os, container)("Required cgroup v1 cpu subsystem not found");
|
|
cleanup(cg_infos);
|
|
*flags = INVALID_CGROUPS_V1;
|
|
return false;
|
|
}
|
|
if (!cg_infos[CPUACCT_IDX]._data_complete) {
|
|
log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found");
|
|
cleanup(cg_infos);
|
|
*flags = INVALID_CGROUPS_V1;
|
|
return false;
|
|
}
|
|
if (log_is_enabled(Debug, os, container) && !cg_infos[PIDS_IDX]._data_complete) {
|
|
log_debug(os, container)("Optional cgroup v1 pids subsystem not found");
|
|
// keep the other controller info, pids is optional
|
|
}
|
|
// Cgroups v1 case, we have all the info we need.
|
|
*flags = CGROUPS_V1;
|
|
return true;
|
|
};
|
|
|
|
/*
 * Release the strings held by every entry of the given CgroupInfo array
 * (name, cgroup path, root mount path and mount path).
 *
 * cg_infos: array of CG_INFO_LENGTH entries; must not be null.
 *
 * The pointers themselves are not reset after being freed.
 */
void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) {
  assert(cg_infos != nullptr, "Invariant");
  CgroupInfo* const end = cg_infos + CG_INFO_LENGTH;
  for (CgroupInfo* info = cg_infos; info != end; ++info) {
    os::free(info->_name);
    os::free(info->_cgroup_path);
    os::free(info->_root_mount_path);
    os::free(info->_mount_path);
  }
}
|
|
|
|
/* active_processor_count
|
|
*
|
|
* Calculate an appropriate number of active processors for the
|
|
* VM to use based on these three inputs.
|
|
*
|
|
* cpu affinity
|
|
* cgroup cpu quota & cpu period
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* Determine the number of available CPUs from sched_getaffinity
|
|
*
|
|
* If user specified a quota (quota != -1), calculate the number of
|
|
* required CPUs by dividing quota by period.
|
|
*
|
|
* All results of division are rounded up to the next whole number.
|
|
*
|
|
* If quotas have not been specified, sets the result reference to
|
|
* the number of active processors in the system.
|
|
*
|
|
* If quotas have been specified, the number set in the result
|
|
* reference will never exceed the number of active processors.
|
|
*
|
|
* return:
|
|
* true if there were no errors. false otherwise.
|
|
*/
|
|
bool CgroupSubsystem::active_processor_count(double& value) {
|
|
// We use a cache with a timeout to avoid performing expensive
|
|
// computations in the event this function is called frequently.
|
|
// [See 8227006].
|
|
CachingCgroupController<CgroupCpuController, double>* contrl = cpu_controller();
|
|
CachedMetric<double>* cpu_limit = contrl->metrics_cache();
|
|
if (!cpu_limit->should_check_metric()) {
|
|
value = cpu_limit->value();
|
|
log_trace(os, container)("CgroupSubsystem::active_processor_count (cached): %.2f", value);
|
|
return true;
|
|
}
|
|
|
|
int cpu_count = os::Linux::active_processor_count();
|
|
double result = -1;
|
|
if (!CgroupUtil::processor_count(contrl->controller(), cpu_count, result)) {
|
|
return false;
|
|
}
|
|
assert(result > 0 && result <= cpu_count, "must be");
|
|
// Update cached metric to avoid re-reading container settings too often
|
|
cpu_limit->set_value(result, OSCONTAINER_CACHE_TIMEOUT);
|
|
value = result;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* memory_limit_in_bytes
|
|
*
|
|
* Return the limit of available memory for this process in the provided
|
|
* physical_memory_size_type reference. If there was no limit value set in the underlying
|
|
* interface files 'value_unlimited' is returned.
|
|
*
|
|
* return:
|
|
* false if retrieving the value failed
|
|
* true if retrieving the value was successfull and the value was
|
|
* set in the 'value' reference.
|
|
*/
|
|
bool CgroupSubsystem::memory_limit_in_bytes(physical_memory_size_type upper_bound,
|
|
physical_memory_size_type& value) {
|
|
CachingCgroupController<CgroupMemoryController, physical_memory_size_type>* contrl = memory_controller();
|
|
CachedMetric<physical_memory_size_type>* memory_limit = contrl->metrics_cache();
|
|
if (!memory_limit->should_check_metric()) {
|
|
value = memory_limit->value();
|
|
return true;
|
|
}
|
|
physical_memory_size_type mem_limit = 0;
|
|
if (!contrl->controller()->read_memory_limit_in_bytes(upper_bound, mem_limit)) {
|
|
return false;
|
|
}
|
|
// Update cached metric to avoid re-reading container settings too often
|
|
memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT);
|
|
value = mem_limit;
|
|
return true;
|
|
}
|
|
|
|
bool CgroupController::read_string(const char* filename, char* buf, size_t buf_size) {
|
|
assert(buf != nullptr, "buffer must not be null");
|
|
assert(filename != nullptr, "filename must be given");
|
|
const char* s_path = subsystem_path();
|
|
if (s_path == nullptr) {
|
|
log_debug(os, container)("read_string: subsystem path is null");
|
|
return false;
|
|
}
|
|
|
|
stringStream file_path;
|
|
file_path.print_raw(s_path);
|
|
file_path.print_raw(filename);
|
|
|
|
if (file_path.size() > MAXPATHLEN) {
|
|
log_debug(os, container)("File path too long %s, %s", file_path.base(), filename);
|
|
return false;
|
|
}
|
|
const char* absolute_path = file_path.freeze();
|
|
log_trace(os, container)("Path to %s is %s", filename, absolute_path);
|
|
|
|
FILE* fp = os::fopen(absolute_path, "r");
|
|
if (fp == nullptr) {
|
|
log_debug(os, container)("Open of file %s failed, %s", absolute_path, os::strerror(errno));
|
|
return false;
|
|
}
|
|
|
|
// Read a single line into the provided buffer.
|
|
// At most buf_size - 1 characters.
|
|
char* line = fgets(buf, buf_size, fp);
|
|
fclose(fp);
|
|
if (line == nullptr) {
|
|
log_debug(os, container)("Empty file %s", absolute_path);
|
|
return false;
|
|
}
|
|
size_t len = strlen(line);
|
|
assert(len <= buf_size - 1, "At most buf_size - 1 bytes can be read");
|
|
if (line[len - 1] == '\n') {
|
|
line[len - 1] = '\0'; // trim trailing new line
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool CgroupController::read_number(const char* filename, uint64_t& result) {
|
|
char buf[1024];
|
|
bool is_ok = read_string(filename, buf, 1024);
|
|
if (!is_ok) {
|
|
return false;
|
|
}
|
|
int matched = sscanf(buf, UINT64_FORMAT, &result);
|
|
if (matched == 1) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool CgroupController::read_number_handle_max(const char* filename, uint64_t& result) {
|
|
char buf[1024];
|
|
bool is_ok = read_string(filename, buf, 1024);
|
|
if (!is_ok) {
|
|
return false;
|
|
}
|
|
uint64_t val = 0;
|
|
if (!limit_from_str(buf, val)) {
|
|
return false;
|
|
}
|
|
result = val;
|
|
return true;
|
|
}
|
|
|
|
bool CgroupController::read_numerical_key_value(const char* filename, const char* key, uint64_t& result) {
|
|
assert(key != nullptr, "key must be given");
|
|
assert(filename != nullptr, "file to search in must be given");
|
|
const char* s_path = subsystem_path();
|
|
if (s_path == nullptr) {
|
|
log_debug(os, container)("read_numerical_key_value: subsystem path is null");
|
|
return false;
|
|
}
|
|
|
|
stringStream file_path;
|
|
file_path.print_raw(s_path);
|
|
file_path.print_raw(filename);
|
|
|
|
if (file_path.size() > MAXPATHLEN) {
|
|
log_debug(os, container)("File path too long %s, %s", file_path.base(), filename);
|
|
return false;
|
|
}
|
|
const char* absolute_path = file_path.freeze();
|
|
log_trace(os, container)("Path to %s is %s", filename, absolute_path);
|
|
FILE* fp = os::fopen(absolute_path, "r");
|
|
if (fp == nullptr) {
|
|
log_debug(os, container)("Open of file %s failed, %s", absolute_path, os::strerror(errno));
|
|
return false;
|
|
}
|
|
|
|
const int buf_len = MAXPATHLEN+1;
|
|
char buf[buf_len];
|
|
char* line = fgets(buf, buf_len, fp);
|
|
bool found_match = false;
|
|
// File consists of multiple lines in a "key value"
|
|
// fashion, we have to find the key.
|
|
const size_t key_len = strlen(key);
|
|
for (; line != nullptr; line = fgets(buf, buf_len, fp)) {
|
|
char after_key = line[key_len];
|
|
if (strncmp(line, key, key_len) == 0
|
|
&& isspace((unsigned char) after_key) != 0
|
|
&& after_key != '\n') {
|
|
// Skip key, skip space
|
|
const char* value_substr = line + key_len + 1;
|
|
int matched = sscanf(value_substr, UINT64_FORMAT, &result);
|
|
found_match = matched == 1;
|
|
if (found_match) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
fclose(fp);
|
|
if (found_match) {
|
|
return true;
|
|
}
|
|
log_debug(os, container)("Type %s (key == %s) not found in file %s", UINT64_FORMAT,
|
|
key, absolute_path);
|
|
return false;
|
|
}
|
|
|
|
bool CgroupController::read_numerical_tuple_value(const char* filename, bool use_first, uint64_t& result) {
|
|
char buf[1024];
|
|
bool is_ok = read_string(filename, buf, 1024);
|
|
if (!is_ok) {
|
|
return false;
|
|
}
|
|
char token[1024];
|
|
const int matched = sscanf(buf, (use_first ? "%1023s %*s" : "%*s %1023s"), token);
|
|
if (matched != 1) {
|
|
return false;
|
|
}
|
|
uint64_t val = 0;
|
|
if (!limit_from_str(token, val)) {
|
|
return false;
|
|
}
|
|
result = val;
|
|
return true;
|
|
}
|
|
|
|
bool CgroupController::limit_from_str(char* limit_str, uint64_t& value) {
|
|
if (limit_str == nullptr) {
|
|
return false;
|
|
}
|
|
// Unlimited memory in cgroups is the literal string 'max' for
|
|
// some controllers, for example the pids controller.
|
|
if (strcmp("max", limit_str) == 0) {
|
|
value = value_unlimited;
|
|
return true;
|
|
}
|
|
uint64_t limit;
|
|
if (sscanf(limit_str, UINT64_FORMAT, &limit) != 1) {
|
|
return false;
|
|
}
|
|
value = limit;
|
|
return true;
|
|
}
|
|
|
|
// CgroupSubsystem implementations
|
|
// Forwards to memory_and_swap_limit_in_bytes() of the memory controller and
// returns its result.
bool CgroupSubsystem::memory_and_swap_limit_in_bytes(physical_memory_size_type upper_mem_bound,
                                                     physical_memory_size_type upper_swap_bound,
                                                     physical_memory_size_type& value) {
  auto* mem = memory_controller()->controller();
  return mem->memory_and_swap_limit_in_bytes(upper_mem_bound, upper_swap_bound, value);
}
|
|
|
|
// Forwards to memory_and_swap_usage_in_bytes() of the memory controller and
// returns its result.
bool CgroupSubsystem::memory_and_swap_usage_in_bytes(physical_memory_size_type upper_mem_bound,
                                                     physical_memory_size_type upper_swap_bound,
                                                     physical_memory_size_type& value) {
  auto* mem = memory_controller()->controller();
  return mem->memory_and_swap_usage_in_bytes(upper_mem_bound, upper_swap_bound, value);
}
|
|
|
|
// Forwards to memory_soft_limit_in_bytes() of the memory controller and
// returns its result.
bool CgroupSubsystem::memory_soft_limit_in_bytes(physical_memory_size_type upper_bound,
                                                 physical_memory_size_type& value) {
  auto* mem = memory_controller()->controller();
  return mem->memory_soft_limit_in_bytes(upper_bound, value);
}
|
|
|
|
// Forwards to memory_throttle_limit_in_bytes() of the memory controller and
// returns its result.
bool CgroupSubsystem::memory_throttle_limit_in_bytes(physical_memory_size_type& value) {
  auto* mem = memory_controller()->controller();
  return mem->memory_throttle_limit_in_bytes(value);
}
|
|
|
|
// Forwards to memory_usage_in_bytes() of the memory controller and returns
// its result.
bool CgroupSubsystem::memory_usage_in_bytes(physical_memory_size_type& value) {
  auto* mem = memory_controller()->controller();
  return mem->memory_usage_in_bytes(value);
}
|
|
|
|
// Forwards to memory_max_usage_in_bytes() of the memory controller and
// returns its result.
bool CgroupSubsystem::memory_max_usage_in_bytes(physical_memory_size_type& value) {
  auto* mem = memory_controller()->controller();
  return mem->memory_max_usage_in_bytes(value);
}
|
|
|
|
// Forwards to rss_usage_in_bytes() of the memory controller and returns its
// result.
bool CgroupSubsystem::rss_usage_in_bytes(physical_memory_size_type& value) {
  auto* mem = memory_controller()->controller();
  return mem->rss_usage_in_bytes(value);
}
|
|
|
|
// Forwards to cache_usage_in_bytes() of the memory controller and returns its
// result.
bool CgroupSubsystem::cache_usage_in_bytes(physical_memory_size_type& value) {
  auto* mem = memory_controller()->controller();
  return mem->cache_usage_in_bytes(value);
}
|
|
|
|
bool CgroupSubsystem::cpu_quota(int& value) {
|
|
return cpu_controller()->controller()->cpu_quota(value);
|
|
}
|
|
|
|
bool CgroupSubsystem::cpu_period(int& value) {
|
|
return cpu_controller()->controller()->cpu_period(value);
|
|
}
|
|
|
|
bool CgroupSubsystem::cpu_shares(int& value) {
|
|
return cpu_controller()->controller()->cpu_shares(value);
|
|
}
|
|
|
|
bool CgroupSubsystem::cpu_usage_in_micros(uint64_t& value) {
|
|
return cpuacct_controller()->cpu_usage_in_micros(value);
|
|
}
|
|
|
|
// Forwards to print_version_specific_info() of the memory controller, passing
// on the output stream and the upper memory bound.
void CgroupSubsystem::print_version_specific_info(outputStream* st, physical_memory_size_type upper_mem_bound) {
  auto* mem = memory_controller()->controller();
  mem->print_version_specific_info(st, upper_mem_bound);
}
|