-
Notifications
You must be signed in to change notification settings - Fork 75
Limit gProfiler memory & CPU usage and --log-usage support in exe mode. #564
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -18,6 +18,9 @@ | |||||
| from typing import Iterable, Optional, Type, cast | ||||||
|
|
||||||
| import configargparse | ||||||
| from granulate_utils.exceptions import AlreadyInCgroup, UnsupportedCGroupV2 | ||||||
| from granulate_utils.linux.cgroups.cpu_cgroup import CpuCgroup | ||||||
| from granulate_utils.linux.cgroups.memory_cgroup import MemoryCgroup | ||||||
| from granulate_utils.linux.ns import is_running_in_init_pid | ||||||
| from granulate_utils.linux.process import is_process_running | ||||||
| from granulate_utils.metadata import Metadata | ||||||
|
|
@@ -602,6 +605,30 @@ def parse_cmd_args() -> configargparse.Namespace: | |||||
| " beginning of a session.", | ||||||
| ) | ||||||
|
|
||||||
| parser.add_argument( | ||||||
| "--limit-memory", | ||||||
| default=(1 << 30), # 1Gi, same as in the k8s DaemonSet | ||||||
| dest="memory_limit", | ||||||
| type=int, | ||||||
| help="Limit on the memory used by gProfiler." | ||||||
| ) | ||||||
|
|
||||||
| parser.add_argument( | ||||||
| "--limit-cpu", | ||||||
| default=0.5, # 500m, same as in the k8s DaemonSet | ||||||
| dest="cpu_limit", | ||||||
| type=float, | ||||||
| help="Limit on the cpu used by gProfiler." | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
same for memory (with appropriate units) |
||||||
| ) | ||||||
|
|
||||||
| parser.add_argument( | ||||||
| "--no-cgroups", | ||||||
| action="store_true", | ||||||
| dest="disable_cgroups", | ||||||
|
Creatone marked this conversation as resolved.
Outdated
|
||||||
| default=False, | ||||||
| help="Disable the cgroups changes.", | ||||||
| ) | ||||||
|
|
||||||
| args = parser.parse_args() | ||||||
|
|
||||||
| args.perf_inject = args.nodejs_mode == "perf" | ||||||
|
|
@@ -679,12 +706,6 @@ def verify_preconditions(args: configargparse.Namespace) -> None: | |||||
| ) | ||||||
| sys.exit(1) | ||||||
|
|
||||||
| if args.log_usage and get_run_mode() not in ("k8s", "container"): | ||||||
| # TODO: we *can* move into another cpuacct cgroup, to let this work also when run as a standalone | ||||||
| # executable. | ||||||
| print("--log-usage is available only when run as a container!", file=sys.stderr) | ||||||
| sys.exit(1) | ||||||
|
|
||||||
|
|
||||||
| def setup_signals() -> None: | ||||||
| # When we run under staticx & PyInstaller, both of them forward (some of the) signals to gProfiler. | ||||||
|
|
@@ -723,6 +744,45 @@ def init_pid_file(pid_file: str) -> None: | |||||
| Path(pid_file).write_text(str(os.getpid())) | ||||||
|
|
||||||
|
|
||||||
| # Set limits and return path of the cgroup. | ||||||
| def set_limits(cpu: float, memory: int) -> str: | ||||||
| cgroups = {} | ||||||
|
Creatone marked this conversation as resolved.
Outdated
|
||||||
| logger.debug("Check if cgroup version is supported.") | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This log is not needed IMO. As I suggested in https://github.com/Granulate/gprofiler/pull/564/files#r1007993619, log only the negative case. |
||||||
| try: | ||||||
| cgroups["cpu"] = CpuCgroup() | ||||||
| cgroups["memory"] = MemoryCgroup() | ||||||
| except UnsupportedCGroupV2: | ||||||
| logger.error("cgroup v2 is not supported by gProfiler, cpu and memory limits wouldn't be set.") | ||||||
|
Creatone marked this conversation as resolved.
Outdated
Creatone marked this conversation as resolved.
Outdated
|
||||||
| return | ||||||
|
|
||||||
| logger.debug("Prepare gProfiler cpu cgroup.") | ||||||
|
Creatone marked this conversation as resolved.
Outdated
|
||||||
| try: | ||||||
| cgroups["cpu"].move_to_cgroup("gprofiler", os.getpid()) | ||||||
| except AlreadyInCgroup: | ||||||
| logger.warning("gProfiler have already a cpu group.") | ||||||
|
|
||||||
| logger.debug("Set cpu limit in the cgroup.") | ||||||
| cgroups["cpu"].set_cpu_limit_cores(cpu) | ||||||
|
Creatone marked this conversation as resolved.
Outdated
|
||||||
|
|
||||||
| logger.debug("Prepare gProfiler memory cgroup.") | ||||||
| try: | ||||||
| cgroups["memory"].move_to_cgroup("gprofiler", os.getpid()) | ||||||
| except AlreadyInCgroup: | ||||||
| logger.warning("gProfiler have already a memory group.") | ||||||
|
|
||||||
| logger.debug("Set memory limit in the cgroup.") | ||||||
| cgroups["memory"].set_limit_in_bytes(memory) | ||||||
|
|
||||||
| return cgroups['cpu'].cgroup | ||||||
|
Creatone marked this conversation as resolved.
Outdated
|
||||||
|
|
||||||
|
|
||||||
| def setup_usage_logger(log_usage: bool, cgroup: str) -> UsageLoggerInterface: | ||||||
|
Creatone marked this conversation as resolved.
Outdated
|
||||||
| if log_usage: | ||||||
| return CgroupsUsageLogger(logger, cgroup) | ||||||
| else: | ||||||
| return NoopUsageLogger() | ||||||
|
|
||||||
|
|
||||||
| def main() -> None: | ||||||
| args = parse_cmd_args() | ||||||
| verify_preconditions(args) | ||||||
|
|
@@ -738,10 +798,20 @@ def main() -> None: | |||||
| remote_logs_handler, | ||||||
| ) | ||||||
|
|
||||||
| # check if there is no kill switch for managing cgroups | ||||||
|
Creatone marked this conversation as resolved.
Outdated
|
||||||
| # TODO(Creatone): Check the containerized scenario. | ||||||
| cgroup = "/" # assume we run in the root cgroup (when containerized, that's our view) | ||||||
| if not args.disable_cgroups and get_run_mode() not in ("k8s", "container"): | ||||||
| logger.info(f"Trying to set resource limits, cpu='{args.cpu_limit}' and memory='{args.memory_limit}'.") | ||||||
|
Creatone marked this conversation as resolved.
Outdated
|
||||||
| try: | ||||||
| cgroup = set_limits(args.cpu_limit, args.memory_limit) | ||||||
| except Exception: | ||||||
| logger.exception("Failed to set resource limits, continuing anyway") | ||||||
|
|
||||||
| setup_signals() | ||||||
| reset_umask() | ||||||
| # assume we run in the root cgroup (when containerized, that's our view) | ||||||
| usage_logger = CgroupsUsageLogger(logger, "/") if args.log_usage else NoopUsageLogger() | ||||||
|
|
||||||
| usage_logger = CgroupsUsageLogger(logger, cgroup) if args.log_usage else NoopUsageLogger() | ||||||
|
|
||||||
| try: | ||||||
| init_pid_file(args.pid_file) | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,7 +11,17 @@ | |
|
|
||
| import psutil | ||
|
|
||
| CGROUPFS_ROOT = "/sys/fs/cgroup" # TODO extract from /proc/mounts, this may change | ||
| from granulate_utils.linux.cgroups.cgroup import find_v1_hierarchies, find_v2_hierarchy | ||
|
|
||
|
|
||
| # TODO(Creatone): Move it to granulate-utils. | ||
| def _obtain_cgroup_controller_path(cgroup: str, controller: str) -> str: | ||
| cgroup_v1_hierarchies = find_v1_hierarchies() | ||
| if len(cgroup_v1_hierarchies) != 1: | ||
| assert controller in cgroup_v1_hierarchies | ||
| return f"{cgroup_v1_hierarchies[controller]}{cgroup}" | ||
| else: | ||
| return f"{find_v2_hierarchy()}/{controller}{cgroup}" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this will work now, because the cgroups v2 files are different. For example, there is no `cpuacct.usage` file in cgroups v2. I created a ticket for cgroups v2 support. Until that's done, I suggest you raise an exception here if v2 is in use. Also I see that granulate-utils does this check: |
||
|
|
||
|
|
||
| class UsageLoggerInterface: | ||
|
|
@@ -30,7 +40,8 @@ class CpuUsageLogger(UsageLoggerInterface): | |
|
|
||
| def __init__(self, logger: logging.LoggerAdapter, cgroup: str): | ||
| self._logger = logger | ||
| self._cpuacct_usage = Path(f"{CGROUPFS_ROOT}{cgroup}cpuacct/cpuacct.usage") | ||
| cpu_root = _obtain_cgroup_controller_path(cgroup, 'cpuacct') | ||
| self._cpuacct_usage = Path(os.path.join(cpu_root, "cpuacct.usage")) | ||
| self._last_usage: Optional[int] = None | ||
| self._last_ts: Optional[float] = None | ||
|
|
||
|
|
@@ -78,7 +89,7 @@ class MemoryUsageLogger(UsageLoggerInterface): | |
|
|
||
| def __init__(self, logger: logging.LoggerAdapter, cgroup: str): | ||
| self._logger = logger | ||
| memory_root = f"{CGROUPFS_ROOT}{cgroup}memory" | ||
| memory_root = _obtain_cgroup_controller_path(cgroup, 'memory') | ||
| self._memory_usage = Path(os.path.join(memory_root, "memory.usage_in_bytes")) | ||
| self._memory_watermark = Path(os.path.join(memory_root, "memory.max_usage_in_bytes")) | ||
| self._last_usage: Optional[int] = None | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How do I specify no CPU limit? I.e.,
`--limit-cpu none` should be a possible parameter (not necessarily in this form), but you should be able to limit ONLY the memory or ONLY the CPU.