本篇文章为大家展示了k8s cpu limit node节点异常 怎么办,内容简明扼要并且容易理解,绝对能使你眼前一亮,通过这篇文章的详细介绍希望你能有所收获。
日志信息:
OCI runtime create failed: container_linux.go:348:
starting container process caused "process_linux.go:402: container init caused \"process_linux.go:367:
setting cgroup config for procHooks process caused \\\"failed to write 100000 to
cpu.cfs_period_us: write /sys/fs/cgroup/cpu,cpuacct/kubepods/burstable/pod0f117614-9bae-11e9-b422-525400530fa1/fa79711ac936f4650b00bab66e21f5efb64fac642aa38e9c314385ab4023b484/cpu.cfs_period_us:
invalid argument\\\"\"": unknown
从日志信息描述来看:写入 cgroup 的 cpu 参数 cpu.cfs_period_us 时报错 "invalid argument",即该参数的设置有问题!
参数文件目录:/sys/fs/cgroup/cpu,cpuacct/kubepods/burstable/pod0f117614-9bae-11e9-b422-525400530fa1/fa79711ac936f4650b00bab66e21f5efb64fac642aa38e9c314385ab4023b484/cpu.cfs_period_us
导致该问题的具体原因未知!
出现的症状:
新调度过来的pod起不来了,报上面那个错误,以前的pod好像没有问题
https://my.oschina.net/xiaominmin/blog/3068378
https://kubernetes.io/zh/docs/tasks/administer-cluster/cpu-management-policies/
https://qingwave.github.io/2019/01/09/%E6%B7%B1%E5%85%A5%E7%90%86%E8%A7%A3K8s%E8%B5%84%E6%BA%90%E9%99%90%E5%88%B6/#%E5%86%85%E5%AD%98%E9%99%90%E5%88%B6
https://www.yangcs.net/posts/understanding-resource-limits-in-kubernetes-cpu-time/
原因:内核问题
解决:node 执行脚本
#!/usr/bin/python2
"""Fixes a CFS bug in Container-optimized OS (COS) nodes.
The bug is described at:
https://docs.google.com/document/d/13KLD__6A935igLXpTFFomqfclATC89nAkhPOxsuKA0I/edit#
Examples:
To check script sanity in COS VM:
sudo python fix-cfs.py --dry_run
To fix cfs bug in COS VM:
sudo python fix-cfs.py
When running in a container/DaemonSet:
sudo python fix-cfs.py --sys=/host/sys --dry_run
sudo python fix-cfs.py --sys=/host/sys
sudo python fix-cfs.py --sys=/host/sys --interval=10
"""
from __future__ import print_function
import argparse
import math
import os
import time
def _ParseCommandLine():
    """Parse the command-line flags of the script.

    Returns:
      The argparse namespace with three attributes: `dry_run` (bool, False
      unless --dry_run is given), `sys` (str, defaults to '/sys') and
      `interval` (int, or None when --interval is not given).
    """
    cli = argparse.ArgumentParser(description='Fix cfs bug.')
    cli.add_argument(
        '--dry_run',
        dest='dry_run',
        action='store_true',
        default=False,
        help='Whether or not to run in dry_run mode',
    )
    cli.add_argument(
        '--sys',
        type=str,
        default='/sys',
        help='The root directory of the /sys fs',
    )
    cli.add_argument(
        '--interval',
        type=int,
        help='Seconds to wait between invocations',
    )
    parsed = cli.parse_args()
    return parsed
def ReadFile(directory, filename):
    """Return the full text content of `filename` inside `directory`.

    Args:
      directory: Directory containing the file.
      filename: Name of the file, joined onto `directory`.

    Returns:
      The file content as a string, exactly as read (no stripping).
    """
    path = os.path.join(directory, filename)
    with open(path, 'r') as handle:
        content = handle.read()
    return content
def WriteFile(directory, filename, number):
    """Overwrite `filename` inside `directory` with `number` as a decimal integer.

    Args:
      directory: Directory containing the file.
      filename: Name of the file, joined onto `directory`.
      number: Numeric value to write; it is rendered with '%d', so a float
        is truncated to its integer part.
    """
    path = os.path.join(directory, filename)
    text = '%d' % number
    with open(path, 'w') as handle:
        handle.write(text)
def ListSubdirs(directory):
    """Return the paths of the immediate subdirectories of `directory`.

    Args:
      directory: Directory to scan.

    Returns:
      A list of `directory`-joined paths, one per entry that is itself a
      directory, in the order reported by os.listdir.
    """
    subdirs = []
    for entry in os.listdir(directory):
        candidate = os.path.join(directory, entry)
        if os.path.isdir(candidate):
            subdirs.append(candidate)
    return subdirs
def CalculateNewQuotaPeriodForPod(quota, period):
    """Compute the replacement quota and period for a pod-level cgroup.

    The period is always reset to 100000. The quota is scaled down by
    128/147 once per scaling step, where the number of steps is the
    smallest integer n with 100000 * (147/128)**n >= period, and is then
    padded by 1 + n.

    Args:
      quota: Current cpu.cfs_quota_us value.
      period: Current cpu.cfs_period_us value; must be positive.

    Returns:
      A (new_quota, new_period) tuple.
    """
    # Keep the floating-point operations in exactly this order so the
    # rounding of ceil/floor matches the established behavior.
    log_growth = math.log(period) - math.log(100000)
    log_step = math.log(147) - math.log(128)
    steps = math.ceil(log_growth / log_step)
    shrink = math.pow(128.0 / 147, steps)
    padded_quota = math.floor(quota * shrink) + 1 + steps
    return padded_quota, 100000
def CalculateNewQuotaPeriodForContainer(quota, period):
    """Compute the replacement quota and period for a container-level cgroup.

    The period is always reset to 100000. The quota is scaled down by
    128/147 once per scaling step, where the number of steps is the
    smallest integer n with 100000 * (147/128)**n >= period, and is never
    allowed to drop below 1000.

    Args:
      quota: Current cpu.cfs_quota_us value.
      period: Current cpu.cfs_period_us value; must be positive.

    Returns:
      A (new_quota, new_period) tuple.
    """
    # Keep the floating-point operations in exactly this order so the
    # rounding of ceil/floor matches the established behavior.
    log_growth = math.log(period) - math.log(100000)
    log_step = math.log(147) - math.log(128)
    steps = math.ceil(log_growth / log_step)
    shrink = math.pow(128.0 / 147, steps)
    scaled_quota = math.floor(quota * shrink)
    if scaled_quota < 1000:
        scaled_quota = 1000
    return scaled_quota, 100000
def FixPodIfAffected(pod_dir, dry_run):
    """Reset a pod cgroup's CFS period to 100000 if it has grown beyond that.

    Reads cpu.cfs_quota_us and cpu.cfs_period_us from `pod_dir`. When both
    are positive and the period exceeds 100000, a new quota/period pair is
    computed with CalculateNewQuotaPeriodForPod and written back (or only
    reported when `dry_run` is set). Read and write failures are reported
    and otherwise ignored, since the cgroup may vanish at any time.

    Args:
      pod_dir: Path of the pod-level cgroup directory.
      dry_run: When true, print what would be written without writing.
    """
    try:
        # int() is used instead of the Python-2-only long(); Python 2
        # promotes to long automatically when the value needs it.
        quota = int(ReadFile(pod_dir, 'cpu.cfs_quota_us'))
        period = int(ReadFile(pod_dir, 'cpu.cfs_period_us'))
    except IOError:
        # The message is one string: the original applied '%' to a fragment
        # without a placeholder, which raised TypeError inside this handler.
        print('Skipping pod_dir %s. The pod may have disappeared from cfs '
              'before it could be examined' % pod_dir)
        return
    if quota <= 0 or period <= 0:
        # No positive quota/period pair to rescale.
        return
    if period <= 100000:
        # Period is not above the 100000 target; nothing to fix.
        return
    print('Found a problem:')
    print('pod %s has quota %d, period %d' % (pod_dir, quota, period))
    new_quota, new_period = CalculateNewQuotaPeriodForPod(quota, period)
    if dry_run:
        print('dry_run: would fix pod %s with quota %d, period %d' %
              (pod_dir, new_quota, new_period))
        return
    try:
        # NOTE(review): pods are written period-first, containers
        # quota-first; presumably this order matters to the kernel's
        # parent/child checks -- confirm before reordering.
        WriteFile(pod_dir, 'cpu.cfs_period_us', new_period)
        WriteFile(pod_dir, 'cpu.cfs_quota_us', new_quota)
        print('fixed pod %s with quota %d, period %d' %
              (pod_dir, new_quota, new_period))
    except IOError:
        print('Warning: failed to fix cfs at pod_dir %s, '
              'the directory may have disappeared.' % pod_dir)
def FixContainerIfAffected(container_dir, dry_run):
    """Reset a container cgroup's CFS period to 100000 if it has grown beyond that.

    Reads cpu.cfs_quota_us and cpu.cfs_period_us from `container_dir`. When
    both are positive and the period exceeds 100000, a new quota/period pair
    is computed with CalculateNewQuotaPeriodForContainer and written back
    (or only reported when `dry_run` is set). Read and write failures are
    reported and otherwise ignored, since the cgroup may vanish at any time.

    Args:
      container_dir: Path of the container-level cgroup directory.
      dry_run: When true, print what would be written without writing.
    """
    try:
        # int() is used instead of the Python-2-only long(); Python 2
        # promotes to long automatically when the value needs it.
        quota = int(ReadFile(container_dir, 'cpu.cfs_quota_us'))
        period = int(ReadFile(container_dir, 'cpu.cfs_period_us'))
    except IOError:
        # The message is one string: the original applied '%' to a fragment
        # without a placeholder, which raised TypeError inside this handler.
        print('Skipping container_dir %s. The container may have disappeared '
              'from cfs before it could be examined' % container_dir)
        return
    if quota <= 0 or period <= 0:
        # No positive quota/period pair to rescale.
        return
    if period <= 100000:
        # Period is not above the 100000 target; nothing to fix.
        return
    print('Found a problem:')
    print('container %s has quota %d, period %d' %
          (container_dir, quota, period))
    new_quota, new_period = CalculateNewQuotaPeriodForContainer(quota, period)
    if dry_run:
        print('dry_run: would fix container %s with quota %d, period %d' %
              (container_dir, new_quota, new_period))
        return
    try:
        # NOTE(review): containers are written quota-first, pods
        # period-first; presumably this order matters to the kernel's
        # parent/child checks -- confirm before reordering.
        WriteFile(container_dir, 'cpu.cfs_quota_us', new_quota)
        WriteFile(container_dir, 'cpu.cfs_period_us', new_period)
        print('fixed container %s with quota %d, period %d' %
              (container_dir, new_quota, new_period))
    except IOError:
        print('Warning: failed to fix cfs at container_dir %s, '
              'the directory may have disappeared.' % container_dir)
def FixAllPods(sysfs='/sys', dry_run=True):
    """Check and fix every burstable pod cgroup and its container cgroups.

    Walks `<sysfs>/fs/cgroup/cpu/kubepods/burstable`: each subdirectory is
    treated as a pod cgroup, and each of its subdirectories as a container
    cgroup.

    Args:
      sysfs: Root of the sysfs mount (e.g. '/host/sys' inside a container).
      dry_run: When true, only report what would be changed.

    Raises:
      OSError: If the burstable pods directory itself cannot be listed.
    """
    pods_dir = os.path.join(sysfs, 'fs/cgroup/cpu/kubepods/burstable')
    for pod_dir in ListSubdirs(pods_dir):
        FixPodIfAffected(pod_dir, dry_run)
        try:
            container_dirs = ListSubdirs(pod_dir)
        except OSError:
            # The pod cgroup can be removed between listing the parent and
            # descending into it; skip it instead of aborting the scan.
            print('Skipping containers of pod_dir %s. The pod may have '
                  'disappeared from cfs before its containers could be '
                  'listed' % pod_dir)
            continue
        for container_dir in container_dirs:
            FixContainerIfAffected(container_dir, dry_run)
def main():
    """Run the fix once, or repeatedly when --interval is given.

    Without --interval (or with an interval of 0) a single pass is made.
    Otherwise the script sleeps for that many seconds after every pass and
    then scans again, indefinitely.
    """
    options = _ParseCommandLine()
    FixAllPods(sysfs=options.sys, dry_run=options.dry_run)
    while options.interval:
        time.sleep(options.interval)
        FixAllPods(sysfs=options.sys, dry_run=options.dry_run)


# Only run when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
上述内容就是k8s cpu limit node节点异常 怎么办,你们学到知识或技能了吗?如果还想学到更多技能或者丰富自己的知识储备,欢迎关注亿速云行业资讯频道。
亿速云「云服务器」,即开即用、新一代英特尔至强铂金CPU、三副本存储NVMe SSD云盘,价格低至29元/月。点击查看>>
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。
原文链接:https://my.oschina.net/xiaominmin/blog/3068251