在 Kubernetes 中容器化部署 Prometheus + Grafana

环境简介

  • k8s版本:1.15.2,三台主机
  • 安装方式:kubeadm

主机说明:

操作系统 IP地址 角色 CPU 内存 主机名
centos7 192.168.50.13 master 2 2 k8s-master
centos7 192.168.50.14 node 2 2 k8s-node1
centos7 192.168.50.15 node 2 2 k8s-node2

名称空间

[root@k8s-master ~]# cd prometheus/prometheus/
[root@k8s-master prometheus]# vim ns.yaml
# Namespace named "monitoring"; the namespaced manifests later in this guide
# set `namespace: monitoring`, so this must be applied first.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring

[root@k8s-master prometheus]# kubectl apply -f ns.yaml

部署Prometheus

rbac授权

相当于在 k8s 中创建一个带权限的账号:先创建 ServiceAccount,再通过 ClusterRole 和 ClusterRoleBinding 为其授予只读权限,Prometheus 以该账号的身份访问 Kubernetes API 获取监控数据

[root@k8s-master prometheus]# vim rbac.yaml
# Identity used by Prometheus to talk to the Kubernetes API.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitoring
---
# Cluster-wide, read-only permissions needed for service discovery and
# for scraping node / kubelet metrics.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
# Core-group objects that are discovered and scraped.
- apiGroups:
  - ""
  resources:
  - nodes
  - services
  - endpoints
  - pods
  - nodes/proxy
  verbs:
  - get
  - list
  - watch
# Ingress discovery. "extensions" is the group served by this cluster
# version (1.15); "networking.k8s.io" is listed as well so the same role
# keeps working on clusters that serve Ingress from that group.
- apiGroups:
  - "extensions"
  - "networking.k8s.io"
  resources:
  - ingresses
  verbs:
  - get
  - list
  - watch
# Read-only access to ConfigMaps and the kubelet metrics subresource.
- apiGroups:
  - ""
  resources:
  - configmaps
  - nodes/metrics
  verbs:
  - get
# Non-resource endpoint: allows GET on /metrics.
- nonResourceURLs:
  - /metrics
  verbs:
  - get
---
# Grants the ClusterRole above to the prometheus ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: monitoring
---
# Long-lived (non-expiring) token for the prometheus ServiceAccount.
# The Secret must live in the same namespace as the ServiceAccount named in
# the annotation; without `namespace: monitoring` it would be created in
# whatever namespace kubectl defaults to, the token would never be populated,
# and `kubectl describe secret prometheus-secrets -n monitoring` would fail.
apiVersion: v1
kind: Secret
metadata:
  name: prometheus-secrets
  namespace: monitoring
  annotations:
    kubernetes.io/service-account.name: "prometheus"
type: kubernetes.io/service-account-token

 查看token

kubectl describe secret prometheus-secrets -n monitoring

使用configMap 创建报警规则文件和配置文件

[root@k8s-master prometheus]# vim prometheus-rule.yaml
# ConfigMap holding the Prometheus alerting rules for node-level health.
# Each key under `data` is one rule file.
#
# The `{{ ... }}` placeholders in the annotations are Prometheus alert
# templates; each must stay on a single line inside its quoted string,
# otherwise the literal block below is no longer valid YAML.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rule
  namespace: monitoring
data:
  example01.yaml: |
    groups:
    - name: node
      rules:
      - alert: NodeDown
        expr: up == 0
        for: 3m
        labels:
          severity: critical
        annotations:
          summary: "{{ $labels.instance }}: down"
          description: "{{ $labels.instance }} has been down for more than 3m"
          value: "{{ $value }}"

      - alert: NodeCPUHigh
        expr: (1 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100 > 75
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "{{$labels.instance}}: High CPU usage"
          description: "{{$labels.instance}}: CPU usage is above 75%"
          value: "{{ $value }}"

      - alert: NodeCPUIowaitHigh
        expr: avg by (instance) (irate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 50
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "{{$labels.instance}}: High CPU iowait usage"
          description: "{{$labels.instance}}: CPU iowait usage is above 50%"
          value: "{{ $value }}"

      - alert: NodeMemoryUsageHigh
        expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 > 90
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "{{$labels.instance}}: High memory usage"
          description: "{{$labels.instance}}: Memory usage is above 90%"
          value: "{{ $value }}"

      - alert: NodeDiskRootLow
        expr: (1 - node_filesystem_avail_bytes{fstype=~"ext.*|xfs",mountpoint ="/"} / node_filesystem_size_bytes{fstype=~"ext.*|xfs",mountpoint ="/"}) * 100 > 80
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "{{$labels.instance}}: Low disk(the / partition) space"
          description: "{{$labels.instance}}: Disk(the / partition) usage is above 80%"
          value: "{{ $value }}"

      - alert: NodeDiskBootLow
        expr: (1 - node_filesystem_avail_bytes{fstype=~"ext.*|xfs",mountpoint ="/boot"} / node_filesystem_size_bytes{fstype=~"ext.*|xfs",mountpoint ="/boot"}) * 100 > 80
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "{{$labels.instance}}: Low disk(the /boot partition) space"
          description: "{{$labels.instance}}: Disk(the /boot partition) usage is above 80%"
          value: "{{ $value }}"

      - alert: NodeLoad5High
        expr: (node_load5) > (count by (instance) (node_cpu_seconds_total{mode='system'}) * 2)
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "{{$labels.instance}}: Load(5m) High"
          description: "{{$labels.instance}}: Load(5m) is 2 times the number of CPU cores"
          value: "{{ $value }}"

 

[root@k8s-master prometheus]# vim prometheus-configMap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
    
    alerting:
      alertmanagers:
      - static_configs:
        - targets:
          - alertmanager.monitoring.svc.cluster.local:9093 
    rule_files:
      - "rules/*.yaml"  
    scrape_configs:
    - job_name: 'prometheus'
      static_configs:
      - targets: ['localhost:9090']
    
    - job_name: 'kubernetes-nodes-cadvisor'
      metrics_path: /metrics
      scheme: https
      kubernetes_sd_configs:
      - role: node
        api_server: https://192.168.50.13:6443
        bearer_token_file: /opt/prometh
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值