Prometheus 报警规则

注意事项:要判断某条报警规则是否适用于自己的环境,可以到 Prometheus 的主页面查看,那里列出了可用的监控项和监控参数。
在这里插入图片描述

mysql

# MySQL availability and replication alerts.
# NOTE(review): the metric names used below are presumably exported by
# mysqld_exporter -- confirm they exist on your Prometheus expression page.
groups:
  - name: Mysql-rules
    rules:
      # Fires when mysql_up has been 0 for an instance for at least 5 seconds.
      - alert: "Mysql status"
        expr: mysql_up == 0
        for: 5s
        labels:
          severity: error
        annotations:
          # The template must be written as one unbroken `{{ ... }}` token.
          summary: "您的 {{ $labels.instance }} 的 Mysql 已停止运行!"
          description: "Mysql数据库宕机,请检查"

      # Fires when the replication IO thread gauge reports 0.
      - alert: "Mysql slave io thread status"
        expr: mysql_slave_status_slave_io_running == 0
        for: 5s
        labels:
          severity: error
        annotations:
          summary: "您的 {{ $labels.instance }} Mysql slave io thread 已停止"
          description: "Mysql主从IO线程故障,请检测"

      # Fires when the replication SQL thread gauge reports 0.
      - alert: "Mysql slave sql thread status"
        expr: mysql_slave_status_slave_sql_running == 0
        for: 5s
        labels:
          severity: error
        annotations:
          summary: "您的 {{ $labels.instance }} Mysql slave sql thread 已停止"
          description: "Mysql主从sql线程故障,请检测"

nginx

# Nginx availability, HTTP error-rate and latency alerts.
# NOTE(review): expressions aggregated with sum() and no `by (instance)` carry
# no `instance` label, so `{{ $labels.instance }}` renders empty in those
# summaries. The expressions are kept as written; adjust if the instance name
# is needed in the notification.
groups:
  - name: nginx
    rules:
      # Fires when fewer than 2 targets of job "nginx" are up for 1 minute.
      # NOTE(review): the threshold 2 presumably equals the expected number of
      # nginx targets -- confirm for your deployment.
      - alert: "nginx status"
        expr: sum(up{job="nginx"}) < 2
        for: 1m
        labels:
          severity: error
        annotations:
          summary: "您的 {{ $labels.instance }} 的 Nginx 已停止运行!"
          description: "Nginx宕机,请检查"

      # Fires when more than 5% of requests returned a 4xx status for 5 minutes.
      - alert: NginxHighHttp4xxErrorRate
        expr: |
          sum(rate(nginx_http_requests_total{status=~"^4.."}[1m])) / sum(rate(nginx_http_requests_total[1m])) * 100 > 5
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Nginx high HTTP 4xx error rate (instance {{ $labels.instance }})"
          description: "Too many HTTP requests with status 4xx (> 5%)\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

      # Fires when more than 5% of requests returned a 5xx status for 5 minutes.
      - alert: NginxHighHttp5xxErrorRate
        expr: |
          sum(rate(nginx_http_requests_total{status=~"^5.."}[1m])) / sum(rate(nginx_http_requests_total[1m])) * 100 > 5
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Nginx high HTTP 5xx error rate (instance {{ $labels.instance }})"
          description: "Too many HTTP requests with status 5xx (> 5%)\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"

      # Fires when the p99 request duration exceeds 10 seconds for 5 minutes.
      # `le` must survive the aggregation: histogram_quantile() computes the
      # quantile from the per-bucket `le` label and yields no usable result
      # when that label has been summed away.
      - alert: NginxLatencyHigh
        expr: |
          histogram_quantile(0.99, sum(rate(nginx_http_request_duration_seconds_bucket[30m])) by (le, host, node)) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Nginx latency high (instance {{ $labels.instance }})"
          description: "Nginx p99 latency is higher than 10 seconds\n  VALUE = {{ $value }}\n  LABELS: {{ $labels }}"


node-exporter

# Alerting rules for server (host) resources
groups:
- name: 服务器资源监控
  rules:
  # Fires when memory usage (total minus free, cached and buffers, as a
  # percentage of total) stays above 90%.
  - alert: 内存使用率过高
    expr: 100 - (node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 > 90
    for: 5m  # the condition must hold this long before the alert is sent to Alertmanager
    labels:
      severity: 严重告警
    annotations:
      # Each template must be one unbroken `{{ ... }}` token.
      summary: "{{ $labels.instance }} 内存使用率过高,请尽快处理!"
      description: "{{ $labels.instance }}内存使用率超过90%,当前使用率{{ $value }}%."
          
  # Fires when a scrape target has reported up == 0 for 3 minutes.
  - alert: 服务器宕机
    expr: up == 0
    for: 3m
    labels:
      severity: 严重告警
    annotations:
      # Each template must be one unbroken `{{ ... }}` token.
      summary: "{{ $labels.instance }} 服务器宕机,请尽快处理!"
      description: "{{ $labels.instance }} 服务器延时超过3分钟,当前状态{{ $value }}. "
 
  - alert: CPU高负荷
    expr: 100 - (avg by 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值