标签搜索

实时日志监控告警脚本

mrui
2025-09-11 / 0 评论 / 3 阅读 / 正在检测是否收录...
#!/bin/bash
# 实时日志监控告警脚本

# Runtime configuration. Each value can be overridden from the environment;
# the default is used when the variable is unset or empty.

# Access log to follow.
LOG_FILE="${LOG_FILE:-/var/log/nginx/access.log}"
# Number of error responses within one minute that triggers an alert.
ERROR_THRESHOLD="${ERROR_THRESHOLD:-50}"
# Recipient of the alert e-mail.
ALERT_EMAIL="${ALERT_EMAIL:-admin@example.com}"

# Follow $LOG_FILE with `tail -F` and call send_alert whenever the number of
# 4xx/5xx responses recorded within one wall-clock minute reaches
# $ERROR_THRESHOLD.
#
# Globals:
#   LOG_FILE         (read)    log file to follow
#   ERROR_THRESHOLD  (read)    per-minute error count that triggers an alert
#   TEMP_DIR         (written) state directory; an existing non-empty value is
#                              kept, otherwise /tmp/log_monitor is used
# Calls:
#   send_alert <minute> <count> <error_file>
# Returns:
#   1 if the state directory cannot be created.
monitor_realtime() {
    echo "开始实时监控 $LOG_FILE..."

    # Matching lines are stored per minute so they can be counted and reported.
    TEMP_DIR="${TEMP_DIR:-/tmp/log_monitor}"
    if ! mkdir -p "$TEMP_DIR"; then
        echo "错误: 无法创建临时目录 $TEMP_DIR" >&2
        return 1
    fi

    # The status code is the 3-digit field right after the quoted request
    # ("..." NNN). Anchoring on the quotes keeps a body size such as 512 from
    # being mistaken for a status and still works for malformed requests that
    # contain no spaces.
    local -r quoted_status_re='^[^"]*"[^"]*"[[:space:]]+([0-9]{3})([[:space:]]|$)'
    local -r error_status_re='^[45][0-9]{2}$'

    local line status current_minute minute_key error_file error_count
    local last_cleanup_minute=""
    local -a fields

    tail -F "$LOG_FILE" | while IFS= read -r line; do
        current_minute=$(date '+%d/%b/%Y:%H:%M')

        # Purge stale state once per minute instead of once per log line.
        if [[ "$current_minute" != "$last_cleanup_minute" ]]; then
            find "$TEMP_DIR" -type f -name "errors_*" -mmin +5 -delete
            last_cleanup_minute=$current_minute
        fi

        if [[ "$line" =~ $quoted_status_re ]]; then
            status=${BASH_REMATCH[1]}
        else
            # No quoted request on the line: fall back to the 9th
            # whitespace-separated field.
            read -r -a fields <<< "$line"
            status=${fields[8]:-}
        fi
        [[ "$status" =~ $error_status_re ]] || continue

        # '/' and ':' are replaced because the raw minute label
        # (e.g. 11/Sep/2025:10:30) would name a path inside sub-directories
        # that do not exist, making every append fail.
        minute_key=${current_minute//\//_}
        minute_key=${minute_key//:/_}
        error_file="$TEMP_DIR/errors_$minute_key"

        if ! printf '%s\n' "$line" >> "$error_file"; then
            continue
        fi

        # Arithmetic expansion strips the padding some wc implementations add
        # and yields 0 if wc printed nothing.
        error_count=$(( $(wc -l < "$error_file") + 0 ))

        if (( error_count >= ERROR_THRESHOLD )); then
            send_alert "$current_minute" "$error_count" "$error_file"
            # Reset the counter so the same burst is not reported twice.
            : > "$error_file"
        fi
    done
}

# Print an alert for an error burst and, when a `mail` command is available,
# e-mail a short summary to $ALERT_EMAIL.
#
# Arguments:
#   $1 - label of the time period the errors belong to
#   $2 - number of errors counted
#   $3 - file holding the matching log lines
# Globals:
#   ALERT_EMAIL (read)
# Outputs:
#   A headline followed by the five most frequent combinations of fields
#   9, 1 and 7 of the stored lines (printed as status code, IP and URL).
send_alert() {
    local period total sample_file
    period=$1
    total=$2
    sample_file=$3

    printf '%s\n' "🚨 告警: $period 发生 $total 个错误请求" "错误分析:"

    # Rank identical (status, client, URL) triples by frequency.
    awk '{print $9, $1, $7}' "$sample_file" \
        | sort \
        | uniq -c \
        | sort -nr \
        | head -5 \
        | awk '{printf "状态码%s: %d次, IP:%s, URL:%s\n", $2, $1, $3, $4}'

    # E-mail is optional: skip silently when no `mail` command exists.
    if ! command -v mail >/dev/null; then
        return 0
    fi

    {
        printf '%s\n' "时间: $period" "错误数量: $total" "详细信息:"
        head -10 "$sample_file"
    } | mail -s "网站错误告警" "$ALERT_EMAIL"
}

# Entry point: start monitoring.
monitor_realtime
0

评论 (0)

取消