#!/bin/bash
# Real-time log monitoring and alerting script.
#
# Configuration. Each setting keeps its built-in default unless an environment
# variable of the same name is already set when the script starts, e.g.
#   LOG_FILE=/var/log/nginx/other.log ERROR_THRESHOLD=10 ./script.sh
#
#   LOG_FILE         access log to follow
#   ERROR_THRESHOLD  number of errors per minute that triggers an alert
#   ALERT_EMAIL      recipient address for alert e-mails
LOG_FILE="${LOG_FILE:-/var/log/nginx/access.log}"
ERROR_THRESHOLD="${ERROR_THRESHOLD:-50}" # error-count threshold per minute
ALERT_EMAIL="${ALERT_EMAIL:-admin@example.com}"
#######################################
# Follow $LOG_FILE and call send_alert whenever a single minute accumulates
# ERROR_THRESHOLD or more HTTP 4xx/5xx responses.
#
# Lines are expected to look like the nginx "combined" format: a bracketed
# timestamp ([dd/Mon/yyyy:HH:MM:SS zone]) and a quoted request line that is
# immediately followed by the status code.
#
# Globals:
#   LOG_FILE         (read)    log file to follow
#   ERROR_THRESHOLD  (read)    per-minute error count that triggers an alert
#   TEMP_DIR         (written) state directory; a value set by the caller is
#                              kept, otherwise /tmp/log_monitor is used
# Arguments: none
# Outputs:   a start-up banner on stdout
# Returns:   1 if the state directory cannot be created; otherwise the exit
#            status of the tail pipeline (tail -F normally runs until killed)
#######################################
monitor_realtime() {
  local line minute last_minute="" error_file error_count
  # The status code is the three-digit field directly after the first quoted
  # field (the request line). Anchoring the match there keeps a 200 response
  # whose byte count happens to be 400-599 from being counted as an error.
  local error_re='^[^"]*"[^"]*" [45][0-9]{2}( |$)'
  # Minute-resolution prefix of the bracketed timestamp: dd/Mon/yyyy:HH:MM.
  local minute_re='\[([0-9]{2}/[A-Za-z]{3}/[0-9]{4}:[0-9]{2}:[0-9]{2}):'

  echo "开始实时监控 $LOG_FILE..."

  # Directory holding one "errors_<minute>" file per minute bucket.
  # NOTE(review): the default lives under /tmp with a predictable name; point
  # TEMP_DIR at a private directory on multi-user hosts.
  TEMP_DIR="${TEMP_DIR:-/tmp/log_monitor}"
  mkdir -p "$TEMP_DIR" || return 1

  # IFS= and -r keep leading whitespace and backslashes (e.g. "\x22" escapes
  # written by nginx) intact.
  tail -F "$LOG_FILE" | while IFS= read -r line; do
    # Bucket by the minute recorded in the log line itself; fall back to the
    # wall clock only when the line carries no recognisable timestamp.
    if [[ "$line" =~ $minute_re ]]; then
      minute=${BASH_REMATCH[1]}
    else
      minute=$(date '+%d/%b/%Y:%H:%M')
    fi

    # Purge stale bucket files once per minute change rather than once per
    # log line, so busy logs do not spawn a find process for every request.
    if [[ "$minute" != "$last_minute" ]]; then
      last_minute=$minute
      find "$TEMP_DIR" -type f -name "errors_*" -mmin +5 -delete
    fi

    # Only 4xx/5xx responses are of interest.
    [[ "$line" =~ $error_re ]] || continue

    # The minute string contains "/" and ":"; replace every non-alphanumeric
    # character so the bucket name is a single valid file name inside TEMP_DIR.
    error_file="$TEMP_DIR/errors_${minute//[^A-Za-z0-9]/_}"
    printf '%s\n' "$line" >> "$error_file"

    # Count errors recorded for this minute. The arithmetic expansion strips
    # padding some wc implementations emit and yields 0 if wc printed nothing.
    error_count=$(( $(wc -l < "$error_file") ))

    if [ "$error_count" -ge "$ERROR_THRESHOLD" ]; then
      send_alert "$minute" "$error_count" "$error_file"
      # Reset the counter so the same batch of errors is not reported twice.
      : > "$error_file"
    fi
  done
}
#######################################
# Report an error burst: print a summary to stdout and, when a `mail`
# command is available, e-mail a short report.
#
# Globals:   ALERT_EMAIL (read) - recipient of the e-mail report
# Arguments: $1 - label of the time period being reported
#            $2 - number of errors counted
#            $3 - path of the file holding the offending log lines
# Outputs:   alert banner plus the five most frequent
#            (status, client IP, URL) combinations on stdout
#######################################
send_alert() {
  local when="$1" total="$2" hits_file="$3"

  printf '%s\n' "🚨 告警: $when 发生 $total 个错误请求" "错误分析:"

  # Tally identical (field 9, field 1, field 7) triples and list the top five.
  awk '{print $9, $1, $7}' "$hits_file" \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -5 \
    | awk '{printf "状态码%s: %d次, IP:%s, URL:%s\n", $2, $1, $3, $4}'

  # E-mail delivery is optional (needs a configured sendmail); without a
  # `mail` command there is nothing more to do.
  command -v mail >/dev/null || return 0

  {
    printf '%s\n' "时间: $when" "错误数量: $total" "详细信息:"
    head -10 "$hits_file"
  } | mail -s "网站错误告警" "$ALERT_EMAIL"
}
# Start monitoring
monitor_realtime
# Copyright: mrui
# Comments (0)