-
-
Notifications
You must be signed in to change notification settings - Fork 134
/
Copy pathapache-4xx-report.sh
executable file
·195 lines (166 loc) · 7.31 KB
/
apache-4xx-report.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/bin/bash
#
# Report Apache client and server errors of the last 24 hours.
#
# VERSION :3.2.0
# DATE :2023-09-13
# AUTHOR :Viktor Szépe <[email protected]>
# URL :https://github.com/szepeviktor/debian-server-tools
# LICENSE :The MIT License (MIT)
# BASH-VERSION :4.2+
# DEPENDS :apt-get install mail-transport-agent apache2 ccze perl dategrep
# LOCATION :/usr/local/sbin/apache-4xx-report.sh
# CRON-DAILY :/usr/local/sbin/apache-4xx-report.sh
# Stylesheet referenced by the HTML that ccze generates.
# RawGit (cdn.rawgit.com) has been retired, jsDelivr serves the same file
# straight from the GitHub repository.
CCZE_CSS_URL="https://cdn.jsdelivr.net/gh/szepeviktor/debian-server-tools@master/monitoring/apache-ccze.css"
# Value handed to ccze as "cssbody"
CCZE_BODY_BG="#fdf6e3"
# Message headers printed in front of the report body.
# The value ends with a newline so that printing it with echo leaves
# an empty line between the headers and the body.
EMAIL_HEADER="Subject: [admin] HTTP client errors from $(hostname -f)
From: webserver <root>
MIME-Version: 1.0
Content-Type: text/html; charset=UTF-8
Content-Transfer-Encoding: quoted-printable
"
# Newline-separated list of enabled site configurations (symlinks only)
APACHE_CONFIGS="$(find /etc/apache2/sites-enabled/ -type l -name "*.conf")"
# Sample access log line the patterns below are written against:
# 1.2.3.4 - - [27/Jun/2015:14:35:41 +0200] "GET /request-uri HTTP/1.1" 404 1234 "-" "User-agent/1.1"
#
# Extended regular expressions (used with grep --extended-regexp).
# Log lines matching any of them are left out of the report body,
# only their number is shown per pattern in an "Ignored:" line.
# Remove the leading "#" of a pattern to activate it.
declare -a IGNORE_PATTERNS=(
# 408 Request Timeout on preconnect
'"-" 408 [0-9]+ "-" "-(\|Host:-)?"$'
# Bad request
'"GET / HTTP/(1\.0|1\.1|2\.0)" 400 0 "-" "-"$'
# Tunneling attempts through Amazon CloudFront for blocked news sites in China
'"GET /(ogShow\.aspx|show\.aspx|ogPipe\.aspx|oo\.aspx|1|email|img/logo-s\.gif) HTTP/(1\.0|1\.1|2\.0)" (301|403) [0-9]+ "[^"]+" "Amazon CloudFront"$'
# Favicon in a subdirectory
#'/favicon\.(ico|png) HTTP/(1\.0|1\.1|2\.0)" (403|404) [0-9]+ "'
# WordPress login page
#'"GET /wp-login\.php HTTP/(1\.0|1\.1|2\.0)" 404'
#'"GET /wp-login\.php HTTP/(1\.0|1\.1|2\.0)" 403'
#'"GET /wp-login\.php\?redirect_to=\S+ HTTP/(1\.0|1\.1|2\.0)" 404'
#'"GET /wp-login\.php\?redirect_to=\S+ HTTP/(1\.0|1\.1|2\.0)" 403'
#'"GET /[a-z]+/wp-login\.php HTTP/(1\.0|1\.1|2\.0)" 404'
#'"GET /[a-z]+/wp-login\.php HTTP/(1\.0|1\.1|2\.0)" 403'
#'"GET /[a-z]+/wp-login\.php\?redirect_to=\S+ HTTP/(1\.0|1\.1|2\.0)" 404'
#'"GET /[a-z]+/wp-login\.php\?redirect_to=\S+ HTTP/(1\.0|1\.1|2\.0)" 403'
# WordPress user enumeration
#'"GET (/\?author=|/wp-json/wp/v2/users/)[0-9]+ HTTP/(1\.0|1\.1|2\.0)" 403'
# WordPress' Windows Live Writer manifest
#'/wlwmanifest\.xml HTTP/(1\.0|1\.1|2\.0)" (403|404) [0-9]+ "'
# WordPress direct execution and readme sniffing
#'"GET /wp-content/(plugins|themes)/\S+(\.php(\?\S+)?|/readme\.txt) HTTP/(1\.0|1\.1|2\.0)" 403'
# Dynamic request from CDN
#'"GET /\S* HTTP/(1\.0|1\.1|2\.0)" 403 [0-9]+ "-" "Amazon CloudFront"$'
# cPanel's Let's Encrypt HTTP-01 challenge
#'"GET /\.well-known/acme-challenge/.* "-" "Cpanel-HTTP-Client/1\.0"$'
# .env file
#'"GET /(\S*/)?\.env HTTP/(1\.0|1\.1|2\.0)" (403|404)'
# SEO bots
#'"GET /\S* HTTP/(1\.0|1\.1|2\.0)" 404 [0-9]+ "[^"]+" "[^"]*(SemrushBot/|DotBot/|AhrefsBot/|MJ12bot/|AlphaBot/|BLEXBot/)[^"]*"$'
# Google crawler https://en.wikipedia.org/wiki/List_of_search_engines#General
#'"GET /\S* HTTP/(1\.0|1\.1|2\.0)" 404 [0-9]+ "[^"]+" "[^"]*(Googlebot/2\.1|Googlebot-Image/1\.0|Google Web Preview)[^"]*"$'
# Other search engine crawlers
#'"GET /\S* HTTP/(1\.0|1\.1|2\.0)" 404 [0-9]+ "[^"]+" "[^"]*(Baiduspider/2\.0|bingbot/2\.0|DuckDuckBot/1\.1|PetalBot;|YandexBot/3\.0|Qwantify/2\.4w)[^"]*"$'
# Feed fetchers
#'"GET /\S* HTTP/(1\.0|1\.1|2\.0)" 404 [0-9]+ "[^"]+" "[^"]*(facebookexternalhit/|Twitterbot/|Mail\.RU_Bot/Img/)[^"]*"$'
# DNS over HTTP
#'"GET /dns-query\?dns=AAABAAABAAAAAAAAA3d3dwdleGFtcGxlA2NvbQAAAQAB HTTP/(1\.0|1\.1|2\.0)"'
)
# Turn the log lines arriving on stdin into HTML colorized by ccze's
# httpd plugin, then encode every line as quoted-printable on stdout.
#
# Globals: CCZE_CSS_URL (read), CCZE_BODY_BG (read)
Color_html()
{
    local -a CCZE_ARGS=(
        --plugin httpd
        --html
        --options "cssfile=${CCZE_CSS_URL}"
        --color "cssbody=${CCZE_BODY_BG}"
    )

    ccze "${CCZE_ARGS[@]}" \
        | perl -MMIME::QuotedPrint -p -e '$_=MIME::QuotedPrint::encode_qp($_);'
}
# Mail the report arriving on stdin, but only when stdin is not empty.
#
# Globals:  EMAIL_HEADER (read)
# Input:    report text on stdin
# Output:   headers followed by the HTML-converted report, piped to sendmail
# Returns:  1 when stdin was empty (nothing is sent),
#           otherwise the status of the sendmail pipeline
Maybe_sendmail()
{
    local FIRST_CHAR

    # Probe stdin by taking exactly one character from it.
    # -N (unlike -n) does not treat a newline as delimiter and, together with
    # the empty IFS, keeps a leading space, tab or newline intact so the
    # character can be put back unchanged below.
    if ! IFS= read -r -N 1 FIRST_CHAR; then
        # stdin is empty
        return 1
    fi

    # NOTE(review): no recipient argument or "-t" is passed to sendmail here,
    # confirm that the local MTA delivers such messages as intended.
    {
        echo "${EMAIL_HEADER}"
        # Re-attach the probed character in front of the rest of the input
        { printf '%s' "${FIRST_CHAR}"; cat; } | Color_html
    } | /usr/sbin/sendmail
}
# Tell whether a value is present in a list.
#
# Arguments: $1 - value to look for
#            $2... - list elements
# Returns:   0 when an element is identical to the value, 1 otherwise
In_array()
{
    local WANTED="$1"
    local CANDIDATE

    shift
    for CANDIDATE in "$@"; do
        [[ "${CANDIDATE}" == "${WANTED}" ]] && return 0
    done

    return 1
}
# Print every non-empty argument on a line of its own.
#
# Arguments: $@ - elements to print
# Output:    one element per line on stdout
Array_to_lines()
{
    local ELEMENT

    for ELEMENT in "$@"; do
        # Skip empty elements instead of stopping at the first one.
        # An empty line must not be emitted: fed to "grep --file" it is
        # an empty pattern which matches every line.
        if [ -z "${ELEMENT}" ]; then
            continue
        fi
        # printf prints elements such as "-n" literally, echo would not
        printf '%s\n' "${ELEMENT}"
    done
}
# Collect the 4xx and 5xx lines of every enabled site's access log into
# a temporary file (LOG_EXCERPT), each line prefixed with the log's name.

# Access logs handled so far, a log shared by several sites is read once
declare -a PROCESSED_LOGS

if [ -z "${APACHE_CONFIGS}" ]; then
    echo "Apache log files could not be found." 1>&2
    exit 1
fi

# APACHE_LOG_DIR is defined here
# shellcheck disable=SC1091
source /etc/apache2/envvars

# For non-existent previous log files
shopt -s nullglob

LOG_EXCERPT="$(mktemp --suffix=.apachelog)" || exit 1
# Remove the temporary file on every exit path
trap 'rm -f -- "${LOG_EXCERPT}"' EXIT

# IFS= keeps leading and trailing whitespace of file names
while IFS= read -r CONFIG_FILE; do
    # Skip if marked
    if grep --quiet --fixed-strings '#APACHE-4XXREPORT-SKIP#' "${CONFIG_FILE}"; then
        continue
    fi

    # First CustomLog directive: take its file argument
    ACCESS_LOG="$(sed -n -e '/^\s*CustomLog\s\+\(\S\+\)\s\+\S\+.*$/I{s//\1/p;q;}' "${CONFIG_FILE}")"
    # Nothing to read when the site does not define a CustomLog
    if [ -z "${ACCESS_LOG}" ]; then
        continue
    fi
    SITE_USER="$(sed -n -e '/^\s*Define\s\+SITE_USER\s\+\(\S\+\).*$/I{s//\1/p;q;}' "${CONFIG_FILE}")"

    # Substitute variables
    ACCESS_LOG="$(sed -e "s#\${APACHE_LOG_DIR}#${APACHE_LOG_DIR}#g" -e "s#\${SITE_USER}#${SITE_USER}#g" <<<"${ACCESS_LOG}")"

    # Prevent double log processing
    if In_array "${ACCESS_LOG}" "${PROCESSED_LOGS[@]}"; then
        continue
    fi
    PROCESSED_LOGS+=( "${ACCESS_LOG}" )

    # Log lines for 1 day from Debian cron.daily
    # https://datatracker.ietf.org/doc/html/rfc9110#section-15.5
    nice dategrep --multiline \
        --start "now truncate 24h add -17h35m" --end "06:25:00" "${ACCESS_LOG}".[1] "${ACCESS_LOG}" \
        | grep --extended-regexp '" [45][0-9][0-9] [0-9]+ "' \
        | sed -e "s#^#$(basename "${ACCESS_LOG}" .log): #"

    ## "+" encoded spaces, lower case hexadecimal digits
    #nice dategrep --multiline \
    # --start "now truncate 24h add -17h35m" --end "06:25:00" "${ACCESS_LOG}".[1] "${ACCESS_LOG}" \
    # | grep --extended-regexp '([?&][^= ]+=[^& ]*\+|\?\S*%[[:xdigit:]]?[a-f])' \
    # | sed -e "s#^#$(basename "${ACCESS_LOG}" .log): #"

    ## Facebook Ads campaign errors
    #nice dategrep --multiline \
    # --start "now truncate 24h add -17h35m" --end "06:25:00" "${ACCESS_LOG}".[1] "${ACCESS_LOG}" \
    # | grep --extended-regexp '"GET /.*\?utm_source=facebook.* HTTP/(1\.0|1\.1|2\.0)" [345][0-9][0-9]' \
    # | sed -e "s#^#$(basename "${ACCESS_LOG}" .log): #"
done <<<"${APACHE_CONFIGS}" >"${LOG_EXCERPT}"
# Compose the report and mail it:
# the total number of collected lines, one "Ignored:" line for each pattern
# that matched, then the lines not matching any pattern (first 2 MiB only).
{
    printf '%s errors total.\n' "$(wc -l <"${LOG_EXCERPT}")"

    for PATTERN in "${IGNORE_PATTERNS[@]}"; do
        COUNT="$(grep --extended-regexp --count "${PATTERN}" "${LOG_EXCERPT}")"
        # Patterns without a match are not mentioned
        if [ "${COUNT}" != 0 ]; then
            printf 'Ignored: %4d × #%s#\n' "${COUNT}" "${PATTERN}"
        fi
    done

    # Summary
    #Array_to_lines "${IGNORE_PATTERNS[@]}" \
    # | grep --extended-regexp --invert-match --file=- "${LOG_EXCERPT}" \
    # | sed -n -e 's#^\(\S\+:\).* \(".\+" [0-9]\+\) .*$#\1 \2#p' | sort | uniq -cd

    # Patterns go to grep through stdin, dd caps the output size
    Array_to_lines "${IGNORE_PATTERNS[@]}" \
        | grep --extended-regexp --invert-match --file=- "${LOG_EXCERPT}" \
        | dd iflag=fullblock bs=1M count=2 2>/dev/null
} | Maybe_sendmail

# The excerpt is not needed any more
rm "${LOG_EXCERPT}"
# Report PHP-FPM errors
#
# Gather the existing PHP-FPM logs first (previous log, then current log).
# Without any file argument dategrep would read standard input instead,
# so the report is skipped when no log file is present.
declare -a PHP_FPM_LOGS=()
for PHP_FPM_LOG in /var/log/php*-fpm.log.[1] /var/log/php*-fpm.log; do
    # Also drops an unexpanded glob pattern
    if [ -f "${PHP_FPM_LOG}" ]; then
        PHP_FPM_LOGS+=( "${PHP_FPM_LOG}" )
    fi
done

if [ "${#PHP_FPM_LOGS[@]}" -gt 0 ]; then
    # Same 24 hour window as for the access logs, NOTICE lines are dropped
    nice dategrep --multiline \
        --start "now truncate 24h add -17h35m" --end "06:25:00" "${PHP_FPM_LOGS[@]}" \
        | grep --fixed-strings --invert-match ' NOTICE: ' \
        | sed -e 's#^#php-fpm.log: #' \
        | Maybe_sendmail
fi

exit 0