#!/bin/sh
check_daemons=$1
log_dir=/var/log/hadoop
warn_file=/tmp/warnings.txt
emails="{email to send errors}"
log_date=`date -d "1 day ago" +%Y-%m-%d`    # yesterday's log suffix; change or override this for a specific date

# Start each run with an empty report file
rm -f "$warn_file"
touch "$warn_file"
for log_file in namenode jobtracker secondarynamenode datanode tasktracker
do
echo "$check_daemons" | grep -qw "$log_file"
if [[ $? -eq 0 ]] ; then
echo -e "\n --------------------- $log_file errors and warning ------------------ \n" >> $warn_file
cat $log_dir/hadoop-hadoop-$log_file-`hostname`.log.`date -d "1 day ago" +%Y-%m-%d`
| egrep -v 'Checkpoint done' | egrep -A 10 'WARN|ERROR' >> $warn_file
fi
done
# Count the collected lines, ignoring the section banners and blank lines
error_line_count=`egrep -v "errors and warnings ---" "$warn_file" | egrep -v "^$" | wc -l`
# Mail the report only if it actually contains warnings or errors
if [ "$error_line_count" -ne 0 ] ; then
    mail -s "`hostname` node errors and warnings" $emails < "$warn_file"
fi
The script can be run from cron; each run checks the previous day's logs. It is also easy to adapt it for standalone runs against any specific date (see the crontab and date-override sketches after the usage notes).
Usage:
For a node running the namenode and jobtracker, run the script as
sh get_errors.sh namenode,jobtracker
For a node running the datanode and tasktracker, run the script as
sh get_errors.sh datanode,tasktracker
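
A rough sketch of scheduling, assuming the script is saved at /usr/local/bin/get_errors.sh (a hypothetical path): a crontab entry that checks yesterday's datanode and tasktracker logs every morning could look like

# hypothetical crontab entry: run the check daily at 06:30
30 6 * * * /bin/sh /usr/local/bin/get_errors.sh datanode,tasktracker

For standalone runs against a specific date, one option (an assumption, not part of the original script) is to let an optional second argument override the log_date assignment at the top of the script:

# use the second argument as the log date if given, otherwise yesterday
if [ -n "$2" ] ; then
    log_date=$2
else
    log_date=`date -d "1 day ago" +%Y-%m-%d`
fi

With that change, sh get_errors.sh namenode,jobtracker YYYY-MM-DD would report on the logs of the given date.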