1. 删除十天以上的日志数据
# Delete log files older than 10 days.
# -type f: only files (the original also fed directories to rm);
# -exec ... {} +: safe for names with spaces/newlines, unlike `| xargs rm`.
find /data/zz/logs -type f -mtime +10 -exec rm -f -- {} +
# find ./logs -type f -mtime +10 -exec rm -f -- {} +
# rm ./logs/log_${last_10dt}
2. 获取进程pid,并kill
#!/bin/bash
# Find the PID(s) of algorithms_predict and kill them.
# `grep -v grep` filters out the grep process itself.
PID=$(ps -ef | grep algorithms_predict | grep -v grep | awk '{print $2}')
# Quotes required: with several PIDs, an unquoted `[ -z $PID ]` becomes
# "[: too many arguments".
if [ -z "$PID" ]; then
  echo "process provider not exist" >> ../logs/algorithms_predict.log
  exit
else
  echo "process id: $PID" >> ../logs/algorithms_predict.log
  # $PID intentionally unquoted: it may hold several whitespace-separated PIDs
  kill -9 $PID
  echo "process algorithms_predict killed" >> ../logs/algorithms_predict.log
fi
或 ps -ef | grep algo | grep -v grep | awk -F' ' '{print $2}' | xargs kill -9(必须加 grep -v grep 过滤掉 grep 进程自身,否则会 kill 到它;也可直接用 pgrep -f algo | xargs kill -9)
3. 从元数据获取表中数据的条数
# Read the partition row count (numRows) from Hive table metadata.
# [0-9]+ instead of [0-9]* so grep never emits empty matches.
user_cnt=$(hive -e "desc formatted table_name partition(pt='2021-07-01');" \
  | grep "numRows" | grep -oE "[0-9]+")
4. 计算两日期间隔
计算日期 date1 - date2 有多少天
# date_diff DATE1 DATE2 — print the whole-day difference DATE1 - DATE2.
# Uses UTC (-u) so the epoch gap is an exact multiple of 86400 even when
# a DST transition falls between the two local midnights (with local
# time the truncating division could be off by one day).
# Returns non-zero if either date cannot be parsed (GNU date required).
function date_diff {
  local date1=${1}
  local date2=${2}
  local dt1_time dt2_time   # declared separately so a failed $(...) is not masked
  dt1_time=$(date -u -d "$date1" +%s) || return 1
  dt2_time=$(date -u -d "$date2" +%s) || return 1
  # $(( )) replaces the deprecated $[ ] arithmetic form.
  echo $(( (dt1_time - dt2_time) / 86400 ))
}
5. 下载数据到本地
# Download all train part files in parallel, then wait for completion.
# The glob is quoted so HDFS (not the local shell) expands it;
# \.gz escapes the dot — the original "part-.*.gz" also matched e.g. "part-xxxgz".
for trf in $(hadoop fs -ls "${instance_path}/train/*" | grep -o "part-.*\.gz"); do
  hadoop fs -get "${instance_path}/train/${trf}" "${train_dir}" &
done
wait
# NOTE(review): $? here is the status of `wait`, not of the individual
# background `hadoop fs -get` jobs — a failed download is not detected.
if_error_exit "Download train instance from ${instance_path}/train to ${train_dir}"
6. 检测文件是否存在
6.1 检测hdfs文件是否存在
检查HDFS文件是否存在,存在返回0,如果不存在,每10min检查一次,每1h报警一次,持续等待指定秒数(2h)后以退出码1终止脚本。
# check_hdfs_exist FILE — wait for an HDFS path to appear.
# Returns 0 once present; otherwise re-checks every 10 min, warns every
# hour, and terminates the whole script with exit status 1 after
# waiting the_max_wait_time seconds (2h).
function check_hdfs_exist {
  local target_file=${1}
  local beg=`date +%s`     # timestamp of the last warning
  local end=`date +%s`
  local start=`date +%s`   # anchor for the overall 2h deadline
  # Fixed: the original `local the_max_wait_time = 7200` (spaces around =)
  # is a runtime error under `local`, leaving the variable empty and
  # breaking the arithmetic comparison below.
  local the_max_wait_time=7200
  while :; do
    if ${HADOOP_HOME}/bin/hdfs dfs -test -e "$target_file"; then
      echo "---> $1 ready!"
      return 0
    fi
    sleep 600
    end=`date +%s`
    if (( end - beg > 3600 )); then
      beg=$end
      log_warn "${BASH_SOURCE[*]}" "${LINENO}" "need hdfs: $target_file!"
    fi
    if (( end - start > the_max_wait_time )); then
      log_error "${BASH_SOURCE[*]}" "${LINENO}" "need hdfs: $target_file! exit after waiting $the_max_wait_time seconds"
      exit 1
    fi
  done
}
调用
#!/usr/bin/env bash
# Example driver: wait for a partition directory to land on HDFS.
# source /etc/profile
source ./utils.sh
FILE_PATH="hdfs://wxlx/user/hive/warehouse/zz.db/table/pt=2021-06-08/hour=00"
# Quoted to keep the path a single argument even if it ever contains spaces.
check_hdfs_exist "$FILE_PATH"
6.2 等待hdfs文件生成并持续报警
检查HDFS文件是否存在,存在返回0,如果不存在,每10min(600秒)检查一次,2h(7200s)后,每半小时(1800)报警一次,持续等待直到文件生成。${1} 文件名称
# check_hdfs_hold FILE — block until an HDFS path exists.
# Returns 0 when present. Re-checks every 10 min (600s); after 2h
# (7200s) of waiting, warns every 30 min (1800s). Never times out.
function check_hdfs_hold {
  local target_file=${1}
  local current=`date +%s`  # when waiting began
  local beg=`date +%s`      # timestamp of the last warning
  local end=`date +%s`
  while :; do
    # Use `-test -d` instead of `-test -e` to check for a directory.
    if ${HADOOP_HOME}/bin/hdfs dfs -test -e "$target_file"; then
      echo "----$target_file is ready!!!"
      return 0
    fi
    sleep 600
    end=`date +%s`
    # Only start warning after the initial 2h grace period has passed.
    if (( end - current > 7200 )) && (( end - beg > 1800 )); then
      beg=$end
      log_warn "${BASH_SOURCE[*]}" "${LINENO}" "need hdfs: $target_file!"
    fi
  done
}
调用
# Wait until the "done" marker file exists under the given HDFS directory.
function check_hdfs_done {
  local base_dir=${1}
  # The function's own status is the last command's status, so no
  # explicit `return $?` is needed.
  check_hdfs_hold "${base_dir}/done"
}
# Wait until the Hadoop "_SUCCESS" marker exists under the given HDFS directory.
function check_hdfs_success {
  local base_dir=${1}
  # Status propagates implicitly from the last command.
  check_hdfs_hold "${base_dir}/_SUCCESS"
}
7. 判断命令是否执行成功及日志打印
7.1 判断命令是否执行成功
# if_error_exit MSG — inspect $? from the command executed immediately
# before this call; log success or failure and abort the script on failure.
# Must be the very next statement after the command being checked.
function if_error_exit {
  local rc=$?   # capture immediately, before any other command clobbers it
  if [ $rc -eq 0 ]; then
    log_debug "${BASH_SOURCE[*]}" "${LINENO}" "Success: ${1}"
  else
    log_error "${BASH_SOURCE[*]}" "${LINENO}" "Failed: ${1}"
    exit 1   # `exit -1` is non-POSIX; status 1 conveys the same failure
  fi
}
7.2 自定义打印日志信息
# log_message LEVEL CALL_STACK LINENO MESSAGE
# Formats one log line and, for WARN/ERROR/INFO levels, also POSTs it to
# the internal alert endpoint (spaces become '+' for the URL parameter).
function log_message {
  local log_level=${1}
  local cs=${2}
  local line_no=${3}
  local message=${4}
  local MSG=""
  local callStackStr=""
  # Build "outer:inner:" from the space-separated caller-stack words
  # (relies on word splitting of the unquoted expansion).
  for s in ${cs[*]}; do
    callStackStr="${s}:${callStackStr}"
  done
  MSG="[`hostname`][`whoami`][`date '+%Y-%m-%d %H:%M:%S'`][${log_level}][${callStackStr}:${line_no}] ${message}"
  if [[ "${log_level}" == "WARN" || "${log_level}" == "ERROR" ]]; then
    # Fixed: the prefix now reflects the actual level — WARN messages
    # were previously printed with a hard-coded "ERROR:" prefix.
    echo "${log_level}: $MSG"
    curl -H 'host:portal.ad.wkanx.com' -d 's={"type":"raw","data":"xxxxx"}' "http://10.2.52.9/alert?a=at&c=6000000006&m=`echo ${MSG} | sed 's/ /+/g'`" --globoff
  elif [[ "${log_level}" == "INFO" ]]; then
    echo "INFO: $MSG"
    curl -H 'host:portal.ad.wkanx.com' -d 's={"type":"raw","data":"xxxxx"}' "http://10.2.52.9/alert?a=at&c=6000000003&m=`echo ${MSG} | sed 's/ /+/g'`" --globoff
  fi
}
调用
# 为每个级别的通知设置不同的方式
# log LEVEL CALL_STACK LINENO MSG — thin forwarding wrapper; a place to
# hook per-level notification behavior without touching log_message.
function log {
  log_message "${1}" "${2}" "${3}" "${4}"
}
8. 计算指定时间前的N个小时的所有时间点
计算从 "$curr_dt $curr_hr" 向前推算 "$past_hours - 1" 个小时,中间经历的日期和小时,如果某一天24个小时都取到,则用 "*" 表示该日期对应的小时。 例如:"2018-09-17 14" 向前推算 "42" 个小时,则有结果。
# 2018-09-15,21
# 2018-09-15,22
# 2018-09-15,23
# 2018-09-16,*
# 2018-09-17,00
# 2018-09-17,01
# 2018-09-17,02
# 2018-09-17,03
# 2018-09-17,04
# 2018-09-17,05
# 2018-09-17,06
# 2018-09-17,07
# 2018-09-17,08
# 2018-09-17,09
# 2018-09-17,10
# 2018-09-17,11
# 2018-09-17,12
# 2018-09-17,13
# 2018-09-17,14
# select_dt_hr END_DATE END_HOUR N_HOURS — print every (date,hour) pair
# in the N_HOURS-long window ending at "END_DATE END_HOUR" (inclusive),
# one per line as "YYYY-MM-DD,HH". A day whose 24 hours are all covered
# is collapsed to "YYYY-MM-DD,*".
# Requires GNU date (-d) and the sibling date_diff function.
# NOTE(review): on N_HOURS < 1 this calls `exit -1`, terminating the
# whole shell rather than returning — confirm callers expect that.
function select_dt_hr {
local curr_dt=$1
local curr_hr=$2
# Step back N_HOURS-1 hours from the end point to find the first hour.
local past_hours=`expr $3 - 1`
if (($past_hours < 0)); then
exit -1
fi
local first_datetime=`date -d "$curr_dt $curr_hr -$past_hours hours" '+%Y-%m-%d %H:%M:%S'`
local first_date=${first_datetime:0:10} # date of the first (oldest) hour
local first_hour=${first_datetime:11:2} # hour-of-day of the first hour
local total_days_diff=`date_diff "$curr_dt" "$first_date"`
# Excluding the first and last day: day offsets (counted back from the
# end date) of the fully-covered middle days, from farthest to nearest.
local days=`seq $[total_days_diff-1] -1 1`
if [ z"$curr_dt" = z"$first_date" ]; then
# Window does not cross midnight: list each hour of the single day.
for hour in `seq $first_hour 1 $curr_hr`; do
printf "%s,%02d\n" $curr_dt $hour
done
else
# Window spans more than one day.
# First (partial or full) day:
if [ z"$first_hour" = z"00" ]; then
# Starts at 00 — the whole first day is covered.
printf "%s,*\n" $first_date
else
# Partial first day: from first_hour through 23.
for hour in `seq $first_hour 1 23`; do
printf "%s,%02d\n" $first_date $hour
done
fi
# Fully-covered middle days, each collapsed to "*".
for day in $days; do
printf "%s,*\n" `date -d "$curr_dt $curr_hr -$day days" '+%Y-%m-%d'`
done
# Last day: hours 00 through END_HOUR.
# NOTE(review): when END_HOUR is 23 this still lists 00..23 instead of
# collapsing the last day to "*" — presumably acceptable; verify.
for hour in `seq 0 1 $curr_hr`; do
printf "%s,%02d\n" $curr_dt $hour
done
fi
}
11