线上常用shell脚本分享
一.邮件发送
服务器经常可能需要发送邮件,就可以使用mailx工具,通过163、qq等邮件代理发送。
安装
# yum install mailx
设置配置文件
# vi /etc/mail.rc
set from=qtorm1@163.com smtp=smtp.163.com
set smtp-auth-user=qtorm1@163.com smtp-auth-password=GGAWP*****
set smtp-auth=login
可在163邮箱中找到对应SMTP服务器地址,并设置授权码
二.磁盘监控告警
原理:通过一台控制主机ssh到被监控主机,执行df -h获取各分区的磁盘使用率,超过阈值则告警。
首先需要控制主机能免密ssh到被监控主机上,通过ssh-keygen配置
#!/bin/bash
# Disk usage alarm: ssh to every host listed in host.info, run `df`, and print
# a warning for each /dev/* filesystem whose usage reaches the threshold.
#
# host.info format (lines starting with # are ignored):
#   192.168.1.102 root 22
#   192.168.1.103 root 22
# The control host must be able to ssh to every target without a password.

HOST_INFO=host.info   # inventory file: "<ip> <user> <port>" per line
THRESHOLD=80          # warn when usage (percent) is >= this value

# Reads `df -hP` output on stdin and prints one warning line per /dev/*
# filesystem whose Use% is >= $1 (defaults to $THRESHOLD).
report_high_usage() {
  local limit=${1:-$THRESHOLD}
  awk -v limit="$limit" '
    /^\/dev/ {
      used = int($5)   # "85%" -> 85
      if (used >= limit)
        printf "Warning: %s Partition usage %d%%!\n", $NF, used
    }'
}

# Walks the inventory once and checks every host.
# Returns 1 when the inventory cannot be read; unreachable hosts are reported
# on stderr and skipped.
main() {
  local ip user port df_output

  if [[ ! -r $HOST_INFO ]]; then
    echo "Error: cannot read $HOST_INFO" >&2
    return 1
  fi

  # The inventory is read on fd 3 (and ssh runs with -n) so that ssh can
  # never swallow the remaining inventory lines from the loop's input.
  while read -r ip user port _ <&3 || [[ -n $ip ]]; do
    [[ -z $ip || $ip == \#* ]] && continue
    # -P keeps every filesystem on one line even for long device names; the
    # output is held in a variable, so no temp file is left behind in /tmp.
    if ! df_output=$(ssh -n -p "${port:-22}" "$user@$ip" 'df -hP'); then
      echo "Error: ssh to $user@$ip failed" >&2
      continue
    fi
    report_high_usage "$THRESHOLD" <<<"$df_output"
  done 3<"$HOST_INFO"
}

main "$@"
三.网卡流量检测
原理:网卡流量可以在系统/proc/net/dev查看
#!/bin/bash
# Prints the receive/transmit rate of one network interface once per second,
# based on the byte counters in /proc/net/dev.
# Usage: ./script.sh <interface>      e.g. ./script.sh eth0

NIC=$1

# Prints "<rx_bytes> <tx_bytes>" for interface $1, read from $2 (defaults to
# /proc/net/dev). Returns non-zero when the interface is not listed.
nic_bytes() {
  local nic=$1 dev_file=${2:-/proc/net/dev}
  awk -v nic="$nic" '
    { sub(/:/, " ") }                       # "eth0:123" and "eth0: 123" both split cleanly
    $1 == nic { print $2, $10; found = 1 }  # exact name match, so eth0 does not match veth0
    END { exit !found }' "$dev_file"
}

# Formats a byte delta ($1) as KB with one decimal digit, e.g. 1536 -> 1.5KB/s.
# Negative deltas (counter reset) are shown as 0.0KB/s.
format_rate() {
  local delta=$1
  (( delta < 0 )) && delta=0
  printf '%d.%dKB/s' "$(( delta / 1024 ))" "$(( (delta % 1024) * 10 / 1024 ))"
}

# Samples the counters every second until interrupted or until the interface
# disappears. Returns 2 on missing argument, 1 on unknown interface.
main() {
  local old_in old_out new_in new_out

  if [[ -z $NIC ]]; then
    echo "Usage: ${0##*/} <interface>" >&2
    return 2
  fi
  if ! nic_bytes "$NIC" >/dev/null; then
    echo "Error: interface '$NIC' not found in /proc/net/dev" >&2
    return 1
  fi

  echo -e " In ------ Out"
  read -r old_in old_out < <(nic_bytes "$NIC")
  while true; do
    sleep 1
    read -r new_in new_out < <(nic_bytes "$NIC") || break
    echo "$(format_rate $(( new_in - old_in ))) $(format_rate $(( new_out - old_out )))"
    # Reuse this sample as the next baseline so that every printed line
    # covers exactly one second.
    old_in=$new_in
    old_out=$new_out
  done
}

main "$@"
效果:一秒一次检测某网卡的入和出流量
四.系统初始化优化
常用系统初始化优化如下,可根据自身需求操作
#!/bin/bash
# Baseline tuning for a freshly installed CentOS/RHEL 6 or 7 server.
# Must be run as root. Every step is written so the script can be re-run
# without duplicating configuration.

# Sets the timezone to Asia/Shanghai and installs a daily ntpdate cron job.
sync_time() {
  # -f: /etc/localtime normally already exists, plain `ln -s` would fail.
  ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
  if ! crontab -l 2>/dev/null | grep -q ntpdate; then
    # Minute field is 0: run once at 01:00 (a "*" would run every minute of
    # that hour).
    (echo "0 1 * * * ntpdate time.windows.com >/dev/null 2>&1"; crontab -l 2>/dev/null) | crontab -
  fi
}

# Disables SELinux in the config file (takes effect after the next reboot).
disable_selinux() {
  sed -i 's/^SELINUX=\(enforcing\|permissive\)[[:space:]]*$/SELINUX=disabled/' /etc/selinux/config
}

# Stops and disables the firewall service of the detected major release.
disable_firewall() {
  if grep -Eq 'release 7\.[0-9]' /etc/redhat-release; then
    systemctl stop firewalld
    systemctl disable firewalld
  elif grep -Eq 'release 6\.[0-9]' /etc/redhat-release; then
    service iptables stop
    chkconfig iptables off
  fi
}

# Shell history timestamps and idle-session timeout.
configure_shell() {
  if ! grep -q HISTTIMEFORMAT /etc/bashrc; then
    echo 'export HISTTIMEFORMAT="%F %T `whoami` "' >> /etc/bashrc
  fi
  if ! grep -q "TMOUT=600" /etc/profile; then
    echo "export TMOUT=600" >> /etc/profile
  fi
}

# Forbids remote root login. Matches the directive whether or not it is
# commented out. sshd is not restarted here; do that once another admin
# account is confirmed to work.
harden_ssh() {
  sed -i 's/^#\?PermitRootLogin[[:space:]].*/PermitRootLogin no/' /etc/ssh/sshd_config
}

# Stops cron from mailing job output to root.
silence_cron_mail() {
  sed -i 's/^MAILTO=root/MAILTO=""/' /etc/crontab
}

# Raises the open-file limit for all users.
raise_file_limits() {
  if ! grep -q '^\* soft nofile 65535' /etc/security/limits.conf; then
    cat >> /etc/security/limits.conf << EOF
* soft nofile 65535
* hard nofile 65535
EOF
  fi
}

# Appends kernel network tunables to /etc/sysctl.conf, skipping keys that are
# already present, and lowers swappiness for the running kernel.
tune_kernel() {
  local setting key
  while IFS= read -r setting; do
    key=${setting%% =*}
    grep -q "^${key//./\\.}[[:space:]]*=" /etc/sysctl.conf \
      || echo "$setting" >> /etc/sysctl.conf
  done << 'EOF'
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_tw_buckets = 20480
net.ipv4.tcp_max_syn_backlog = 20480
net.core.netdev_max_backlog = 262144
net.ipv4.tcp_fin_timeout = 20
EOF
  # Swap tendency, 0-100: the larger the value, the more swap is used.
  echo "0" > /proc/sys/vm/swappiness
}

# Installs build tools and performance analysis utilities
# (iostat is shipped inside the sysstat package).
install_tools() {
  yum install gcc make autoconf vim sysstat net-tools iftop iotop lrzsz -y
}

main() {
  if (( EUID != 0 )); then
    echo "Error: this script must be run as root" >&2
    return 1
  fi
  sync_time
  disable_selinux
  disable_firewall
  configure_shell
  harden_ssh
  silence_cron_mail
  raise_file_limits
  tune_kernel
  install_tools
}

main "$@"
五.批量创建用户
运行:./脚本名.sh 用户名1 用户名2 …
#!/bin/bash
# Creates every user named on the command line with a random 8-character
# password and records "<user> <password>" in user.txt.
# Usage: ./script.sh user1 user2 ...

USER_FILE=user.txt

# Prints a random 8-character alphanumeric password taken from /dev/urandom
# ($RANDOM only has 32768 possible values, which is trivially guessable).
gen_password() {
  LC_ALL=C tr -dc 'A-Za-z0-9' < /dev/urandom | head -c 8
}

# Creates each user given as an argument; existing users are reported and
# skipped, failed creations are reported on stderr.
main() {
  local name pass

  for name in "$@"; do
    if id "$name" &>/dev/null; then
      echo "$name User already exists!"
      continue
    fi
    if ! useradd "$name"; then
      echo "$name User create failed!" >&2
      continue
    fi
    pass=$(gen_password)
    echo "$pass" | passwd --stdin "$name" &>/dev/null
    # The file holds plaintext passwords: keep it readable by the owner only.
    ( umask 077; echo "$name $pass" >> "$USER_FILE" )
    chmod 600 "$USER_FILE"
    echo "$name User create successful."
  done
}

main "$@"
六.找cpu内存高的进程
ps -eo可以自定义输出的进程字段,--sort=按指定字段排序(字段前加-表示降序),head控制显示的行数
# Show the first 10 output lines (header included) of the process list,
# ordered first by CPU usage and then by memory usage, highest first.
for sort_key in pcpu pmem; do
  ps -eo user,pid,pcpu,pmem,args --sort=-"$sort_key" | head -n 10
done
七.查看服务器利用率
原理:cpu是通过vmstat命令收集的数据,memory是通过free -m命令收集,磁盘是通过df命令,tcp是通过ss命令。
#!/bin/bash
# Prints a short utilisation report: CPU (vmstat), memory (free), disks (df)
# and TCP connection states (ss).

# Prints three CPU samples taken one second apart.
function cpu() {
  local i util iowait
  for ((i = 0; i < 3; i++)); do
    # The first data line of vmstat is the average since boot; `vmstat 1 2`
    # prints a second line (NR==4) covering only the last second.
    # $15 = idle, $16 = iowait.
    read -r util iowait < <(vmstat 1 2 | awk 'NR == 4 { print 100 - $15 "%", $16 "%" }')
    echo "CPU - 使用率: $util , 等待磁盘IO响应使用率: $iowait"
  done
}

# Prints total / used / remaining memory in GB, from the "Mem:" row of
# `free -m`; the last column of that row is taken as the remaining amount.
function memory() {
  free -m | awk 'NR == 2 {
    printf "内存 - 总大小: %.1fG , 使用: %.1fG , 剩余: %.1fG\n", $2 / 1024, ($2 - $NF) / 1024, $NF / 1024
  }'
}

# Prints mount point, size, used space and usage percentage for every /dev/*
# filesystem, using a single df call (-P keeps each entry on one line).
function disk() {
  df -hP | awk '/^\/dev/ {
    printf "硬盘 - 挂载点: %s , 总大小: %s , 使用: %s , 使用率: %s\n", $NF, $2, $3, $5
  }'
}

# Prints the number of TCP sockets per state. The header line of ss is
# skipped so it is not counted as a state.
function tcp_status() {
  local summary
  summary=$(ss -ant | awk 'NR > 1 { status[$1]++ } END { for (s in status) printf "%s:%d ", s, status[s] }')
  echo "TCP连接状态 - $summary"
}

cpu
memory
disk
tcp_status
八.批量检测网站异常
原理:通过curl检测网站,再通过返回的状态码确认是否异常,设定2和3开头的为访问正常。
通过for语句循环判断,超过3次不是上述状态码则可以触发发送邮件等操作。
#!/bin/bash
# Checks every site in URL_LIST with curl. A 2xx or 3xx status code counts as
# healthy; after MAX_TRIES consecutive failures a warning is printed (hook a
# mail or other alert in at that point).

URL_LIST="www.baidu.com www.qq.com"
MAX_TRIES=3

for URL in $URL_LIST; do
  FAIL_COUNT=0
  for ((i = 1; i <= MAX_TRIES; i++)); do
    # -o /dev/null discards the body, -s is silent, -w prints only the status
    # code. --connect-timeout bounds the connection phase; --max-time bounds
    # the whole request so a stalled transfer cannot hang the check.
    HTTP_CODE=$(curl -o /dev/null --connect-timeout 3 --max-time 10 -s -w "%{http_code}" "$URL")
    if [[ $HTTP_CODE == [23]?? ]]; then
      echo "$URL OK"
      break
    fi
    FAIL_COUNT=$((FAIL_COUNT + 1))
    echo "$URL retry $FAIL_COUNT"
  done
  if ((FAIL_COUNT == MAX_TRIES)); then
    echo "Warning: $URL Access failure!"
  fi
done
九.批量主机执行命令
市面上有很多类似ansible的批量管理工具,而这里是通过shell的方式ssh到被控制主机执行命令,并借助expect完成交互,在某些场景下可能会用得到。
# cat host.info
192.168.0.102 root 22 password1
192.168.0.103 root 22 password1
#!/bin/bash
# Runs one command on every host listed in host.info by driving an
# interactive ssh login with expect.
# Usage: ./script.sh <command...>
#
# host.info format: "<ip> <user> <port> <password>", lines starting with #
# are ignored.
# NOTE(review): host.info stores passwords in plaintext; keep it mode 600 and
# prefer key-based login where possible.

COMMAND=$*
HOST_INFO=host.info
SSH_TIMEOUT=${SSH_TIMEOUT:-30}   # seconds expect waits for each prompt / for the command to end

main() {
  local ip user port pass

  if ! command -v expect >/dev/null; then
    echo "Error: expect is not installed" >&2
    return 1
  fi
  if [[ ! -r $HOST_INFO ]]; then
    echo "Error: cannot read $HOST_INFO" >&2
    return 1
  fi

  # Inventory is read on fd 3 so the spawned session cannot consume it.
  while read -r ip user port pass <&3 || [[ -n $ip ]]; do
    [[ -z $ip || $ip == \#* ]] && continue
    # Values are handed to expect through the environment rather than being
    # pasted into the script text, so quotes, "$" or "[" in the password or
    # the command cannot break or alter the expect script.
    H_IP=$ip H_USER=$user H_PORT=$port H_PASS=$pass H_CMD=$COMMAND \
      H_TIMEOUT=$SSH_TIMEOUT expect -c '
      set timeout $env(H_TIMEOUT)
      spawn ssh -p $env(H_PORT) $env(H_USER)@$env(H_IP)
      expect {
        "(yes/no" { send "yes\r"; exp_continue }
        "password:" { send "$env(H_PASS)\r"; exp_continue }
        "$env(H_USER)@*" { send "$env(H_CMD)\r exit\r"; expect eof }
      }
    '
    echo "-------------------"
  done 3<"$HOST_INFO"
}

main "$@"
十.一键部署LNMP
通过源码编译的方式安装nginx、php,可以按需设置安装版本
#!/bin/bash
# Interactive installer that builds Nginx and PHP-FPM from source.
# Run as root on a yum-based system. If a nginx.conf is present in the
# directory the script is started from, it is copied into the installation.

# Versions to install; adjust as needed.
NGINX_V=1.15.6
PHP_V=5.6.36

TMP_DIR=/tmp               # where sources are downloaded and built
INSTALL_DIR=/usr/local     # installation prefix
PWD_C=$PWD                 # directory the script was started from

# Must be called directly after the command to check: when that command
# failed, prints message $1 and aborts the script with exit status 1.
function command_status_check() {
  local status=$?
  if [ "$status" -ne 0 ]; then
    echo "$1"
    exit 1
  fi
}

# Downloads, builds, installs and starts Nginx under $INSTALL_DIR/nginx.
function install_nginx() {
  cd "$TMP_DIR"
  command_status_check "Nginx - 无法进入目录 $TMP_DIR!"
  yum install -y gcc gcc-c++ make openssl-devel pcre-devel wget
  # -O overwrites an earlier download instead of creating "<name>.1".
  wget -O nginx-${NGINX_V}.tar.gz http://nginx.org/download/nginx-${NGINX_V}.tar.gz \
    && tar zxf nginx-${NGINX_V}.tar.gz \
    && cd nginx-${NGINX_V}
  command_status_check "Nginx - 源码下载或解压失败!"
  ./configure --prefix=$INSTALL_DIR/nginx \
    --with-http_ssl_module \
    --with-http_stub_status_module \
    --with-stream
  command_status_check "Nginx - 平台环境检查失败!"
  make -j 4
  command_status_check "Nginx - 编译失败!"
  make install
  command_status_check "Nginx - 安装失败!"
  mkdir -p $INSTALL_DIR/nginx/conf/vhost
  # `command` bypasses any interactive "cp -i" alias or function.
  command cp -rf "$PWD_C/nginx.conf" $INSTALL_DIR/nginx/conf
  rm -rf -- "${INSTALL_DIR:?}"/nginx/html/*
  echo "ok" > $INSTALL_DIR/nginx/html/status.html
  echo '' > $INSTALL_DIR/nginx/html/status.php
  $INSTALL_DIR/nginx/sbin/nginx
  command_status_check "Nginx - 启动失败!"
}

# Downloads, builds, installs and starts PHP-FPM under $INSTALL_DIR/php.
function install_php() {
  cd "$TMP_DIR"
  command_status_check "PHP - 无法进入目录 $TMP_DIR!"
  # wget is installed here too, so option 2 also works on its own.
  yum install -y gcc gcc-c++ make gd-devel libxml2-devel \
    libcurl-devel libjpeg-devel libpng-devel openssl-devel \
    libmcrypt-devel libxslt-devel libtidy-devel wget
  wget -O php-${PHP_V}.tar.gz http://docs.php.net/distributions/php-${PHP_V}.tar.gz \
    && tar zxf php-${PHP_V}.tar.gz \
    && cd php-${PHP_V}
  command_status_check "PHP - 源码下载或解压失败!"
  ./configure --prefix=$INSTALL_DIR/php \
    --with-config-file-path=$INSTALL_DIR/php/etc \
    --enable-fpm --enable-opcache \
    --with-mysql --with-mysqli --with-pdo-mysql \
    --with-openssl --with-zlib --with-curl --with-gd \
    --with-jpeg-dir --with-png-dir --with-freetype-dir \
    --enable-mbstring --enable-hash
  command_status_check "PHP - 平台环境检查失败!"
  make -j 4
  command_status_check "PHP - 编译失败!"
  make install
  command_status_check "PHP - 安装失败!"
  cp php.ini-production $INSTALL_DIR/php/etc/php.ini
  cp sapi/fpm/php-fpm.conf $INSTALL_DIR/php/etc/php-fpm.conf
  cp sapi/fpm/init.d.php-fpm /etc/init.d/php-fpm
  chmod +x /etc/init.d/php-fpm
  /etc/init.d/php-fpm start
  command_status_check "PHP - 启动失败!"
}

# Placeholder for menu entry 3: reports that MySQL installation is not
# automated instead of failing with "command not found".
function install_mysql() {
  echo "MySQL - 暂未实现自动安装,请手动安装!" >&2
  return 1
}

# Shows the menu, reads the choice and dispatches to the installers.
# Returns 1 for an unknown choice.
function main() {
  local number

  echo
  echo -e "\tMenu\n"
  echo -e "1. Install Nginx"
  echo -e "2. Install PHP"
  echo -e "3. Install MySQL"
  echo -e "4. Deploy LNMP"
  echo -e "9. Quit"

  read -r -p "请输入编号:" number
  case $number in
    1)
      install_nginx
      ;;
    2)
      install_php
      ;;
    3)
      install_mysql
      ;;
    4)
      install_nginx
      install_php
      ;;
    9)
      exit
      ;;
    *)
      echo "无效的编号: $number" >&2
      return 1
      ;;
  esac
}

main "$@"
十一.监控mysql主从同步是否异常
原理:主库把数据变更写入binlog,从库将其拉取到relay log(中继日志)后再重放执行。可以在从库上执行show slave status;,通过检查Slave_IO_Running和Slave_SQL_Running两个状态是否都为Yes来判断同步是否异常。
#!/bin/bash
# Checks MySQL replication on a slave and mails an alert when either
# replication thread is not running, or when the status cannot be read at all.

HOST=localhost
USER=root
PASSWD=123.com
MAIL_TO=xxx@163.com

# Reads `show slave status\G` output on stdin and prints one "Name:Value"
# token per line for Slave_IO_Running and Slave_SQL_Running.
slave_thread_status() {
  awk '/Slave_.*_Running:/ { print $1 $2 }'
}

# Mails message $1 to $MAIL_TO.
send_alert() {
  echo "$1" | mail -s "Master-Slave Staus" "$MAIL_TO"
}

# Returns 1 when the replication status could not be obtained.
main() {
  local status_output entry thread_name thread_status

  # The password is passed through an option file on a file descriptor rather
  # than with -p on the command line, where `ps` would expose it.
  # 2>/dev/null hides client warnings.
  status_output=$(mysql \
    --defaults-extra-file=<(printf '[client]\npassword="%s"\n' "$PASSWD") \
    -h"$HOST" -u"$USER" -e 'show slave status\G' 2>/dev/null | slave_thread_status)

  # No output means the connection failed or the server is not a slave; that
  # must raise an alert too, otherwise a dead server looks healthy.
  if [[ -z $status_output ]]; then
    send_alert "Error: MySQL Master-Slave status could not be read from $HOST!"
    return 1
  fi

  for entry in $status_output; do
    thread_name=${entry%:*}     # text before the colon: the thread name
    thread_status=${entry#*:}   # text after the colon: its state
    if [[ $thread_status != "Yes" ]]; then
      send_alert "Error: MySQL Master-Slave $thread_name status is $thread_status!"
    fi
  done
}

main "$@"
十二.mysql分库分表备份
备份命令:
分库:mysqldump -uroot -pxxxx -B A库 > A.sql
分表:mysqldump -uroot -pxxxx A库 table > t.sql
分库备份
#!/bin/bash
# Dumps every non-system database into its own file:
#   $BACKUP_DIR/<db>_<timestamp>.sql

DATE=$(date +%F_%H-%M-%S)
HOST=localhost
USER=backup
PASS=123.com
BACKUP_DIR=/data/db_backup
CRED_FILE=   # temporary client option file, created by main

# Reads database names on stdin and drops the system schemas. -x matches
# whole lines only, so names such as "mysql_app" or "system_logs" are kept.
filter_user_dbs() {
  grep -Evx 'information_schema|mysql|performance_schema|sys'
}

# Dumps each user database. Prints "<file> 备份失败!" for every failed dump
# and removes the partial file. Returns non-zero if anything failed.
backup_databases() {
  local raw db backup_name rc=0

  if ! raw=$(mysql --defaults-extra-file="$CRED_FILE" -N -s -e "show databases;" 2>/dev/null); then
    echo "Error: cannot list databases on $HOST" >&2
    return 1
  fi
  mkdir -p "$BACKUP_DIR" || return 1

  while IFS= read -r db; do
    [[ -n $db ]] || continue
    backup_name=$BACKUP_DIR/${db}_${DATE}.sql
    if ! mysqldump --defaults-extra-file="$CRED_FILE" -B "$db" > "$backup_name" 2>/dev/null; then
      echo "$backup_name 备份失败!"
      rm -f -- "$backup_name"   # do not leave a truncated dump that looks valid
      rc=1
    fi
  done < <(filter_user_dbs <<<"$raw")
  return "$rc"
}

# Writes the credentials to a private temp file (mktemp creates it with mode
# 600) so the password never appears on a command line, runs the backup and
# removes the file again.
main() {
  local rc=0
  CRED_FILE=$(mktemp) || return 1
  # NOTE(review): a password containing '"' or '\' needs escaping here.
  printf '[client]\nhost=%s\nuser=%s\npassword="%s"\n' "$HOST" "$USER" "$PASS" > "$CRED_FILE"
  backup_databases || rc=$?
  rm -f -- "$CRED_FILE"
  return "$rc"
}

main "$@"
分表备份
#!/bin/bash
# Dumps every table of every non-system database into its own file:
#   $BACKUP_DIR/<db>_<timestamp>/<table>.sql

DATE=$(date +%F_%H-%M-%S)
HOST=localhost
USER=backup
PASS=123.com
BACKUP_DIR=/data/db_backup
CRED_FILE=   # temporary client option file, created by main

# Reads database names on stdin and drops the system schemas. -x matches
# whole lines only, so names such as "mysql_app" or "system_logs" are kept.
filter_user_dbs() {
  grep -Evx 'information_schema|mysql|performance_schema|sys'
}

# Dumps every table of database $1 into $BACKUP_DIR/<db>_<timestamp>/.
# Prints "<file> 备份失败!" for each failed dump and removes the partial file.
backup_tables_of() {
  local db=$1 db_dir table backup_name rc=0
  db_dir=$BACKUP_DIR/${db}_${DATE}
  mkdir -p "$db_dir" || return 1

  while IFS= read -r table; do
    [[ -n $table ]] || continue
    backup_name=$db_dir/${table}.sql
    if ! mysqldump --defaults-extra-file="$CRED_FILE" "$db" "$table" > "$backup_name" 2>/dev/null; then
      echo "$backup_name 备份失败!"
      rm -f -- "$backup_name"   # do not leave a truncated dump that looks valid
      rc=1
    fi
  done < <(mysql --defaults-extra-file="$CRED_FILE" -N -s -D "$db" -e "show tables;" 2>/dev/null)
  return "$rc"
}

# Lists the user databases and backs up each one table by table.
backup_all() {
  local raw db rc=0

  if ! raw=$(mysql --defaults-extra-file="$CRED_FILE" -N -s -e "show databases;" 2>/dev/null); then
    echo "Error: cannot list databases on $HOST" >&2
    return 1
  fi
  while IFS= read -r db; do
    [[ -n $db ]] || continue
    backup_tables_of "$db" || rc=1
  done < <(filter_user_dbs <<<"$raw")
  return "$rc"
}

# Writes the credentials to a private temp file (mktemp creates it with mode
# 600) so the password never appears on a command line, runs the backup and
# removes the file again.
main() {
  local rc=0
  CRED_FILE=$(mktemp) || return 1
  # NOTE(review): a password containing '"' or '\' needs escaping here.
  printf '[client]\nhost=%s\nuser=%s\npassword="%s"\n' "$HOST" "$USER" "$PASS" > "$CRED_FILE"
  backup_all || rc=$?
  rm -f -- "$CRED_FILE"
  return "$rc"
}

main "$@"
十三.切割nginx日志
原理:备份旧的日志文件,再向nginx主进程发送USR1信号让其重新打开日志文件,即可生成新的日志文件
#!/bin/bash
# Rotates nginx access logs; intended to run from cron shortly after midnight.
# Each log is moved aside, nginx is told to reopen its log files, and the
# moved file is compressed to
#   $LOG_DIR/<YYYY-MM>/<log>_<YYYY-MM-DD>.tar.gz   (dates are yesterday's)

LOG_DIR=/www/wwwlogs
YESTERDAY_TIME=$(date -d "yesterday" +%F)
# Month directory is derived from yesterday as well, so the log of the last
# day of a month is filed under that month.
LOG_MONTH_DIR=$LOG_DIR/$(date -d "yesterday" +"%Y-%m")
LOG_FILE_LIST="www.yj-example.cn.log"
PID="/www/server/nginx/logs/nginx.pid"

# Returns 1 when the pid file is missing, the month directory cannot be
# created, or nginx cannot be signalled.
main() {
  local log_file rotated
  local -a rotated_files=()

  if [[ ! -s $PID ]]; then
    echo "Error: nginx pid file $PID not found" >&2
    return 1
  fi
  mkdir -p "$LOG_MONTH_DIR" || return 1

  # Step 1: move the live logs out of the way. nginx keeps writing to the
  # moved file (same inode) until it is signalled, so no line is lost.
  for log_file in $LOG_FILE_LIST; do
    [[ -f $LOG_DIR/$log_file ]] || continue
    rotated=$LOG_MONTH_DIR/${log_file}_${YESTERDAY_TIME}
    mv -- "$LOG_DIR/$log_file" "$rotated" && rotated_files+=("$rotated")
  done

  # Step 2: USR1 makes the nginx master reopen its logs, which creates fresh
  # files under the original names.
  kill -USR1 "$(cat "$PID")" || return 1
  sleep 1   # give the workers a moment to switch files before compressing

  # Step 3: compress the rotated files and drop the uncompressed copies.
  for rotated in "${rotated_files[@]}"; do
    tar -zcf "$rotated.tar.gz" -C "$LOG_MONTH_DIR" "${rotated##*/}" \
      && rm -f -- "$rotated"
  done
}

main "$@"
十四.DOS攻击防范
原理:通过检测nginx访问日志,当某个IP一分钟访问大于10次时,使用防火墙屏蔽此IP
#!/bin/bash
# Blocks client IPs that requested more than MAX_HITS pages during the
# current minute, based on the nginx access log. Intended to run from cron
# every minute, as root.

# LC_ALL=C: nginx always writes English month abbreviations, whatever the
# system locale is.
DATE=$(LC_ALL=C date +%d/%b/%Y:%H:%M)
LOG_FILE=/usr/local/nginx/logs/demo2.access.log
MAX_HITS=10

# Reads access-log lines on stdin and prints every client IP (first field)
# that has more than $2 requests whose "[timestamp" starts with minute $1
# (format dd/Mon/yyyy:HH:MM). Matching on the leading "[" keeps a date that
# merely appears inside a URL or referer from being counted.
abnormal_ips() {
  awk -v stamp="[$1" -v limit="$2" '
    index($0, stamp) > 0 { hits[$1]++ }
    END { for (ip in hits) if (hits[ip] > limit) print ip }'
}

# Adds a DROP rule for every abnormal IP that is not blocked yet and logs it
# to /tmp/drop_ip.log. Returns 1 when the log file cannot be read.
main() {
  local ip

  if [[ ! -r $LOG_FILE ]]; then
    echo "Error: cannot read $LOG_FILE" >&2
    return 1
  fi

  while IFS= read -r ip; do
    [[ -n $ip ]] || continue
    # -C checks for this exact rule; grepping the listing would also match
    # other addresses that merely contain the same digits.
    if ! iptables -C INPUT -s "$ip" -j DROP 2>/dev/null; then
      iptables -I INPUT -s "$ip" -j DROP \
        && echo "$(date +'%F_%T') $ip" >> /tmp/drop_ip.log
    fi
  done < <(tail -n5000 "$LOG_FILE" | abnormal_ips "$DATE" "$MAX_HITS")
}

main "$@"
十五.文件更新监控
原理:通过inotifywait命令监控目录,目录下的文件发生变化时触发rsync同步,也可以改为发送邮件通知。
#!/bin/bash
# Watches MON_DIR recursively and mirrors it to /tmp/opt with rsync whenever
# a file is created, finished being written, or moved into the tree.

MON_DIR=/opt

# -m keeps monitoring, -q is quiet, -r is recursive; %w%f prints the full
# path. close_write is included because `create` fires before any content has
# been written, so syncing on create alone would copy empty files.
inotifywait -mqr --format '%w%f' -e create,close_write,moved_to "$MON_DIR" |
  while IFS= read -r changed_path; do
    # Sync the monitored directory itself, not a hard-coded path.
    rsync -avz "$MON_DIR" /tmp/opt
    # Alternative action: send a mail for every event instead.
    #echo "$(date +'%F %T') create $changed_path" | mail -s "dir monitor" xxx@163.com
  done