本文共 4755 字,大约阅读时间需要 15 分钟。
线上django服务偶尔会因为机器访问mysql过多,造成too many connections 问题,导致服务挂掉。
之前调大了最大连接数,有点治标不治本。
所以今天抽空写个监控mysql连接数的服务,如果连接数超过某个阈值,就杀掉一部分连接。
核心代码。
20191928更新:
之前直接time.sleep有点low,这回换了个Python定时任务的第三方框架——schedule,还挺好用的
参考:
更新后的核心代码如下:
# -*- coding: utf-8 -*-import getpassimport timeimport functoolsimport MySQLdbimport schedulefrom config import *from log_setup import set_logif getpass.getuser() == 'wangjinyu': from loc_config import *logger = set_log(__name__)def catch_exceptions(cancel_on_failure=False): def catch_exceptions_decorator(job_func): @functools.wraps(job_func) def wrapper(*args, **kwargs): try: return job_func(*args, **kwargs) except: import traceback logger.info(traceback.format_exc()) if cancel_on_failure: return schedule.CancelJob return wrapper return catch_exceptions_decorator@catch_exceptions()def handle_process_num(): conn = MySQLdb.connect(host=MYSQL_HOST, port=MYSQL_PORT, user=MYSQL_USER, password=MYSQL_PASSWORD) cur = conn.cursor() cur.execute('show full processlist') process_list = list(cur.fetchall()) cur_process_num = len(process_list) cur.close() if cur_process_num > MAX_CON_NUM: logger.info('current process num: %d' % cur_process_num) logger.info('start kill process') try: kill_process(process_list, cur_process_num) except Exception: logger.exception('process error ocurr')def kill_process(process_list, cur_process_num): killed_num = 0 # order by connected time process_list.sort(key=lambda x: x[5], reverse=True) conn = MySQLdb.connect(host=MYSQL_HOST, port=MYSQL_PORT, user=MYSQL_USER, password=MYSQL_PASSWORD) cur = conn.cursor() # try to kill 1/3 of processes for i in range(cur_process_num // 2): command = process_list[i][4] time = int(process_list[i][5]) # if command is Sleep and Sleep time over 600s then kill process if command == 'Sleep' and time > MAX_SLEEP_TIME: try: cur.execute('kill %s' % process_list[i][0]) except Exception: logger.exception('kill process %d fail' % process_list[i][0]) continue killed_num += 1 cur.execute('show full processlist') post_process_num = len(cur.fetchall()) cur.close() logger.info('this time kill %d process' % killed_num) logger.info('remain process num:%d' % post_process_num)if __name__ == '__main__': # run job every 30 minutes schedule.every(INTERVAL_TIME).seconds.do(handle_process_num) logger.info('mysql monitor service startup success') while True: schedule.run_pending() time.sleep(1)
# -*- coding: utf-8 -*-import timeimport MySQLdbimport getpassfrom config import *from log_setup import set_loglogger = set_log(__name__)def hanlde_process_num(): conn = MySQLdb.connect(host=MYSQL_HOST, port=MYSQL_PORT, user=MYSQL_USER, password=MYSQL_PASSWORD) cur = conn.cursor() cur.execute('show full processlist') process_list = list(cur.fetchall()) cur_process_num = len(process_list) cur.close() if cur_process_num > MAX_CON_NUM: logger.info('current process num: %d' % cur_process_num) logger.info('start kill process') try: kill_process(process_list) except Exception: logger.exception('process error ocurr')def kill_process(process_list): killed_num = 0 #order by connected time process_list.sort(key=lambda x:x[5],reverse=True) conn = MySQLdb.connect(host=MYSQL_HOST, port=MYSQL_PORT, user=MYSQL_USER, password=MYSQL_PASSWORD) cur = conn.cursor() #try to kill 1/3 of processes for i in range(MAX_CON_NUM // 3): Command = process_list[i][4] Time = int(process_list[i][5]) #if command is Sleep and Sleep time over 600s then kill process if Command == 'Sleep' and Time > MAX_SLEEP_TIME: try: cur.execute('kill %s' % process_list[i][0]) except Exception: logger.exception('kill process %d fail'%process_list[i][0]) continue killed_num += 1 cur.execute('show full processlist') post_process_num = len(cur.fetchall()) cur.close() logger.info('this time kill %d process' % killed_num) logger.info('remain process num:%d' % post_process_num)def sever(): logger.info('mysql monitor startup success') while True: try: hanlde_process_num() except Exception: logger.exception("hanlde process num error") time.sleep(10) continue #check mysql connect num every an hour time.sleep(3600)if __name__ == '__main__': sever()
转载地址:http://rkxws.baihongyu.com/