#! python
"""Analyse '--- INFO:' timing lines found in application log files.

Every matching log line is parsed into a duration, an action type (for
example DB, URI or CONTROLLER), a message, a source location and a line
number.  Durations are grouped per action type and per
(message, location, line); for each type the entries with the highest
average duration are printed.

Usage text from the accompanying readme.md ("Python 3.4, usage:")::

    find . -type f | xargs python analyse.py -j | python -m json.tool > result.json

NOTE(review): the readme passes ``-j``, but this script only declares
``-c``, ``-t``, ``-v`` and ``--help``; with ``-j`` it reports the unknown
option, prints the usage text and exits with a failure status.  Confirm
with the author whether JSON output was meant to be part of this change.
"""

# Provenance: this module was carried by the git patch
#   8f6e989ee654f2c767771f6b449611f73e6c5fd0
#   From: xamgore <xamgore@ya.ru>
#   Date: Sun, 28 Jun 2015 20:22:57 +0300
#   Subject: [PATCH] Logs analyser (by Denis)
# as ~dev_rating/application/logs/analyse.py.  The same patch also
#   * changed .gitignore from "~dev_rating/application/logs/" to
#     "~dev_rating/application/logs/*/", and
#   * added ~dev_rating/application/logs/readme.md (quoted in the docstring).

__author__ = "Denis M."


import sys
import getopt
import re
import os.path
import argparse
import errno


DEFAULT_LINES_TO_SHOW = 5

# Compiled once at import time instead of once per parsed line.  A raw string
# is required because the pattern contains the regex escapes "\." and "\|".
# Capture groups used below:
#   1 - duration (treated as seconds: it is multiplied by 1000 for "ms")
#   4 - action type, when the text after "|" has a "type: message" shape
#   5 - message belonging to that action type
#   6 - message when there is no "type:" prefix
#   7 - source location (text after "in ")
#   8 - text after the final ":" (the line number)
_INFO_LINE_RE = re.compile(
    r'^.*--- INFO: ([0-9]+(\.[0-9]+)?).*\|(([^:]*): (.*)|(.*))in (/.*|file)+:(.*)$')


def is_info_line(line):
    """Check whether the line is an info line.

    :type line: str
    :param line: Some line from the log file
    :rtype: bool
    :returns: True when the line matches the '--- INFO:' timing format
    """
    return _INFO_LINE_RE.match(line) is not None


def iter_action_info(info_lines):
    """Parse info lines into their components.

    :type info_lines: iterable of str
    :param info_lines: lines for which is_info_line(line) is True; lines that
        do not match are skipped instead of raising
    :rtype: generator of tuple
    :returns: tuples which have
        - float duration of action
        - str type of action (upper-cased; 'CONTROLLER' when the line has no
          explicit "type:" prefix)
        - str message
        - str location
        - str line number
    """
    for info_line in info_lines:
        match = _INFO_LINE_RE.match(info_line)
        if match is None:
            continue
        duration = float(match.group(1))
        if match.group(4) is None:
            act_type = 'CONTROLLER'
            message = match.group(6).strip()
        else:
            act_type = match.group(4).strip().upper()
            message = match.group(5).strip()
        location = match.group(7)
        # Group 8 is the text after the last ':'.  The previous code read
        # group 5 here, which is the message, not the line number.
        line = match.group(8)
        yield duration, act_type, message, location, line


def iter_lines(files):
    """Return a generator over the lines of all specified files.

    All paths are checked before the generator is returned, so a missing
    file is reported at call time rather than in the middle of iteration.

    :type files: list
    :param files: list of files to retrieve lines from
    :return: generator for lines (without line endings) from all files
    :raises IOError: when a path is not an existing regular file; the
        exception's ``filename`` attribute holds the offending path
    """
    files = list(files)  # the paths are walked twice: validation, then reading
    for file_path in files:
        if not os.path.isfile(file_path):
            raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), file_path)

    def gen_lines():
        for file_name in files:
            # The context manager closes each file even when the consumer
            # stops iterating early.
            with open(file_name) as log_file:
                for line in log_file.read().splitlines():
                    yield line
    return gen_lines()


def print_usage():
    """Print the command line help, using the script's base name."""
    print('usage: {0} [-c N] [-t message_type] [-v] log-1.php log-2.php ...\n'
          '-c N To show N the longest operations. Mnemonic: count.\n'
          '-t message_type To show only operations of message_type type.\n'
          ' (e. g. URI, DB, Controller)\n'
          '-v To show additional information.'
          .format(os.path.basename(sys.argv[0])))


def _average(durations):
    """Return the arithmetic mean of a non-empty list of durations."""
    return sum(durations) / len(durations)


def _collect_durations(info_lines, type_to_show=None):
    """Group durations as {act_type: {(message, location, line): [durations]}}.

    When type_to_show is given, actions of every other type are dropped.
    """
    data = {}
    for duration, act_type, message, location, line in iter_action_info(info_lines):
        if type_to_show is not None and act_type != type_to_show:
            continue
        per_type = data.setdefault(act_type, {})
        per_type.setdefault((message, location, line), []).append(duration)
    return data


def _print_report(data, lines_to_show, is_verbose):
    """Print, per action type, the lines_to_show slowest entries by average."""
    for act_type, actions in data.items():
        print('*' * 20 + ' ' + act_type + ' ' + '*' * 20)
        print('')
        slowest = sorted(actions.items(),
                         reverse=True,
                         key=lambda item: _average(item[1]))[:lines_to_show]
        for (message, location, line), durations in slowest:
            print(message)
            if is_verbose:
                print(location)
                print(line)
            print('average: {0}ms'.format(int(1000 * _average(durations))))
            if is_verbose:
                print('min: {0}ms'.format(int(1000 * min(durations))))
                print('max: {0}ms'.format(int(1000 * max(durations))))
            print('')


def main(argv=None):
    """Run the analyser.

    :type argv: list or None
    :param argv: command line arguments without the program name; defaults
        to sys.argv[1:]
    :rtype: int
    :returns: 0 on success (or after --help), -1 on bad usage or I/O errors
    """
    if argv is None:
        argv = sys.argv[1:]

    # collect information from script arguments
    try:
        options, log_files = getopt.getopt(argv, 'c:t:v', ['help'])
    except getopt.GetoptError as err:
        print(err)
        print_usage()
        return -1

    lines_to_show = DEFAULT_LINES_TO_SHOW
    type_to_show = None
    is_verbose = False
    for name, value in options:
        if name == '--help':
            print_usage()
            return 0
        if name == '-c':
            try:
                lines_to_show = int(value)
            except ValueError:
                lines_to_show = -1
            if lines_to_show < 0:
                print('Invalid count: {0}.'.format(value))
                print_usage()
                return -1
        elif name == '-t':
            # Parsed action types are upper-cased, so normalise the filter
            # the same way ("Controller" must match "CONTROLLER").
            type_to_show = value.strip().upper()
        elif name == '-v':
            is_verbose = True

    if not log_files:
        print('No files specified.')
        print_usage()
        return -1

    try:
        lines = filter(is_info_line, iter_lines(log_files))  # lines with '--- INFO:'
    except IOError as err:
        print('File not found: {0}.'.format(err.filename))
        return -1

    try:
        # Files are opened lazily while the lines are consumed, so read
        # errors surface here rather than in iter_lines() itself.
        data = _collect_durations(lines, type_to_show)
    except IOError as err:
        print('Error while reading: {0}.'.format(err.filename))
        return -1

    _print_report(data, lines_to_show, is_verbose)
    return 0


if __name__ == '__main__':
    sys.exit(main())