Skip to content

Split logs by message python script

andrzejekandrzejek Polska
edited July 2019 in New Engines/Platforms
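This simple script splits your log into separate files, one per message type. It reads the log from "input.txt" and writes the results into a folder named "output" (create that folder next to the script before running it):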

from typing import List


# Substrings searched for in each log line. Every entry gets its own pair of
# output files named after the substring itself.

# Registration outcome messages.
LINES_REGISTRATION = [
    'registration failed',
    'unknown registration status',
    'registration successful',
]

# Download-related messages.
LINES = ['download failed']

# Captcha-related messages.
LINES_CAPTCHA = ['ReCaptcha v2']


# Every substring, in the order the groups are declared above.
ALL_LINES = [*LINES_REGISTRATION, *LINES, *LINES_CAPTCHA]


def get_file_map(lines: List[str]) -> List[dict]:
    """Open one pair of append-mode output files for every search string.

    Args:
        lines: Substrings to look for in the log. Each one is also used as
            the base name of its output files.

    Returns:
        One dict per entry of ``lines`` (same order) with the keys:
        ``'file'`` (handle on ``./output/<line>.txt``), ``'file_urls'``
        (handle on ``./output/<line>_urls.txt``) and ``'line'`` (the
        substring itself). The caller is responsible for closing the handles.
    """
    # Local import: the surrounding file only imports ``typing`` at the top.
    import os

    # open(..., mode='a+') creates missing files but not missing folders, so
    # make sure the target folder exists instead of failing on a fresh setup.
    os.makedirs("./output", exist_ok=True)

    return [
        {
            'file': open(f"./output/{x}.txt", encoding='utf-8', mode='a+'),
            'file_urls': open(f"./output/{x}_urls.txt", encoding='utf-8', mode='a+'),
            'line': x,
        }
        for x in lines
    ]


if __name__ == '__main__':
    # Open every output file up front; each entry pairs a search string with
    # the two files that collect its matches.
    file_map = get_file_map(ALL_LINES)

    try:
        with open("./input.txt") as file:
            for line in file:
                # A line is copied to the file of every search string it contains.
                for d in file_map:
                    if d['line'] in line:
                        d['file'].write(line)
                        # Only the last space-separated token is treated as a
                        # possible URL; it keeps the line's trailing newline,
                        # so each URL ends up on its own row.
                        log_split = line.split(' ')
                        if 'http' in log_split[-1]:
                            d['file_urls'].write(log_split[-1])  # It would be cool to modify SER with python
    finally:
        # The handles were opened outside a ``with`` block, so close them
        # explicitly (also when reading the input fails) to flush the buffers.
        for d in file_map:
            d['file'].close()
            d['file_urls'].close()

Comments

  • import os
    from typing import List


    # Folder that receives the split files (the trailing slash is relied on
    # when paths are built by string concatenation) and the log to read.
    OUTPUT = "./output/"
    INPUT = "./input.txt"


    # Substrings searched for in each log line, grouped by topic.

    # Registration outcome messages.
    LINES_REGISTRATION = [
        'registration failed',
        'unknown registration status',
        'registration successful',
    ]

    # Download-related messages.
    LINES = ['download failed']

    # Captcha-related messages.
    LINES_CAPTCHA = ['ReCaptcha v2']


    # Every substring, in the order the groups are declared above.
    ALL_LINES = [*LINES_REGISTRATION, *LINES, *LINES_CAPTCHA]


    def delete_existing_output(output_dir=None):
        """Remove the ``.txt`` files left in the output folder by an earlier run.

        Only entries located directly inside the folder are removed; nothing
        inside sub-folders is touched. The previous implementation walked the
        whole tree but always built the path as ``OUTPUT + name``, so a
        ``.txt`` file in a sub-folder either raised ``FileNotFoundError`` or
        deleted a same-named file at the top level.

        Args:
            output_dir: Folder to clean. Defaults to the module-level
                ``OUTPUT`` when omitted.
        """
        target = OUTPUT if output_dir is None else output_dir

        # Nothing to clean on a first run; os.walk was silent in that case too.
        if not os.path.isdir(target):
            return

        with os.scandir(target) as entries:
            for entry in entries:
                # Same split as os.walk: everything that is not a directory
                # counts as a file.
                if not entry.is_dir() and entry.name.endswith(".txt"):
                    os.remove(entry.path)


    def get_file_map(lines: List[str], output_dir=None) -> List[dict]:
        """Open one pair of append-mode output files for every search string.

        Args:
            lines: Substrings to look for in the log. Each one is also used
                as the base name of its output files.
            output_dir: Folder that receives the files. Defaults to the
                module-level ``OUTPUT`` when omitted.

        Returns:
            One dict per entry of ``lines`` (same order) with the keys:
            ``'file'`` (handle on ``<line>.txt``), ``'file_urls'`` (handle on
            ``<line>_urls.txt``) and ``'line'`` (the substring itself). The
            caller is responsible for closing the handles.
        """
        target = OUTPUT if output_dir is None else output_dir

        # open(..., mode='a+') creates missing files but not missing folders,
        # so make sure the folder exists instead of failing on a first run.
        os.makedirs(target, exist_ok=True)

        return [
            {
                'file': open(os.path.join(target, f"{x}.txt"), encoding='utf-8', mode='a+'),
                'file_urls': open(os.path.join(target, f"{x}_urls.txt"), encoding='utf-8', mode='a+'),
                'line': x,
            }
            for x in lines
        ]


    def match_log_line(line: str, file_map: List[dict]):
        """Copy ``line`` into the output files of the first pattern it contains.

        Args:
            line: One raw line of the log, including its line ending if any.
            file_map: Entries with the keys ``'line'`` (substring to look
                for), ``'file'`` and ``'file_urls'`` (writable handles).
        """
        # NOTE(review): the posted source lost its indentation. The ``return``
        # is read here as ending the search after the first matching pattern;
        # confirm against the author's original file.
        hit = next((entry for entry in file_map if entry['line'] in line), None)
        if hit is None:
            return

        hit['file'].write(line)

        # Text after the last space; it is the whole line when there is no space.
        tail = line.rsplit(' ', 1)[-1]
        if 'http' in tail:
            hit['file_urls'].write(tail)


    def split_log_file(file_map: List[dict]):
        """Read the log at ``INPUT`` and pass each line to ``match_log_line``.

        Args:
            file_map: Pattern/file entries forwarded unchanged with every line.
        """
        with open(INPUT) as log:
            for entry in log:
                match_log_line(entry, file_map)

    # import re
    # def get_gsa_log_messages():
    # messages = {}
    # regx = r"\d+[/]\d+.*"
    # fout = open("./gsa_log_msg.txt", encoding="utf-8", mode="a+")
    #
    # with open(INPUT) as file:
    # for line in file:
    # log_regx = re.findall(regx, line)
    # if log_regx:
    # gsa_log = []
    # msg = log_regx[0]
    # log_split: List[str] = msg.split(" ")
    # for l in log_split[1:]:
    # if not l.startswith("("):
    # gsa_log.append(l)
    # else:
    # msg = " ".join(gsa_log)
    # messages.setdefault(msg, 0)
    # messages[msg] += 1
    # print(messages)


    def split_log():
        """Run the whole job: clean old output, open new files, split the log."""
        delete_existing_output()
        f_map = get_file_map(ALL_LINES)
        try:
            split_log_file(f_map)
        finally:
            # The handles come from plain open() calls, so close them here
            # (also when splitting fails) to flush the buffered writes.
            for entry in f_map:
                entry['file'].close()
                entry['file_urls'].close()


    if __name__ == '__main__':
        split_log()
Sign In or Register to comment.