I have a function that reads a database log file and extracts the SELECT operations from it, like this:
# Patterns are compiled once at import time rather than once per log line.
_WHITESPACE_RE = re.compile(r'\t|\n|\r|\s{2,}')
_DATE_RE = re.compile(r"\b(\d{6})(?=\s\d{1,}:\d{2}:\d{2})\b")
_SELECT_RE = re.compile(r'\b(SELECT)\b')
_BEFORE_SELECT_RE = re.compile(r'^.+?(?=SELECT)')
_PARENTHESES_RE = re.compile('[(){}]')

# Line endings that switch capturing on ("important") or off (the others).
_MARKER_SUFFIXES = ("important", "'%general_log%'", "puro")


def getSelectMySql(log):
    """Collect SELECT statements logged after an ``important`` marker line.

    The file is streamed line by line. Each line is right-stripped, then
    every tab, newline, carriage return or run of two or more whitespace
    characters is replaced by a single space.

    A line containing six digits (``yymmdd``) followed by whitespace and a
    ``h:mm:ss`` time starts a new group: the current group, if non-empty, is
    flushed to the result, and the date reformatted as ``YYYY-MM-DD`` becomes
    the first item of the new group.

    A line ending in ``important``, ``'%general_log%'`` or ``puro`` sets the
    capture flag: on for ``important``, off for the other two. While the flag
    is on, any other line that contains the word ``SELECT`` and does not end
    with ``SELECT`` is stored after removing the shortest non-empty prefix
    that precedes a ``SELECT`` and all ``(``, ``)``, ``{``, ``}`` characters.

    Args:
        log: Path of the log file. It is opened as UTF-8 and undecodable
            bytes are ignored.

    Returns:
        A list of tuples, one per flushed group. The last group is always
        appended, even when empty, so a log that yields nothing returns
        ``[()]``.

    Raises:
        ValueError: If the six matched digits are not a valid ``%y%m%d`` date.

    Side effects:
        Prints ``Done`` followed by a blank line when finished.
    """
    result = []
    buffer = []
    success = False

    with open(log, 'r', encoding='utf-8', errors='ignore') as data:
        # Stream the file: no intermediate list of all normalised lines.
        for raw_line in data:
            line = _WHITESPACE_RE.sub(' ', raw_line.rstrip())

            date = _DATE_RE.search(line)
            if date:
                day = datetime.datetime.strptime(
                    date.group(), '%y%m%d'
                ).strftime('%Y-%m-%d')
                if buffer:
                    result.append(tuple(buffer))
                    buffer.clear()
                buffer.append(day)

            if line.endswith(_MARKER_SUFFIXES):
                success = line.endswith("important")
            elif (
                success
                and not line.endswith("SELECT")
                # Searched last so the regex only runs while capturing.
                and _SELECT_RE.search(line)
            ):
                statement = _BEFORE_SELECT_RE.sub('', line)
                buffer.append(_PARENTHESES_RE.sub('', statement))

    # The trailing group is appended unconditionally (possibly empty).
    result.append(tuple(buffer))
    print('Done\n')
    return result
From the data, this function saves only the SELECT lines that come after the word `important`. An example of the file looks like this:
190413 7:55:31 32168376 Query SHOW variables like '%general_log%'
32168491 Connect puro@17#.##.#.## on puro
32168491 Query SELECT * FROM `file` WHERE `identifier` = 'ca28a3b30f893899556749679f8d3066' LIMIT 1
32168491 Quit
32168492 Connect [email protected]#.#.# on important
32168492 Query SET NAMES 'utf8'
32168492 Query SHOW FULL COLUMNS FROM `sys_user`
32168492 Query SELECT
kcu.constraint_name,
kcu.column_name,
kcu.referenced_table_name,
kcu.referenced_column_name
FROM information_schema.referential_constraints AS rc
JOIN information_schema.key_column_usage AS kcu ON
(
kcu.constraint_catalog = rc.constraint_catalog OR
(kcu.constraint_catalog IS NULL AND rc.constraint_catalog IS NULL)
) AND
kcu.constraint_schema = rc.constraint_schema AND
kcu.constraint_name = rc.constraint_name
WHERE rc.constraint_schema = database() AND kcu.table_schema = database()
AND rc.table_name = 'sys_user' AND kcu.table_name = 'sysx_user'
32168492 Query SELECT * FROM `lecturer_syllabus` WHERE ((`lec_id`='588') AND (`ta`='2016') AND (`sem_ta`='2')) AND (deleted !=1)
32168492 Query SHOW FULL COLUMNS FROM `lect_year_syllabus`
The output will look like this:
[['190413', '7:55:31', SELECT * FROM `lecturer_syllabus` WHERE ((`lec_id`='588') AND (`ta`='2016') AND (`sem_ta`='2')) AND (deleted !=1)]]
Since this is my first attempt, I would like feedback on my approach, because the code runs slowly on larger files.