How to parse a file and populate a python dictionary with its content

Question

So I have the following file summary1:

---
Project: pgm1
Last-Status: success
summary:  102 passed, 88 warnings in 26.11s
---
Project: pgm2
Last-Status: failed
summary:  1 failed, 316 passed, 204 warnings in 42.94s
---
Project: pgm3
Last-Status: success
summary:  400 passed, 40 skipped, 1 xfailed in 3.17s
---

And I need to parse its contents and then, in a loop, create a dictionary with pre-defined values:

entry = dict()
entry =  {
        "{#STATUS}": 0,
        "{#PASSED}": 0,
        "{#FAILED}": 0,
        "{#WARNING}": 0,
        "{#SKIPPED}": 0,
        "{#XFAILED}": 0
          }

And then populate the corresponding dictionary keys, with the parsed values from the file, resulting in something like this:

entry =  {
              "{#STATUS}": 1
              "{#DESCRIPTION}": "kytos/mef_eline",
              "{#PASSED}": 316,
              "{#FAILED}": 1,
              "{#WARNING}": 0,
              "{#SKIPPED}": 0,
              "{#XFAILED}": 0,
}... And so on for all 3 Project-Desc data sections in the file

However, I have not been able to figure out how to do the parsing of the file and the assignment of the variables. Through my searches I've found that regex would be a good tool for this, but I've never used it before.

The best way is to use regex and then, iterating over every match (for example, each 'Project-Desc' occurrence), populate your list of entries — Victor Ermakov
– Victor Ermakov, Commented Apr 28, 2021 at 13:46

Ajax1234 · Accepted Answer · 2021-04-28 16:11:28Z

2

You can parse out the various report values from each entry in the file and form separate dictionaries with the result:

import re, itertools as it
# Maps a category word from the summary line to the key used in the result.
hds = {
    'passed': '{#PASSED}',
    'failed': '{#FAILED}',
    'warnings': '{#WARNING}',
    'skipped': '{#SKIPPED}',
    'xfailed': '{#XFAILED}',
}

# Read every line of the report, dropping the newline characters.
with open('your_file.txt') as f:
    contents = [line.strip('\n') for line in f]

# Split the lines into one list per record; the '---' separator lines
# themselves are discarded.
d = [
    list(group)
    for is_separator, group in it.groupby(contents, key=lambda x: x == '---')
    if not is_separator
]
def get_dict(entry):
    """Convert the lines of one record into a result dictionary.

    ``entry`` is a sequence of three ``"key: value"`` lines, in the order
    description, status, summary.  The returned dictionary contains
    ``"{#STATUS}"`` and ``"{#DESCRIPTION}"`` plus one item for every key
    listed in the module-level ``hds`` mapping.  Counts found in the summary
    line are kept as the strings captured from the text; categories that do
    not appear in the summary get the integer ``0``.
    """
    # Split on the first ': ' only, so a value that itself contains ': '
    # still produces exactly one (key, value) pair per line.
    _, [desc, status, summary] = zip(*[line.split(': ', 1) for line in entry])
    # Raw string: '\d' and '\s' are not valid escapes in a normal literal.
    found = re.findall(r'\d+\s[a-z]+', summary)
    # Each match looks like "<count> <category>"; index the counts by category.
    counts = {}
    for match in found:
        count, category = match.split()
        counts[category] = count
    return {
        "{#STATUS}": status,
        "{#DESCRIPTION}": desc,
        **{key: counts.get(category, 0) for category, key in hds.items()},
    }

# One dictionary per record, in file order.
result = list(map(get_dict, d))

Output

[{'{#STATUS}': 'success', '{#DESCRIPTION}': 'kytos/mef_eline', '{#PASSED}': '102', '{#FAILED}': 0, '{#WARNING}': '88', '{#SKIPPED}': 0, '{#XFAILED}': 0}, {'{#STATUS}': 'failed', '{#DESCRIPTION}': 'kytos/kytos', '{#PASSED}': '316', '{#FAILED}': '1', '{#WARNING}': '204', '{#SKIPPED}': 0, '{#XFAILED}': 0}, {'{#STATUS}': 'success', '{#DESCRIPTION}': 'kytos/python-openflow', '{#PASSED}': '400', '{#FAILED}': 0, '{#WARNING}': 0, '{#SKIPPED}': '40', '{#XFAILED}': '1'}]

edited Apr 28, 2021 at 16:11

answered Apr 28, 2021 at 15:47

Ajax1234

71.7k9 gold badges67 silver badges110 bronze badges

Sign up to request clarification or add additional context in comments.

4 Comments

user15278135 Over a year ago

thanks that's a great help. will that code also work for a file with 15 entries?

Ajax1234 Over a year ago

@Daniela Yes indeed

user15278135 Over a year ago

I get the following error when executing the code: d1 = {(j:=i.split())[-1]:j[0] for i in re.findall('\d+\s[a-z]+', ps)} ^ SyntaxError: invalid syntax

Ajax1234 Over a year ago

@Daniela := is an assignment expression, available in Python versions >= 3.8. I just updated my solution to be compatible with versions < 3.8

DevScheffer · Accepted Answer · 2021-04-28 15:36:59Z

def break_text(lst_text):
    """Extract description, status and summary counts from one record.

    ``lst_text`` is indexed at positions 1, 2 and 3 (position 0 is not
    read).  Returns a 3-tuple of lists as produced by ``re.findall``:
    the text after ": " on line 1, the text after ": " on line 2, and every
    "<digits> <word>" pair found on line 3.
    """
    import re

    after_colon = re.compile(r": (.*)")
    count_and_label = re.compile(r"\d+ \w+")

    description = after_colon.findall(lst_text[1])
    last_status = after_colon.findall(lst_text[2])
    counts = count_and_label.findall(lst_text[3])
    return description, last_status, counts


def create_dict(lst):
    """Build one report entry from a ``(description, status, counts)`` triple.

    ``lst[0]`` and ``lst[1]`` are stored unchanged under ``"{#DESCRIPTION}"``
    and ``"{#Status}"``.  ``lst[2]`` is an iterable of ``"<count> <category>"``
    strings; each recognised category overwrites the matching counter with
    the count exactly as given (a string).  Counters with no matching
    category keep their default of ``0``.

    Categories that have no counter in the entry (for example ``"errors"``
    or ``"deselected"``) are ignored instead of raising ``KeyError``.
    """
    entry = {
        "{#Status}": lst[1],
        "{#DESCRIPTION}": lst[0],
        "{#PASSED}": 0,
        "{#FAILED}": 0,
        "{#WARNING}": 0,
        "{#SKIPPED}": 0,
        "{#XFAILED}": 0,
    }
    category_to_key = {
        "passed": "{#PASSED}",
        "failed": "{#FAILED}",
        # pytest prints the singular form when the count is exactly one.
        "warning": "{#WARNING}",
        "warnings": "{#WARNING}",
        "skipped": "{#SKIPPED}",
        "xfailed": "{#XFAILED}",
    }
    for item in lst[2]:
        count, category = item.split()
        key = category_to_key.get(category)
        if key is not None:
            entry[key] = count
    return entry


# Load the whole report as a list of lines (newline characters removed).
with open("t.txt", "r") as handle:
    file = handle.read().splitlines()

final_dict = {}
c = 0
# A record is four lines: the "---" separator followed by three
# "key: value" lines, so step through the file four lines at a time.
for i in range(0, len(file), 4):
    text = file[i : i + 4]
    if len(text) < 4:
        # Closing separator or an incomplete trailing record (e.g. a blank
        # line after the last "---"): break_text needs all four lines.
        continue
    res_tmp = break_text(text)
    res = create_dict(res_tmp)
    final_dict[c] = res
    c += 1

print(final_dict)

output

{0: {'{#Status}': ['success'], '{#DESCRIPTION}': ['kytos/mef_eline'], '{#PASSED}': '102', '{#FAILED}': 0, '{#WARNING}': '88', '{#SKIPPED}': 0, '{#XFAILED}': 0}, 1: {'{#Status}': ['failed'], '{#DESCRIPTION}': ['kytos/kytos'], '{#PASSED}': '316', '{#FAILED}': '1', '{#WARNING}': '204', '{#SKIPPED}': 0, '{#XFAILED}': 0}, 2: {'{#Status}': ['success'], '{#DESCRIPTION}': ['kytos/python-openflow'], '{#PASSED}': '400', '{#FAILED}': 0, '{#WARNING}': 0, '{#SKIPPED}': '40', '{#XFAILED}': '1'}}

Jan · Accepted Answer · 2021-04-28 20:47:25Z

This might be a bit over the top but your file can be seen as sort of a DSL - a domain specific language. That said, why not write yourself a little parser, e.g. with the help of parsimonious:

from parsimonious.grammar import Grammar
from parsimonious.grammar import NodeVisitor
import re

data = """
---
Project-Desc: kytos/mef_eline
Last-Status: success
pytest summary:  102 passed, 88 warnings in 26.11s
---
Project-Desc: kytos/kytos
Last-Status: failed
pytest summary:  1 failed, 316 passed, 204 warnings in 42.94s
---
Project-Desc: kytos/python-openflow
Last-Status: success
pytest summary:  400 passed, 40 skipped, 1 xfailed in 3.17s
---

"""

class DSL(NodeVisitor):
    """Visitor that turns the parsed report text into one dict per block.

    The ``grammar`` attribute describes the input: ``---`` separator lines,
    each followed by zero or more ``key: value`` lines.  The ``visit_*``
    methods below handle the grammar rule of the same name.
    """

    # Captures "<digits> <word>" from one comma-separated summary item.
    rx = re.compile(r'(\d+)\s+(\w+).*')

    grammar = Grammar(r"""
        content = (block / ws)+
        block   = sep line*
        line    = key colon ws value nl?
        key     = ~"^[^:\n]+"m
        value   = ~".+"
        colon   = ":"
        nl      = ~"[\n\r]+"
        sep     = "---" nl
        ws      = ~"\s*"
    """)

    def generic_visit(self, node, visited_children):
        """Fallback for rules without a ``visit_*`` method.

        Returns the list of already-visited children, or ``None`` when that
        list is empty.
        """
        return visited_children or None

    def visit_line(self, node, visited_children):
        """Convert one ``key: value`` line into a dictionary.

        The five children follow the ``line`` rule: key, colon, whitespace,
        value and optional newline.  ``value`` is the list produced by
        ``visit_value`` (the text split on commas).

        * More than one item: each item is matched against ``rx`` and stored
          as ``{"#<WORD>": "<count>"}`` with the word upper-cased.
        * Exactly one item: returns ``{"#STATUS": ...}`` or
          ``{"#DESCRIPTION": ...}`` when the key ends with ``"Status"`` or
          ``"Desc"``.

        In every other case the method falls through and returns ``None``.
        """
        key, _, _, value, _ = visited_children
        if key:
            if len(value) > 1:
                values = {}
                for item in value:
                    item = item.strip()
                    # Rebinding ``value`` and ``key`` here does not disturb
                    # the loop: the iterator over the original list already
                    # exists.  ``search`` returns None for an item without a
                    # "<digits> <word>" pair, which would raise here.
                    value, key = self.rx.search(item).groups()
                    key = "#" + key.upper()
                    values[key] = value
                return values
            else:
                value = value[0]
                if key.endswith("Status"):
                    return {"#STATUS": value}
                elif key.endswith("Desc"):
                    return {"#DESCRIPTION": value}
                # NOTE(review): a single-item line whose key ends with
                # neither suffix (e.g. a summary without any comma) yields
                # None - confirm that is intended.

    def visit_key(self, node, visited_children):
        """Return the matched key text unchanged."""
        return node.text

    def visit_value(self, node, visited_children):
        """Return the matched value text split on commas (always a list)."""
        return node.text.split(",")

    def visit_block(self, node, visited_children):
        """Drop the separator and return the visited ``line*`` part."""
        _, values = visited_children
        return values


    def visit_content(self, node, visited_children):
        """Yield one merged dictionary per block that produced line results.

        This is a generator, so the parse result has to be iterated.
        """
        for child in visited_children:
            # presumably ``child`` is the one-element list built by
            # ``generic_visit`` for the ``(block / ws)`` group, holding either
            # the block's line results or None - TODO confirm against
            # parsimonious.
            if child[0]:
                # Merge the per-line dicts; later keys overwrite earlier ones.
                yield {key: value for dct in child[0] for key, value in dct.items()}

# Parse the sample text and print each block's dictionary as it is produced.
dsl = DSL()
parsed_blocks = dsl.parse(data)
for block in parsed_blocks:
    print(block)

Which will yield

{'#DESCRIPTION': 'kytos/mef_eline', '#STATUS': 'success', '#PASSED': '102', '#WARNINGS': '88'}
{'#DESCRIPTION': 'kytos/kytos', '#STATUS': 'failed', '#FAILED': '1', '#PASSED': '316', '#WARNINGS': '204'}
{'#DESCRIPTION': 'kytos/python-openflow', '#STATUS': 'success', '#PASSED': '400', '#SKIPPED': '40', '#XFAILED': '1'}

Yes, it is longer and more complicated to learn, but it is very forgiving (try adding empty lines wherever you want).

Collectives™ on Stack Overflow

How to parse a file and populate a python dictionary with its content

3 Answers 3

4 Comments

Comments

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

3 Answers 3

4 Comments

Comments

Comments

Your Answer

Sign up or log in

Post as a guest

Related