1

I am reading below json file and trying to store it in .xlsx format using pandas library of python.

JSON:

{ "log": [ 
 {
   "code": "info",
   "message": {"text": "[info] Activation of plug-in abcd rule processor (xule) successful, version Check version using Tools->Xule->Version on the GUI or --xule-version on the command line. - xule "},
   "refs": [{"href": "xule"}],
   "level": "info"
 }
,
{
   "code": "xyz.F1.all.7",
   "level": "error",
   "message": {
                "text": "[xyz.F1.all.7] The value for ForResale with a value of 63 has a unit of utr:MWh. This concept allows units of utr:MWh.\n\nElement : xyz:ForResale\nPeriod : 2016-01-01 to 2016-12-31\nUnit : utr:MWh\n\nRule Id:xyz.F1.all.7 - TestUtilitiesInc-428-2016Q4F1.abcd 4114",
                "severity": "error",
                "cid": "63096080",
                "filing_url": "C:\\Users\\TEST\\Desktop\\TestUtilitiesInc-428-2016Q4F1.abcd"
             },
   "refs": [
        {
         "properties": [
              [
               "label",
               "Megawatt hours sold, Sales for Resale"
              ],
              [
               "namespace",
               "http://xyzcom/form/2020-01-01/xyz"
              ],
              [
               "name",
               "ForResale"
              ],
              [
               "QName",
               "xyz:ForResale"
              ],
              [
               "contextRef",
               "c-177",
               [
                [
                 "entity",
                 "C002089",
                 [
                  [
                   "scheme",
                   "http://abcd.org/entity/identification/scheme"
                  ]
                 ]
                ],
                [
                 "startDate",
                 "2016-01-01"
                ],
                [
                 "endDate",
                 "2016-12-31"
                ],
                [
                 "dimensions",
                 "(1)",
                 [
                  [
                   "xyz:SalesForResaleAxis",
                   "<xyz:SalesForResaleDomain>0-3</xyz:SalesForResaleDomain>\n\t\t\t\t\n"
                  ]
                 ]
                ]
               ]
              ],
              [
               "unitRef",
               "u-03 (MWh)",
               [
                [
                 "measure",
                 "utr:MWh"
                ]
               ]
              ],
              [
               "decimals",
               "INF"
              ],
              [
               "precision",
               "None"
              ],
              [
               "xsi:nil",
               "false"
              ],
              [
               "value",
               "63"
              ]
             ],
         "href": "TestUtilitiesInc-428-2016Q4F1.abcd#f-731",
         "objectId": "__91261",
         "sourceLine": 4114
        }
   ]
}]}

With following python code,

import json
import pandas
from pandas.io.json import json_normalize

def flatten_json(y):
    out = {}

    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '_')
                i += 1
        else:
            out[name[:-1]] = x

    flatten(y)
    return out

with open('C:/Users/Desktop/SampleTestFiles/logfile_1.json', encoding="utf-8") as json_file:
    data = json.load(json_file)

flat = flatten_json(data)
normalizedjson = json_normalize(data)
writer = pandas.ExcelWriter('myDataFrame.xlsx')
normalizedjson.to_excel(writer, 'DataFrame')
writer.save()

I am able to create .xslx file with following format: enter image description here

But, I want to create output in following format in Excel file which should have columns for - code, message, refs and level. And, Whatever values are under these keys in json should be respective row for each log.

enter image description here

1 Answer 1

1
data1 = []
for i in range(len(data['log'])):
    code = data['log'][i]['code']
    message = data['log'][i]['message']
    refs = data['log'][i]['refs']
    level = data['log'][i]['level']
    data1.append((i,code, message, refs, level))

df = pd.DataFrame(data1, columns = ['log','code','message','refs','level'])

#op

enter image description here

Sign up to request clarification or add additional context in comments.

1 Comment

Thanks! This is what I was looking for :)

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.