1

We have a large JSON file with nested content and want to convert it to a CSV file so that we can use it for data modeling. However, I feel the code is missing something that I am unable to spot. I am very new to Python and need help.

This is what the content of the file looks like:

[{
"address": " -, Gulbarga-585102", 
"college": "College (Architecture)", 
"courses": [
{
    "brief_details": "", 
    "college_name": "School of ArchitecturePoojya Doddappa Appa College of Engineering", 
    "course_branch": "B.Arch", 
    "course_duration": " 5-year", 
    "course_nature": " Full-Time", 
    "course_title": "", 
    "course_type": " B.Arch", 
    "no_of_seats": " 60", 
    "qualifications": "", 
    "selection_process": ""
}
], 
"email": " [email protected]", 
"fax": "08472-255685", 
"name": "School Of Architecturepoojya Doddappa Appa College Of    Engineering", 
"phone": "08472-224262 Extn. 435, 220742", 
"recognition": " V.t.u. Belgaum", 
"website": ""
}]

And following is my code

from bs4 import BeautifulSoup
from os import listdir
import os
from os.path import isfile, join
import fnmatch
import shelve
import json
import csv

def write_csv(read_file_path):
    """Convert the JSON college list at read_file_path into CSV rows.

    The file is parsed as JSON and iterated as a sequence of college dicts.
    A header and one row per college are written to a hard-coded
    colleges.csv; each college's nested 'courses' entries are written through
    a second writer, file_course.

    NOTE(review): as written this function cannot run to completion:
      * `list_courses` is not defined in the code shown, so the assignment to
        `list_courses.list_colleges_headers` raises NameError; presumably
        `list_courses_headers = [...]` was intended.
      * `file_course` is never created in the code shown.
    """
    # NOTE(review): neither file object opened below is closed explicitly.
    data = json.loads(open(read_file_path).read())
    file_colleges = csv.writer(open(r"/home/maitreyee/SchoolCollege.com/collegesdb/colleges.csv", "w", newline=""))
    list_colleges_headers = ['name', 'recognition','address','phone','fax','email','website']
    file_colleges.writerow(list_colleges_headers)
    # NOTE(review): NameError here -- see the docstring.
    list_courses.list_colleges_headers = ['course_title', 'course_type','course_duration','course_nature','qualifications','brief_details','selection_process', 'course_branch', 'no_of_seats']

    for d in data:
        # NOTE(review): this row holds d['college'] and omits d['email'], so its
        # values do not line up with list_colleges_headers above.
        file_colleges.writerow(
            [d['name'], d['college'], d['recognition'], d['address'], d['phone'], d['fax'], d['website']])
        # The course header row is written once per college, inside the loop.
        file_course.writerow(list_courses_headers)
        for course in d['courses']:
            # NOTE(review): `x is not None or x != ''` is true for every x, so
            # the 'NA' fallback is never chosen; `and` was presumably intended.
            file_course.writerow(
                [
                 (course['course_title'] if course['course_title'] is not None or course['course_title'] != '' else 'NA'),
                 (course['course_type'] if course['course_type'] is not None or course['course_type'] != '' else 'NA'),
                 (course['course_duration'] if course['course_duration'] is not None or course['course_duration'] != '' else 'NA'),
                 (course['course_nature'] if course['course_nature'] is not None or course['course_nature'] != '' else 'NA'),
                 (course['qualifications'] if course['qualifications'] is not None or course['qualifications'] != '' else 'NA'),
                 (course['brief_details'] if course['brief_details'] is not None or course['brief_details'] != '' else 'NA'),
                 (course['selection_process'] if course['selection_process'] is not None or course['selection_process'] != '' else 'NA'),
                 (course['course_branch'] if course['course_branch'] is not None or course['course_branch'] != '' else 'NA'),
                 (course['no_of_seats'] if course['no_of_seats'] is not None or course['no_of_seats'] != '' else 'NA')])
        pass

#def write_file(file, colleges):
#   db = shelve.open(file)
#   for college in colleges:
#       db[college.name] = college
#   db.close()
# NOTE(review): nothing in the code shown calls write_csv(); running the
# script only assigns read_file_path.
read_file_path = r'/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json'
#colleges = read_colleges(r"/home/maitreyee/Downloads/SchoolCollege.com1/collegedb1.json")
#new_write_file(r'/home/maitreyee/Downloads/SchoolCollege.com1/')

The code produces an empty file.

Below is @7stud's code; I have only modified the file locations.

import json
import csv

def write_csv(jsonfile, outfile):
    """Write the first college in jsonfile, joined with its first course, to outfile as one CSV row."""
    with open(jsonfile) as f:
        data = json.loads(f.read())

    college_dict = data[0]  # only the first entry of the JSON list is exported

    college_keys = list(college_dict.keys())
    college_keys.remove('courses')  # nested list; its first entry is flattened below
    college_keys.remove('college')

    courses_dict = data[0]['courses'][0]  # NOTE(review): IndexError when this 'courses' list is empty
    courses_keys = list(courses_dict.keys())
    courses_keys.remove('brief_details')

    with open(outfile, 'w', newline='') as f:
        csv_writer = csv.writer(f)
        headers = college_keys + courses_keys  # college columns first, then course columns
        csv_writer.writerow(headers)

        row = (
            [
                college_dict[key] if college_dict[key] else 'NA'  # any falsy value becomes 'NA'
                for key in college_keys
            ] 
            + 
            [
                courses_dict[key] if courses_dict[key] else 'NA'
                for key in courses_keys
            ]
        )

        csv_writer.writerow(row)

jsonfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json'
outfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesout.csv'

write_csv(jsonfile, outfile)

And below is the error

 maitreyee@Maitreyee:~/Downloads/SchoolCollege.com$ python json2csv4.py
Traceback (most recent call last):
  File "json2csv4.py", line 41, in <module>
    write_csv(jsonfile, outfile)
  File "json2csv4.py", line 15, in write_csv
    courses_dict = data[0]['courses'][0]
IndexError: list index out of range
1
  • This was silly of me. Sorry for this Commented Nov 15, 2015 at 10:50

1 Answer 1

2
  1. Do you plan on calling your write_csv() function in your program?

  2. If you do call write_csv(), you will get the error:

NameError: name 'list_courses' is not defined

If you just do this:

import json
import csv

def write_csv(read_file_path, out_path='out.txt'):
    """Write only the college header row to a CSV file.

    This is a pared-down version of the conversion: it shows that calling
    the function produces output, without writing any data rows yet.

    Args:
        read_file_path: Path of the JSON input file. It is parsed, so a
            missing or malformed file still raises, but its content is not
            used.
        out_path: Path of the CSV file to create or overwrite.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    # Context managers close (and flush) both files deterministically.
    with open(read_file_path) as json_file:
        json.load(json_file)

    list_colleges_headers = ['name', 'recognition', 'address', 'phone', 'fax', 'email', 'website']
    # newline="" lets the csv module control line endings itself.
    with open(out_path, "w", newline="") as csv_file:
        csv.writer(csv_file).writerow(list_colleges_headers)


if __name__ == "__main__":
    infile = "json.txt"
    write_csv(infile)

you'll see that the file does contain output:

$ cat out.txt
name,recognition,address,phone,fax,email,website

Edit:

If the column order in the csv file isn't important:

import json
import csv

def _ordered_keys(dicts, skipped):
    """Return every key found in *dicts*, in first-seen order, minus *skipped*."""
    seen = {}
    for entry in dicts:
        for key in entry:
            if key not in skipped:
                seen.setdefault(key, None)
    return list(seen)


def write_csv(jsonfile, outfile):
    """Flatten a JSON list of colleges with nested courses into a CSV file.

    One row is written per (college, course) pair, with the college columns
    first and the course columns after them. A college whose 'courses' list
    is empty or missing still gets one row, with 'NA' in every course column.
    Any falsy value (empty string, None, missing key) is written as 'NA'.

    The 'courses' and 'college' keys are left out of the college columns and
    'brief_details' is left out of the course columns.

    Args:
        jsonfile: Path of the JSON input file (a list of college dicts).
        outfile: Path of the CSV file to create or overwrite. An empty JSON
            list produces an empty file.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    with open(jsonfile) as f:
        data = json.load(f)

    all_courses = [
        course
        for college in data
        for course in college.get('courses') or []
    ]
    # Collect the columns from every record rather than from data[0] only, so
    # an empty list or a first college without courses cannot raise IndexError.
    college_keys = _ordered_keys(data, {'courses', 'college'})
    courses_keys = _ordered_keys(all_courses, {'brief_details'})
    headers = college_keys + courses_keys

    with open(outfile, 'w', newline='') as f:
        csv_writer = csv.writer(f)
        if headers:
            csv_writer.writerow(headers)

        for college in data:
            college_cells = [college.get(key) or 'NA' for key in college_keys]
            # The [{}] fallback yields a single all-'NA' course for colleges
            # that have no courses, so the college itself is not dropped.
            for course in college.get('courses') or [{}]:
                course_cells = [course.get(key) or 'NA' for key in courses_keys]
                csv_writer.writerow(college_cells + course_cells)


if __name__ == "__main__":
    jsonfile = 'data.json'
    outfile = 'out.csv'

    write_csv(jsonfile, outfile)
Sign up to request clarification or add additional context in comments.

4 Comments

I also need the nested values for the courses above in separate columns. And yes, you are right: after calling write_csv I am getting the error, so I am making some modifications to include the course values as well. I am going to put the modified code up here. If you have any suggestions on how to include the nested list, kindly let me know.
@MaitreyeeTewari, See bottom of my answer.
Now i am getting the error 'List index out of range'?
@MaitreyeeTewari, Post the code that is causing the error along with the exact error message(copy and paste) at the bottom of your answer.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.