I recently got help from stackoverflow and corrected my script to convert json to csv. However, now the script is returning only one single row, kindly suggest the modifications in the below code, so that I could convert and have the entire json file to csv.
Below is a snippet from my json file. It has nested content for "courses", where some of the course values are there while it is null for some.
[{
"address": " Vidyanagar, Hubli-580031",
"college": "College (Architecture)",
"courses": [],
"email": " [email protected]",
"fax": "0836-2374985",
"name": "School Of Architecturebv Bhoomaradi College Of Engg. & Technology",
"phone": "0836-2378123, 2378201",
"recognition": " V.t.u. Belgaum",
"website": ""
},{
"address": " Udyambag, Belgaum-590008",
"college": "Institute (Architecture)",
"courses": [],
"email": " [email protected]",
"fax": "0831-2441909",
"name": "School Of Architecturelaw Society's Gogte Institute Of Technology",
"phone": "0831-2441104, 2405507",
"recognition": " V.t.u. Belgaum",
"website": ""
},{
"address": " Vidya Southa, Gokula Extn. Post, Bantwal-560054",
"college": "Institute (Architecture)",
"courses": [],
"email": " [email protected]",
"fax": "080-23603124",
"name": "School Of Architecturems Ramaiah Institute Of Technology ",
"phone": "080-23606934, 23600822",
"recognition": " V.t.u. Belgaum",
"website": ""
},{
"address": " -, Gulbarga-585102",
"college": "College (Architecture)",
"courses": [
{
"brief_details": "",
"college_name": "School of ArchitecturePoojya Doddappa Appa College of Engineering",
"course_branch": "B.Arch",
"course_duration": " 5-year",
"course_nature": " Full-Time",
"course_title": "",
"course_type": " B.Arch",
"no_of_seats": " 60",
"qualifications": "",
"selection_process": ""
}
],
"email": " [email protected]",
"fax": "08472-255685",
"name": "School Of Architecturepoojya Doddappa Appa College Of Engineering",
"phone": "08472-224262 Extn. 435, 220742",
"recognition": " V.t.u. Belgaum",
"website": ""
},{
"address": " R.v. Vidyaniketan P.o., Mysore Road, Bangalore-560059",
"college": "College (Architecture)",
"courses": [
{
"brief_details": "",
"college_name": "School of ArchitectureR.V. College of Engineering",
"course_branch": "B.Arch",
"course_duration": " 5-year",
"course_nature": " Full-Time",
"course_title": "",
"course_type": " B.Arch",
"no_of_seats": " 20",
"qualifications": "",
"selection_process": ""
}
],
"email": " [email protected]",
"fax": "080-28602914, 28602148, 28600337",
"name": "School Of Architecturer.v. College Of Engineering",
"phone": "080-28602170, 28601258, 28600184",
"recognition": " V.t.u. Belgaum",
"website": "www.rvce.ac.in"
}
And below is my code:
import json
import csv
def write_csv(jsonfile, outfile):
with open(jsonfile) as f:
data = json.loads(f.read())
college_dict = data[0]
college_keys = list(college_dict.keys())
college_keys.remove('courses')
college_keys.remove('college')
courses_dict = data[0]['courses'][0] if data[0]['courses'] else {'brief_details' : None}
courses_keys = list(courses_dict.keys())
courses_keys.remove('brief_details')
with open(outfile, 'wb') as f:
csv_writer = csv.writer(f)
headers = college_keys + courses_keys
csv_writer.writerow(headers)
row = (
[
college_dict[key] if college_dict[key] else 'NA'
for key in college_keys
]
+
[
courses_dict[key] if courses_dict[key] else 'NA'
for key in courses_keys
]
)
csv_writer.writerow(row)
jsonfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json'
outfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesout.csv'
write_csv(jsonfile, outfile)
This json file is huge, but below is the resulted csv:
website,fax,name,phone,address,email,recognition
NA,0836-2374985,School Of Architecturebv Bhoomaradi College Of Engg. & Technology,"0836-2378123, 2378201"," Vidyanagar, Hubli-580031", [email protected], V.t.u. Belgaum
college_dict = data[0]