Hi, I have a nested JSON file (see below). I need to read it and insert its data into a SQL table in a PostgreSQL database using Python.
I need each nested tag to be parsed into a separate column, e.g.:
#1: [metadata_namespace] --> where "metadata" is the root tag and "namespace" is a subtag within the metadata root element
#2: [price_bands_retail_band1_price] --> where "price_bands" is the root tag, "retail" is a sub-element of the price_bands array, "band1" is a sub-element of the retail element, and "price" is a sub-element of the band1 dictionary
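To illustrate, here is a minimal sketch of the kind of flattening I mean (the underscore separator comes from my examples above; merging array elements under the same prefix, with no index in the column name, is my assumption, matching example #2):

def flatten(obj, prefix="", out=None):
    """Flatten nested dicts/lists into one flat dict whose keys join the
    nesting path with underscores, e.g. metadata -> namespace becomes
    'metadata_namespace'."""
    if out is None:
        out = {}
    if isinstance(obj, dict):
        for key, val in obj.items():
            flatten(val, prefix + key + "_", out)
    elif isinstance(obj, list):
        # assumption: list elements are merged under the same prefix
        # (no index in the column name, as in example #2 above)
        for item in obj:
            flatten(item, prefix, out)
    else:
        out[prefix[:-1]] = obj  # strip the trailing underscore
    return out

print(flatten({"metadata": {"namespace": "5.2.0"}}))
# {'metadata_namespace': '5.2.0'}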
Additionally, I want control over which data elements (columns, from my example above) get inserted into the PostgreSQL table, as in the snippet below.
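In other words, after flattening I would keep only a chosen subset of keys, something like this (the whitelist here is hypothetical, and the example record is hand-copied from the sample file below):

# hypothetical whitelist of the flattened columns I want to insert
COLUMNS_TO_LOAD = ["metadata_namespace", "price_bands_retail_band1_price"]

# example flattened record (values taken from the sample file below)
flat_record = {"metadata_namespace": "5.2.0",
               "metadata_domain": "Merchandising",
               "price_bands_retail_band1_price": 24.99}

filtered = {k: v for k, v in flat_record.items() if k in COLUMNS_TO_LOAD}
print(filtered)  # {'metadata_namespace': '5.2.0', 'price_bands_retail_band1_price': 24.99}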
I already have a solution that reads a JSON file and inserts all of its tags as columns, but it works only for flat JSON (not a nested JSON file). My solution's code is below. Please keep in mind that it also only works when the JSON file is an array, and the JSON sample file below is not an array.
To test my solution, create a SQL table named "json_data" whose column names match the JSON tags and subtags (a sketch of roughly what I mean follows), and save the JSON sample shown after it as "postgres-records.json".
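For reference, the table creation I have in mind looks roughly like this (a sketch only: the column list is abridged and the data types are my guesses):

import psycopg2

# one column per flattened tag you want to load; types are assumptions
ddl = """
CREATE TABLE IF NOT EXISTS json_data (
    metadata_namespace              text,
    metadata_message_id             text,
    metadata_transmit_time          timestamptz,
    price_bands_retail_band1_price  numeric,
    price_bands_sku_sku_id          text,
    price_bands_cost                numeric
);
"""

conn = psycopg2.connect(dbname="python_data", user="postgres",
                        password="1234", host="192.168.100.123")
with conn, conn.cursor() as cur:
    cur.execute(ddl)
conn.close()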
{
  "metadata": {
    "namespace": "5.2.0",
    "message_id": "3c80151b-fcf3-4cc3-ada0-635be5b5c95f",
    "transmit_time": "2020-01-30T11:25:47.247394-06:00",
    "message_type": "pricing",
    "domain": "Merchandising",
    "version": "2.0.0"
  },
  "price_bands": [
    {
      "retail": {
        "band1": {
          "price": 24.99,
          "effective_date": "2019-06-01T00:00:00-05:00",
          "strikethrough": null,
          "expiration_date": null,
          "modified_date": "2019-08-30T02:14:39.044968-05:00"
        },
        "band2": {
          "price": 24.99,
          "effective_date": "2019-06-01T00:00:00-05:00",
          "strikethrough": null,
          "expiration_date": null,
          "modified_date": "2019-08-30T02:14:39.044968-05:00"
        },
        "ecom": {
          "price": 39.99,
          "effective_date": "2019-08-27T00:00:00-05:00",
          "strikethrough": null,
          "expiration_date": null,
          "modified_date": "2019-08-30T02:14:50.615119-05:00"
        }
      },
      "sku": {
        "sku_id": "917513",
        "item_number": "29469",
        "retail_price": 6.49
      },
      "competitive": {
        "price": 8.05,
        "modified_date": "2020-01-08T16:26:51.1996861-06:00"
      },
      "trade_in": {
        "base_price": 7,
        "tier_prices": [
          {
            "id": 1,
            "price": 16.4
          }
        ]
      },
      "cost": 5.49
    }
  ]
}
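Since this sample is a single top-level object rather than an array, I assume the load step would first normalize it, roughly like this (an untested sketch, reusing the flatten() helper from above):

import json

with open('postgres-records.json') as f:
    data = json.load(f)

# wrap a single top-level object in a list so the rest of the script
# can keep treating the input as an array of records
record_list = data if isinstance(data, list) else [data]

flat_records = [flatten(rec) for rec in record_list]
print(sorted(flat_records[0]))
# ['metadata_domain', 'metadata_message_id', ..., 'price_bands_retail_band1_price', ...]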
My solution for a flat JSON array file, which inserts all elements (columns) of the file, is below. It was driven by this post: https://kb.objectrocket.com/postgresql/insert-json-data-into-postgresql-using-python-part-2-1248
### Refer to https://kb.objectrocket.com/postgresql/insert-json-data-into-postgresql-using-python-part-2-1248
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# import the psycopg2 database adapter for PostgreSQL
from psycopg2 import connect, Error
# import Python's built-in JSON library
import json
# import the JSON library from psycopg2.extras
from psycopg2.extras import Json
# import psycopg2's 'json' using an alias
from psycopg2.extras import json as psycop_json
# import Python's 'sys' library
import sys
# NOTE: the tutorial read the table name from the command line via
# '_'.join(sys.argv[1:]), but that did not give me the table name,
# so the table name is hard-coded instead
table_name = "json_data"
print("\ntable name for JSON data:", table_name)
# use Python's open() function to load the JSON data
with open('postgres-records.json') as json_data:
    # use load() rather than loads() for JSON files
    record_list = json.load(json_data)
print("\nrecords:", record_list)
print("\nJSON records object type:", type(record_list))  # should return "<class 'list'>"
# concatenate an SQL string
sql_string = 'INSERT INTO {} '.format(table_name)
# if record list then get column names from first key
if type(record_list) == list:
    first_record = record_list[0]
    columns = list(first_record.keys())
    print("\ncolumn names:", columns)
# if just one dict obj or nested JSON dict
else:
    print("Needs to be an array of JSON objects")
    sys.exit()
# enclose the column names within parentheses
sql_string += "(" + ', '.join(columns) + ")\nVALUES "
# enumerate over the records
for i, record_dict in enumerate(record_list):
    # iterate over the values of each record dict object
    values = []
    for col_name, val in record_dict.items():
        # Postgres strings must be enclosed with single quotes
        if type(val) == str:
            # escape apostrophes with two single quotes
            val = val.replace("'", "''")
            val = "'" + val + "'"
        elif val is None:
            # fix: emit SQL NULL instead of the Python string 'None'
            val = "NULL"
        values += [str(val)]
    # join the list of values and enclose the record in parentheses
    sql_string += "(" + ', '.join(values) + "),\n"
# remove the last comma and end the statement with a semicolon
sql_string = sql_string[:-2] + ";"
print("\nSQL string:")
print(sql_string)
## Code for psycopg2 to connect to Postgres
try:
    # declare a new PostgreSQL connection object
    # change the settings below to your PostgreSQL DB
    conn = connect(
        dbname="python_data",
        user="postgres",
        host="192.168.100.123",
        password="1234",
        # attempt to connect for 3 seconds then raise exception
        connect_timeout=3
    )
    cur = conn.cursor()
    print("\ncreated cursor object:", cur)
except (Exception, Error) as err:
    print("\npsycopg2 connect error:", err)
    conn = None
    cur = None
# only attempt to execute SQL if the cursor is valid
if cur is not None:
    try:
        cur.execute(sql_string)
        conn.commit()
        print('\nfinished INSERT INTO execution')
    except (Exception, Error) as error:
        print("\nexecute_sql() error:", error)
        conn.rollback()
    # close the cursor and connection
    cur.close()
    conn.close()
# optional: save the script's output to a file
# NOTE: the 'w' file mode truncates and re-writes the .sql file every time
# the script runs; use the 'a' mode to append data to the end of the SQL file instead
with open('insert_json.sql', 'w') as output_file:
    output_file.write(sql_string)
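Finally, to show what I am ultimately after, here is a rough, untested sketch that flattens each record, keeps only the whitelisted columns, and lets psycopg2 quote the values through %s placeholders instead of hand-built SQL strings (flatten() and COLUMNS_TO_LOAD are the hypothetical helpers sketched earlier in the post):

from psycopg2 import connect
import json

# hypothetical whitelist, same as in the snippet near the top
COLUMNS_TO_LOAD = ["metadata_namespace", "price_bands_retail_band1_price"]

with open('postgres-records.json') as f:
    data = json.load(f)
record_list = data if isinstance(data, list) else [data]

conn = connect(dbname="python_data", user="postgres", password="1234",
               host="192.168.100.123", connect_timeout=3)
try:
    with conn, conn.cursor() as cur:
        for record in record_list:
            flat = flatten(record)  # helper sketched near the top of the post
            cols = [c for c in COLUMNS_TO_LOAD if c in flat]
            placeholders = ', '.join(['%s'] * len(cols))
            sql = 'INSERT INTO json_data ({}) VALUES ({})'.format(
                ', '.join(cols), placeholders)
            # psycopg2 handles quoting and NULLs, so no manual escaping
            cur.execute(sql, [flat[c] for c in cols])
finally:
    conn.close()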