I have the JSON below and I'm parsing it with play-json. Somehow the "datafeeds/schema/fields" node is not being parsed properly.
I have created standard Reads to parse this JSON, but the "datafeeds" node does not seem to parse correctly, because the "format" node (under datafeeds/schema/fields) is sometimes a String and sometimes a JsObject; the same goes for the "type" node.
If I treat "schema" as a JsObject, the whole JSON is parsed correctly, but it seems I then have to process the schema separately.
My JSON looks like this:
{
"entities": [
{
"name": "customers",
"number_of_buckets": 5,
"entity_column_name": "customer_id",
"entity_column_type": "integer"
},
{
"name": "accounts",
"number_of_buckets": 7,
"entity_column_name": "account_id",
"entity_column_type": "string"
},
{
"name": "products",
"number_of_buckets": 1,
"entity_column_name": "product_id",
"entity_column_type": "integer"
}
],
"datafeeds": [
{
"name": "customer_demographics",
"version": "1",
"delimiter": "|",
"filename_re_pattern": ".*(customer_demographics_v1_[0-9]{8}\\.psv)$",
"frequency": {
"days": 1
},
"from": "2015-07-01",
"drop_threshold": {
"rows": null,
"percentage": 0.05
},
"dry_run": false,
"header": true,
"text_qualifier": null,
"landing_path": "landing",
"schema": {
"fields": [
{
"time_key": true,
"format": "yyyy-MM-dd",
"metadata": {},
"name": "record_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"format": "yyyy-MM-dd",
"metadata": {},
"name": "extract_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"entity_type": "customers",
"metadata": {},
"name": "customer_id",
"nullable": false,
"primary_key": true,
"type": "integer"
},
{
"metadata": {},
"name": "year_of_birth",
"nullable": true,
"type": "integer"
},
{
"metadata": {},
"name": "month_of_birth",
"nullable": true,
"type": "integer"
},
{
"metadata": {},
"name": "postcode",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "state",
"nullable": true,
"type": "string"
},
{
"format": {
"false": "N",
"true": "Y"
},
"metadata": {},
"name": "marketing_consent",
"nullable": true,
"type": "boolean"
}
],
"type": "struct"
}
},
{
"name": "customer_statistics",
"version": "1",
"delimiter": "|",
"filename_re_pattern": ".*(customer_statistics_v1_[0-9]{8}\\.psv)$",
"frequency": {
"days": 1
},
"from": "2015-07-01",
"drop_threshold": {
"rows": null,
"percentage": 0.05
},
"dry_run": false,
"header": true,
"text_qualifier": null,
"landing_path": "landing",
"schema": {
"fields": [
{
"time_key": true,
"format": "yyyy-MM-dd",
"metadata": {},
"name": "record_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"format": "yyyy-MM-dd",
"metadata": {},
"name": "extract_date",
"nullable": false,
"primary_key": true,
"type": "timestamp",
"timezone": "Australia/Sydney"
},
{
"entity_type": "customers",
"metadata": {},
"name": "customer_id",
"nullable": false,
"primary_key": true,
"type": "integer"
},
{
"metadata": {},
"name": "risk_score",
"nullable": true,
"type": "double"
},
{
"metadata": {},
"name": "mkg_segments",
"nullable": true,
"type": {
"type":"array",
"elementType":"string",
"containsNull": false
}
},
{
"metadata": {},
"name": "avg_balance",
"nullable": true,
"type": "decimal"
},
{
"metadata": {},
"name": "num_accounts",
"nullable": true,
"type": "integer"
}
],
"type": "struct"
}
}
],
"tables": [
{
"name": "table_name",
"version": "version",
"augmentations": [
{
"left_table_name": "left_table_name",
"left_table_version": "v1",
"right_table_name": "right_table_name",
"right_table_version": "v1",
"columns": [
"column_a",
"column_b",
"column_c"
],
"join_cols": [
{
"left_table": "system_code",
"right_table": "key_a"
},
{
"left_table": "group_product_code",
"right_table": "key_b"
},
{
"left_table": "sub_product_code",
"right_table": "key_c"
}
]
}
],
"sources": [
{
"name": "table_name",
"version": "v1",
"mandatory": true,
"type": "datafeed | table"
}
],
"aggregations": [
{
"column_name": "customer_age_customer_age",
"column_type": "long",
"description": "date_diff",
"expression": "max_by",
"source_columns": [
{
"column_name": "customer_age_year_of_birth",
"source": {
"name": "customers",
"type": "table",
"version": "v1"
}
},
{
"column_name": "customer_age_month_of_birth",
"source": {
"name": "customers",
"type": "table",
"version": "v1"
}
}
]
}
],
"column_level_transformations": [
{
"column_name": "column_added",
"column_type": "long",
"description": "adding two columns to return something else",
"expression": "column_a+column_b",
"source_columns": [
{
"column_name": "column_a",
"source": {
"name": "source_a",
"type": "table",
"version": "v1"
}
},
{
"column_name": "column_b",
"source": {
"name": "source_b",
"type": "table",
"version": "v1"
}
}
]
}
],
"frequency": {
"months": 1
},
"joins": [
{
"name": "table_name",
"version": "v1"
},
{
"name": "table_name_b",
"version": "v2"
}
],
"from": "2015-07-01",
"format": "parquet",
"structure": "primitives",
"index_query": "sql statement",
"insert_query": "sql statement"
}
]
}
Any idea how to parse this JSON?