Problem Statement : Remove/Rename Special characters (#,$, Back slash, & etc.) from json keys and replace in the main json file.
Approach :
- I am trying to get all the keys of deeply nested json first.
- Check for special characters in each key then rename/replace and write back to the json file.
Issue :
- Json I have is very deep nested so the logic i have written works for simple json but not deep nested json.
Code :
import json
import base64
def getKeys(object, prev_key = None, keys = []):
if type(object) != type({}):
keys.append(prev_key)
return keys
new_keys = []
for k, v in object.items():
if prev_key != None:
new_key = "{}.{}".format(prev_key, k)
else:
new_key = k
new_keys.extend(getKeys(v, new_key, []))
return new_keys
Above code works for below json : It print all the json keys
json_string= '{"Relate:0/name": "securityhub-ec2-instance-managed-by-ssm-dc0c9f18","RelatedAWSResources:0/type": "AWS::Config::ConfigRule","aws/securityhub/ProductName": "Security Hub","aws/securityhub/CompanyName": "AWS"}'
Output :
['Relate:0/name', 'RelatedAWSResources:0/type', 'aws/securityhub/ProductName', 'aws/securityhub/CompanyName']
But it does not work for below json :
{
"version": "0",
"id": "ffd8a756-9fe6-fa54-af4e-cf85fa3d2896",
"detail-type": "Security Hub Findings - Imported",
"source": "aws.securityhub",
"account": "220307202362",
"time": "2021-10-17T14:26:25Z",
"region": "us-west-2",
"resources": [
"arn:aws:securityhub:us-west-2::product/aws/securityhub/arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f"
],
"detail": {
"findings": [
{
"ProductArn": "arn:aws:securityhub:us-west-2::product/aws/securityhub",
"Types": [
"Software and Configuration Checks/Industry and Regulatory Standards/PCI-DSS"
],
"Description": "This control checks for the CloudWatch metric filters using the following pattern { $.userIdentity.type = \"Root\" && $.userIdentity.invokedBy NOT EXISTS && $.eventType != \"AwsServiceEvent\" } It checks that the log group name is configured for use with active multi-region CloudTrail, that there is at least one Event Selector for a Trail with IncludeManagementEvents set to true and ReadWriteType set to All, and that there is at least one active subscriber to an SNS topic associated with the alarm.",
"Compliance": {
"Status": "FAILED",
"StatusReasons": [
{
"Description": "Multi region CloudTrail with the required configuration does not exist in the account",
"ReasonCode": "CLOUDTRAIL_MULTI_REGION_NOT_PRESENT"
}
],
"RelatedRequirements": [
"PCI DSS 7.2.1"
]
},
"ProductName": "Security Hub",
"FirstObservedAt": "2021-10-17T14:26:18.383Z",
"CreatedAt": "2021-10-17T14:26:18.383Z",
"LastObservedAt": "2021-10-17T14:26:21.346Z",
"CompanyName": "AWS",
"FindingProviderFields": {
"Types": [
"Software and Configuration Checks/Industry and Regulatory Standards/PCI-DSS"
],
"Severity": {
"Normalized": 40,
"Label": "MEDIUM",
"Product": 40,
"Original": "MEDIUM"
}
},
"ProductFields": {
"StandardsArn": "arn:aws:securityhub:::standards/pci-dss/v/3.2.1",
"StandardsSubscriptionArn": "arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1",
"ControlId": "PCI.CW.1",
"RecommendationUrl": "https://docs.aws.amazon.com/console/securityhub/PCI.CW.1/remediation",
"StandardsControlArn": "arn:aws:securityhub:us-west-2:220307202362:control/pci-dss/v/3.2.1/PCI.CW.1",
"aws/securityhub/ProductName": "Security Hub",
"aws/securityhub/CompanyName": "AWS",
"aws/securityhub/annotation": "Multi region CloudTrail with the required configuration does not exist in the account",
"Resources:0/Id": "arn:aws:iam::220307202362:root",
"aws/securityhub/FindingId": "arn:aws:securityhub:us-west-2::product/aws/securityhub/arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f"
},
"Remediation": {
"Recommendation": {
"Text": "For directions on how to fix this issue, consult the AWS Security Hub PCI DSS documentation.",
"Url": "https://docs.aws.amazon.com/console/securityhub/PCI.CW.1/remediation"
}
},
"SchemaVersion": "2018-10-08",
"GeneratorId": "pci-dss/v/3.2.1/PCI.CW.1",
"RecordState": "ACTIVE",
"Title": "PCI.CW.1 A log metric filter and alarm should exist for usage of the \"root\" user",
"Workflow": {
"Status": "NEW"
},
"Severity": {
"Normalized": 40,
"Label": "MEDIUM",
"Product": 40,
"Original": "MEDIUM"
},
"UpdatedAt": "2021-10-17T14:26:18.383Z",
"WorkflowState": "NEW",
"AwsAccountId": "220307202362",
"Region": "us-west-2",
"Id": "arn:aws:securityhub:us-west-2:220307202362:subscription/pci-dss/v/3.2.1/PCI.CW.1/finding/b5a325b7-eab1-439f-b14d-1dc52c3a423f",
"Resources": [
{
"Partition": "aws",
"Type": "AwsAccount",
"Region": "us-west-2",
"Id": "AWS::::Account:220307202362"
}
]
}
]
}
}
Strip punctuation function :
import string
from typing import Optional, Iterable, Union
delete_dict = {sp_character: '' for sp_character in string.punctuation}
PUNCT_TABLE = str.maketrans(delete_dict)
def strip_punctuation(s: str,
exclude_chars: Optional[Union[str, Iterable]] = None) -> str:
"""
Remove punctuation and spaces from a string.
If `exclude_chars` is passed, certain characters will not be removed
from the string.
"""
punct_table = PUNCT_TABLE.copy()
if exclude_chars:
for char in exclude_chars:
punct_table.pop(ord(char), None)
# Next, remove the desired punctuation from the string
return s.translate(punct_table)
Usage:
cleaned_keys = {json data}
for key, expected_key in cleaned_keys.items():
actual_key = strip_punctuation(key)
type({})is somewhat wasteful, as it creates a new dict object each time.aws-lambdatag, because the mere fact that you got the data from there has nothing to do with what you are actually doing with the data.