The code below is a Python module that flattens a JSON-like structure (nested dictionaries and lists) into a single string. It also provides the intermediate result in the form of a flat list of strings. The flattening is intended to be used to generate commands for the command-line variant of the Check Point Management API: https://sc1.checkpoint.com/documents/latest/APIs/
The details of how it works, together with examples, are documented in the code's docstrings, which are written so that documentation can be generated with Sphinx.
I am concerned mainly about:
- The style
- Identifier naming
- Docstrings - aren't they excessive (for the private functions)?
- Later I noticed I can replace `_is_convertible_to_str(value)` by the much simpler `isinstance(value, _CONVERTIBLE_TO_STR)` and define the constant tuple instead of the function. Which way should be preferred?
- The way of defining the default behaviour while allowing some flexibility
- The module API design (public functions, parameters)
""" JSON-like structure flattening
The module provides functions to flatten nested structures of dictionaries
and lists to a flat list of strings or a single string.
Functions:
* `flatten_to_list()`: Convert structure to a flat list of strings.
* `flist_to_str()`: Convert list of strings to a single string.
Examples:
>>> flat_list1 = flatten_to_list({'name': 'John', 'surname': 'Doe'})
>>> flist_to_str(flat_list1)
'name John surname Doe'
>>> flat_list2 = flatten_to_list({
... 'add': 'access-rule',
... 'layer': 'policy1 Network',
... 'position': {'bottom': 'RADIUS rules'},
... 'source': ['web_serers'],
... 'destination': ['internet'],
... 'action': 'Accept',
... 'track': {'type': 'Log'}},
... key_order=('add', 'layer', 'position'))
>>> flat_list2
['add', 'access-rule', 'layer', 'policy1 Network', 'position.bottom',\
'RADIUS rules', 'source.1', 'web_serers', 'destination.1', 'internet',\
'action', 'Accept', 'track.type', 'Log']
>>> flist_to_str(flat_list2)
'add access-rule layer "policy1 Network" position.bottom "RADIUS rules"\
source.1 web_serers destination.1 internet action Accept track.type Log'
"""
from __future__ import annotations
import string
from typing import (
Any, Hashable, ItemsView, Iterator, Union, Callable, Iterable, Generator)
# --- private constants
# Whitespace characters whose presence in a list item makes `flist_to_str()`
# wrap that item in quotes.  Kept as a `set` so that `_contains_any()` can use
# it directly instead of converting it on every call.
_WHITESPACE = set(string.whitespace)
# --- private helper functions
def _is_convertible_to_str(value: Any) -> bool:
"""Decide if we want to convert the value using `str(value)`.
Return `False` for container types. (`dict`, `list`...) The function
decides if we are willing to convert the `value` in a JSON-like
structure to a string.
Args:
value: the value to test the convertibility of
Returns:
`True` if we want to convert the value using `str(value)`
Examples:
>>> _is_convertible_to_str(1)
True
>>> _is_convertible_to_str([])
False
"""
return (isinstance(value, str)
or isinstance(value, int)
or isinstance(value, float))
def _ordered_dict_items(
dictionary: dict[Hashable, Any], key_order: Iterable[Hashable] = ()
) -> Generator[tuple[Hashable, Any], None, None]:
"""Iterate dictionary like `dict.items()`_ with optional key order.
Dictionary keys listed in `key_order` are iterated first in the order
as listed. The rest is iterated in unspecified order.
Args:
dictionary: dictionary to iterate
key_order: these keys will be iterated first in the given order
Yields:
`(key, value)` tuples as standard `dict.items()`_ does
Examples:
>>> list(_ordered_dict_items({'key': 42}))
[('key', 42)]
>>> list(_ordered_dict_items(
... {'key': 42, 'id': 8569, 'name': 'Marc'}, ['name', 'id']))
[('name', 'Marc'), ('id', 8569), ('key', 42)]
.. _dict.items():
https://docs.python.org/3/library/stdtypes.html#dict.items
"""
dictionary = dictionary.copy() # we will remove processed keys
for ordered_key in key_order:
if ordered_key in dictionary:
yield ordered_key, dictionary[ordered_key]
del dictionary[ordered_key]
yield from dictionary.items() # yield the rest in unspecified order
def _contains_any(set1: Iterable[Hashable], set2: Iterable[Hashable]) -> bool:
r"""Test if `set1` contains any elements of `set2` or vice versa.
The function tests if the intersection of the sets is not empty.
Unlike the plain `&` operator the function operates on any iterables
of `Hashable`. For example the function is useful to test if one string
contains any character from the other string (or any iterable of
characters).
Args:
set1: an iterable for the intersection test
set2: another iterable for the intersection test
Returns:
`True` if the intersection is not empty
Examples:
>>> _contains_any('good morning', ' \t')
True
>>> _contains_any('hello John', 'xXyY')
False
"""
if not isinstance(set1, set):
set1 = set(set1)
if not isinstance(set2, set):
set2 = set(set2)
return bool(set1 & set2)
# --- public functions
def flatten_to_list(
        json_struct: Union[dict[str, Any], list[Any]], /, *,
        parent: str = '', startindex: int = 1, parent_sep: str = '.',
        key_order: Iterable[str] = (),
        value_converter: Callable[[Any], str] = str,
        key_converter: Callable[[Any], str] = str) -> list[str]:
    """Flatten JSON-like structure to a list of strings.

    The JSON-like structure consists of dictionaries, lists and simple values
    (`str`, `int`, `float`). The resulting list consists of pairs:
    `[key1, value1, key2, value2 ...]`.

    Key produced for a JSON list item is an ordinal number of the position
    in the list: `1, 2, 3, ...` Key from a nested container is preceded
    by the parent container key: *parent_key.key*.

    Args:
        json_struct: the JSON-like structure to flatten
        parent: parent key name; if not empty it is prepended (followed by
            `parent_sep`) to every produced key
        startindex: first number for indexing list items
        parent_sep: parent key or index separator string
        key_order: keys needing defined order; the same order is applied to
            every dictionary in the structure, including nested ones. Any
            iterable is accepted, including one-shot iterators.
        value_converter: function converting values to strings
        key_converter: function converting keys (and list indexes) to strings

    Returns:
        flat list of key, value pairs: `[key1, value1, key2, value2 ...]`

    Raises:
        TypeError: if `json_struct` is neither a `dict` nor a `list`, or if
            a value inside the structure is not a `dict`, `list`, `str`,
            `int` or `float`

    Examples:
        >>> flatten_to_list({'name': 'John', 'surname': 'Doe'})
        ['name', 'John', 'surname', 'Doe']
        >>> flatten_to_list({'name': 'Alice', 'siblings': ['Jeff', 'Anna']})
        ['name', 'Alice', 'siblings.1', 'Jeff', 'siblings.2', 'Anna']
        >>> flatten_to_list({
        ...     'name': 'Zip',
        ...     'eye': {'left': 'red', 'right': 'black'}})
        ['name', 'Zip', 'eye.left', 'red', 'eye.right', 'black']
        >>> flatten_to_list(['red', 'green', 'blue'],
        ...                 parent='color', startindex=0)
        ['color.0', 'red', 'color.1', 'green', 'color.2', 'blue']
        >>> flatten_to_list({'name': 'John', 'surname': 'Doe'},
        ...                 key_order=['surname'])
        ['surname', 'Doe', 'name', 'John']
    """
    # The key order is reused for every nested dictionary while the outer
    # dictionary is still being iterated lazily.  A one-shot iterator would
    # be drained by the first nested dictionary, so materialize it once.
    key_order = tuple(key_order)
    prefix = parent + parent_sep if parent else ''

    pairs: Iterator[tuple[Any, Any]]  # yields (key, value)
    if isinstance(json_struct, dict):
        pairs = _ordered_dict_items(json_struct, key_order)
    elif isinstance(json_struct, list):
        pairs = enumerate(json_struct, startindex)
    else:
        raise TypeError(
            f"Unexpected data type {type(json_struct)} of the structure.")

    result: list[str] = []
    for key, value in pairs:
        ext_key = prefix + key_converter(key)
        if isinstance(value, (list, dict)):
            # nested container: its keys are prefixed with our extended key
            result.extend(flatten_to_list(
                value, parent=ext_key, startindex=startindex,
                parent_sep=parent_sep, key_order=key_order,
                value_converter=value_converter,
                key_converter=key_converter))
        elif _is_convertible_to_str(value):
            result.extend((ext_key, value_converter(value)))
        else:
            raise TypeError(
                f"Unexpected data type {type(value)} inside structure.")
    return result
def flist_to_str(
        flist: list[str], /, *, separator: str = ' ',
        quote_str: str = '"', quote_always: bool = False) -> str:
    """Join a flat list of strings into one string, quoting where needed.

    An item is wrapped in `quote_str` when it contains a whitespace
    character, when it is an empty string, or unconditionally when
    `quote_always` is set. The function is useful to convert the resulting
    list from :py:func:`flatten_to_list()` to a single string.

    Args:
        flist: flat list of strings to be converted to a single string
        separator: separator placed between list items
        quote_str: character or string put before and after a quoted item
        quote_always: quote every item, even if quoting is not necessary

    Returns:
        the (possibly quoted) items joined by `separator`

    Examples:
        >>> flist_to_str(['good', 'morning'])
        'good morning'
        >>> flist_to_str(['good morning'])
        '"good morning"'

    Todo:
        * No escaping implemented for quote characters. We need to find out
          which way of escaping does Check Point CLI API support.
    """
    pieces = []
    for item in flist:
        must_quote = (
            quote_always or _contains_any(item, _WHITESPACE) or not item)
        if must_quote:
            item = quote_str + item + quote_str
        pieces.append(item)
    return separator.join(pieces)