I had a long post prepared, which was accidentally lost due to inactivity. I may therefore have forgotten some details, but I'll gladly provide more information.
I've read up on the YAML specification and on ruamel.yaml-related posts here on SO, but I'm not sure whether what I'm trying to accomplish is possible and, if so, whether it is possible with ruamel (from a discussion with the PyYAML maintainers it seems to be legal YAML that should also work with ruamel), e.g. for someKey: !do_not_inherit [ !tag taggedValue ].
An open source project that I'm involved with (gitlabform) is trying to make its YAML syntax a bit more flexible, and a possible solution would be to use custom implicit resolvers.
Basically, we want to accomplish something like the following, so that we don't have to handle these additional keys/properties everywhere:
# normal case
someKey: someValue
# want to avoid
someKey:
value: someValue
do_not_inherit: true
# would prefer
someKey: !do_not_inherit someValue
or, more advanced (I'm not sure whether this is valid syntax, or whether it is possible at all):
project_settings:
!inherit force
a_list:
!keep_existing true
- !delete true A
- B
a_map:
!inherit no_force
A: !delete
B:
C: some_key
I've tried some examples using ruamel but haven't been able to get them to work, and I suspect that I need to know more about ruamel to (potentially) get this to work.
#!/usr/bin/env python3
import sys
import pathlib
import ruamel.yaml
import pytest
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, Union, List
from collections import OrderedDict
# Enum for inherit values
class InheritEnum(str, Enum):
    """Closed set of string values that the ``!inherit`` tag may carry.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (``InheritEnum.FORCE == "force"``).
    """

    TRUE = "true"
    FALSE = "false"
    NEVER = "never"
    ALWAYS = "always"
    FORCE = "force"
# Custom Ordered Dict to store tags
class GitLabFormTagOrderedDict(OrderedDict):
    """An ordered dictionary that also records the YAML tags applied to it.

    This is deliberately a plain subclass rather than a ``@dataclass``: a
    dataclass-generated ``__init__`` would reject initial data, and the
    generated ``__eq__``/``__repr__`` would look only at the tags and ignore
    the mapping's own entries.
    """

    # Tag name -> tag value; stored apart from the mapping's own entries.
    _tags: Dict[str, Any]

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Create the mapping and start with an empty tag store.

        Args:
            *args: Positional arguments forwarded to ``OrderedDict``.
            **kwargs: Keyword arguments forwarded to ``OrderedDict``.
        """
        super().__init__(*args, **kwargs)
        self._tags = {}

    def set_tag(self, key: str, tag: Any) -> None:
        """Associate a custom tag with a key.

        Args:
            key: Name under which the tag is stored.
            tag: Value recorded for that name; replaces any earlier value.
        """
        print(f"Setting tag: {key} = {tag}")
        self._tags[key] = tag

    def get_tags(self) -> Dict[str, Any]:
        """Retrieve all stored tags.

        Returns:
            The live internal tag dictionary (not a copy).
        """
        print("Retrieving tags")
        return self._tags
# Custom Scalar to store tags
@dataclass
class GitLabFormTagScalar:
    """Pairs a scalar value with the YAML tags that were attached to it."""

    # The scalar payload handed over by the tag's constructor.
    value: Any
    # Tag name -> tag value; every instance gets its own empty dict by default.
    tags: Dict[str, Any] = field(default_factory=dict)
# Custom List to store tags
class GitLabFormTagList(List[Any]):
    """A list that also records the YAML tags applied to it.

    This is deliberately a plain subclass rather than a ``@dataclass``: a
    dataclass-generated ``__init__`` takes no positional arguments, so
    ``GitLabFormTagList(items)`` would raise ``TypeError``, and the generated
    ``__eq__``/``__repr__`` would ignore the list's elements.
    """

    # Tag name -> tag value; stored apart from the list's own elements.
    _tags: Dict[str, Any]

    def __init__(self, *args: Any) -> None:
        """Create the list and start with an empty tag store.

        Args:
            *args: Positional arguments forwarded to ``list`` (at most one
                iterable of initial elements).
        """
        super().__init__(*args)
        self._tags = {}

    def set_tag(self, key: str, tag: Any) -> None:
        """Associate a custom tag with the list.

        Args:
            key: Name under which the tag is stored.
            tag: Value recorded for that name; replaces any earlier value.
        """
        print(f"Setting tag on list: {key} = {tag}")
        self._tags[key] = tag

    def get_tags(self) -> Dict[str, Any]:
        """Retrieve all stored tags.

        Returns:
            The live internal tag dictionary (not a copy).
        """
        print("Retrieving tags for list")
        return self._tags
# Constructor for !enforce
def enforce_constructor(
    loader: ruamel.yaml.constructor.Constructor, node: ruamel.yaml.nodes.MappingNode
) -> GitLabFormTagOrderedDict:
    """Build a tagged mapping for a node carrying the ``!enforce`` tag.

    Args:
        loader: Constructor used to build each value node.
        node: Node whose ``value`` is iterated as ``(key_node, value_node)``
            pairs.

    Returns:
        A ``GitLabFormTagOrderedDict`` with the ``enforce`` tag set to
        ``True`` and one entry per pair found in ``node.value``.
    """
    print("Processing !enforce tag - Setting enforce=True")
    tagged_mapping = GitLabFormTagOrderedDict()
    tagged_mapping.set_tag("enforce", True)
    for name_node, child_node in node.value:
        # Keys are taken verbatim from the key node; only values go through
        # the loader.
        key: str = name_node.value
        value: Any = loader.construct_object(child_node)
        print(f"Processing enforce key-value pair: {key}: {value}")
        tagged_mapping[key] = value
    return tagged_mapping
# Constructor for !delete
def delete_constructor(
    loader: ruamel.yaml.constructor.Constructor, node: ruamel.yaml.nodes.ScalarNode
) -> GitLabFormTagScalar:
    """Turn a ``!delete``-tagged scalar into a tagged boolean wrapper.

    Args:
        loader: Constructor used to read the node's scalar text.
        node: The scalar node carrying the tag.

    Returns:
        A ``GitLabFormTagScalar`` whose value and ``delete`` tag are both
        ``True`` only when the scalar text is "true" (case-insensitive);
        any other text yields ``False``.
    """
    scalar_text = loader.construct_scalar(node)
    value: bool = scalar_text.lower() == "true"
    print(f"Processing !delete tag - Setting delete={value}")
    return GitLabFormTagScalar(value, {"delete": value})
# Constructor for !inherit
def inherit_constructor(
    loader: ruamel.yaml.constructor.Constructor,
    node: Union[
        ruamel.yaml.nodes.MappingNode,
        ruamel.yaml.nodes.SequenceNode,
        ruamel.yaml.nodes.ScalarNode,
    ],
) -> Any:
    """Build a tagged container for a node carrying the ``!inherit`` tag.

    A sequence node is read as ``[inherit_value, *extras]``: the first element
    must be one of the ``InheritEnum`` values and every remaining element must
    be the string "keep_existing". Any other node is read as a scalar holding
    the inherit value.

    Args:
        loader: Constructor used to build the node's content.
        node: The node carrying the tag.

    Returns:
        For a sequence node, a ``GitLabFormTagList`` holding the extra
        elements, with the ``inherit`` tag set (and ``keep_existing`` when
        requested). Otherwise an empty ``GitLabFormTagOrderedDict`` with the
        ``inherit`` tag set.

    Raises:
        ValueError: If the inherit value is not an ``InheritEnum`` value, or
            if a sequence contains an extra element other than
            "keep_existing".
    """
    print("Processing !inherit tag - Checking structure")
    allowed_values = {e.value for e in InheritEnum}

    if isinstance(node, ruamel.yaml.nodes.SequenceNode):
        values: List[Any] = loader.construct_sequence(node, deep=True)
        main_value: Any = values[0] if values else None
        additional_tags: List[Any] = values[1:]
        print(f"Parsed inherit value: {main_value}, additional tags: {additional_tags}")
        # Reject here instead of falling through to construct_scalar(), which
        # is not meant for a sequence node. The isinstance() guard keeps an
        # unhashable first element from raising TypeError in the set lookup.
        if not (isinstance(main_value, str) and main_value in allowed_values):
            raise ValueError(f"Invalid inherit value: {main_value}")
        # Build empty and extend so this does not rely on the list class
        # accepting initial elements in its constructor.
        result = GitLabFormTagList()
        result.extend(additional_tags)
        result.set_tag("inherit", main_value)
        for extra in additional_tags:
            if extra == "keep_existing":
                print("Applying keep_existing with inherit")
                result.set_tag("keep_existing", True)
            else:
                raise ValueError(
                    f"Invalid combination of tags with inherit: {extra}"
                )
        return result

    # NOTE(review): mapping nodes also reach this point even though the
    # signature lists them; presumably construct_scalar() rejects them —
    # confirm whether mappings should be supported.
    value: str = loader.construct_scalar(node)
    print(f"Parsed scalar inherit value: {value}")
    if value in allowed_values:
        result = GitLabFormTagOrderedDict()
        result.set_tag("inherit", value)
        return result
    raise ValueError(f"Invalid inherit value: {value}")
# Constructor for !keep_existing
def keep_existing_constructor(
    loader: ruamel.yaml.constructor.Constructor, node: ruamel.yaml.nodes.ScalarNode
) -> GitLabFormTagScalar:
    """Turn a ``!keep_existing``-tagged scalar into a tagged boolean wrapper.

    Args:
        loader: Constructor used to read the node's scalar text.
        node: The scalar node carrying the tag.

    Returns:
        A ``GitLabFormTagScalar`` whose value and ``keep_existing`` tag are
        both ``True`` only when the scalar text is "true" (case-insensitive);
        any other text yields ``False``.
    """
    scalar_text = loader.construct_scalar(node)
    value: bool = scalar_text.lower() == "true"
    print(f"Processing !keep_existing tag - Setting keep_existing={value}")
    return GitLabFormTagScalar(value, {"keep_existing": value})
# Constructor for !include
def include_constructor(
    loader: ruamel.yaml.constructor.Constructor, node: ruamel.yaml.nodes.ScalarNode
) -> Any:
    """Load the YAML file named by an ``!include``-tagged scalar.

    The node's raw value is used as the path exactly as written; it is not
    resolved against the location of the including document.

    Args:
        loader: The active constructor (not used by this function).
        node: Scalar node whose ``value`` is the path of the file to load.

    Returns:
        Whatever a fresh ``ruamel.yaml.YAML()`` instance loads from the file.

    Raises:
        IOError: If the path does not exist.
    """
    print(f"Processing !include tag for file: {node.value}")
    # A separate YAML instance is created for the nested file.
    # NOTE(review): whether the custom tag constructors registered on the
    # module-level instance also apply to this one is not visible here —
    # confirm.
    nested_yaml = ruamel.yaml.YAML()
    file_path = pathlib.Path(node.value)
    if file_path.exists():
        with file_path.open("r", encoding="utf-8") as stream:
            return nested_yaml.load(stream)
    raise IOError(f"Included external YAML file '{file_path}' does not exist.")
# Register YAML Processor
# Shared YAML processor used by the tests below.
yaml: ruamel.yaml.YAML = ruamel.yaml.YAML()

# Register each custom tag with its constructor, in a fixed order.
for _tag_name, _tag_constructor in (
    ("!enforce", enforce_constructor),
    ("!delete", delete_constructor),
    ("!inherit", inherit_constructor),
    ("!keep_existing", keep_existing_constructor),
    ("!include", include_constructor),
):
    yaml.constructor.add_constructor(_tag_name, _tag_constructor)
# Test Cases
def test_yaml_parsing():
    """Load a document using ``!inherit force`` and check the recorded tag."""
    # NOTE(review): the document text is kept exactly as it appears in this
    # file; its indentation may have been lost — confirm the intended layout.
    document: str = """
project_settings:
!inherit force
topics:
- topicA
- topicB
"""
    loaded: Any = yaml.load(document)
    # NOTE(review): these checks target the document root; presumably the
    # node under `project_settings` was meant — confirm.
    assert isinstance(loaded, GitLabFormTagOrderedDict)
    assert loaded.get_tags()["inherit"] == "force"
def test_delete_tag():
    """Check that ``!delete false`` produces a tagged scalar with delete=False."""
    data: str = """
key: !delete false
"""
    parsed_data = yaml.load(data)
    # The document root is the mapping that holds `key`; the object built by
    # the !delete constructor is that key's value, so assert on it.
    tagged_value = parsed_data["key"]
    assert isinstance(tagged_value, GitLabFormTagScalar)
    assert tagged_value.tags["delete"] is False
def test_keep_existing_tag():
    """Check that ``!keep_existing true`` produces a tagged scalar set to True."""
    data: str = """
key: !keep_existing true
"""
    parsed_data = yaml.load(data)
    # The document root is the mapping that holds `key`; the object built by
    # the !keep_existing constructor is that key's value, so assert on it.
    tagged_value = parsed_data["key"]
    assert isinstance(tagged_value, GitLabFormTagScalar)
    assert tagged_value.tags["keep_existing"] is True
def test_inherit_with_keep_existing():
    """Load ``!inherit`` combined with ``!keep_existing`` and check both tags."""
    # NOTE(review): a scalar followed by a flow sequence on one node may not
    # be accepted by the YAML parser — confirm the intended syntax.
    document: str = """
settings: !inherit always [!keep_existing true]
"""
    loaded = yaml.load(document)
    # NOTE(review): these checks target the document root; presumably the
    # node under `settings` was meant — confirm.
    assert isinstance(loaded, GitLabFormTagList)
    recorded_tags = loaded.get_tags()
    assert recorded_tags["inherit"] == "always"
    # get_tags() is called once per lookup, matching the original call count.
    assert loaded.get_tags()["keep_existing"] is True
def test_invalid_inherit_combination():
    """Expect ``ValueError`` when ``!inherit`` is combined with an unknown tag."""
    # NOTE(review): if the parser rejects this document before any
    # constructor runs, the raised error may not be ValueError — confirm.
    document: str = """
settings: !inherit true [!invalid_tag]
"""
    with pytest.raises(ValueError):
        yaml.load(document)
# Run Tests if Main
if __name__ == "__main__":
    print("Running tests...")
    # Only the first scenario is enabled; the others stay switched off until
    # they are expected to pass.
    enabled_cases = (
        test_yaml_parsing,
        # test_delete_tag,
        # test_keep_existing_tag,
        # test_inherit_with_keep_existing,
        # test_invalid_inherit_combination,
    )
    for run_case in enabled_cases:
        run_case()
    # print("All tests passed!")
The tag in `someKey: !do_not_inherit someValue` is available only while parsing the scalar `someValue`, and I don't think it is possible to construct a mapping from that. Have you looked at loading (and possibly round-tripping) what you want (including tags) and then walking the tree afterwards? `ruamel.yaml` will safely load tags without the need to have a registered constructor. You have far better context access when handling tags post loading.