0

I had a long post prepared which accidentally was lost due to inactivity. It could therefore be that I'm forgot some information, but I'll gladly provide more information.

I've read up on the specification of YAML and ruamel.yaml related post2 here on SO, but I'm not sure if what I'm trying to accomplish is possible. And, if so, it is possible, with ruamel (in a discussion with pyyaml maintainers it seems as if it's legal yaml and that it should also work with ruamel), e.g. for someKey: !do_not_inherit [ !tag taggedValue ].

An open source project that I'm involved with (gitlabform) is trying to expand our yaml syntax to be a bit more flexible and a possible solution would be if we can use custom implicit resolves.

Basically, we want to accomplish something like this and hence don't have to handles these additional keys/properties everywhere:

# normal case
someKey: someValue

# want to avoid
someKey:
  value: someValue
  do_not_inherit: true
  
# would prefer
someKey: !do_not_inherit someValue

or more advanced (not sure about valid syntax or, if it is possible)

project_settings: 
  !inherit force

  a_list:
    !keep_existing true
    - !delete true A
    - B
    
  a_map:
    !inherit no_force
    A: !delete
    B:
    C: some_key 

I've tried with some examples using ruamel but haven't been able to get it to work and I'm suspecting that I need to know more about ruamel to get to (potentially work).

#!/usr/bin/env python3

import sys
import pathlib
import ruamel.yaml
import pytest
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, Union, List
from collections import OrderedDict


# Enum for inherit values
class InheritEnum(str, Enum):
    TRUE = "true"
    FALSE = "false"
    NEVER = "never"
    ALWAYS = "always"
    FORCE = "force"


# Custom Ordered Dict to store tags
@dataclass
class GitLabFormTagOrderedDict(OrderedDict):
    """A custom ordered dictionary that tracks parsed YAML tags."""

    _tags: Dict[str, Any] = field(default_factory=dict, init=False)

    def set_tag(self, key: str, tag: Any) -> None:
        """Associate a custom tag with a key."""
        print(f"Setting tag: {key} = {tag}")
        self._tags[key] = tag

    def get_tags(self) -> Dict[str, Any]:
        """Retrieve all stored tags."""
        print("Retrieving tags")
        return self._tags


# Custom Scalar to store tags
@dataclass
class GitLabFormTagScalar:
    """A wrapper for scalar values that store tags."""

    value: Any
    tags: Dict[str, Any] = field(default_factory=dict)


# Custom List to store tags
@dataclass
class GitLabFormTagList(List[Any]):
    """A custom list that tracks parsed YAML tags."""

    _tags: Dict[str, Any] = field(default_factory=dict, init=False)

    def set_tag(self, key: str, tag: Any) -> None:
        """Associate a custom tag with the list."""
        print(f"Setting tag on list: {key} = {tag}")
        self._tags[key] = tag

    def get_tags(self) -> Dict[str, Any]:
        """Retrieve all stored tags."""
        print("Retrieving tags for list")
        return self._tags


# Constructor for !enforce
def enforce_constructor(
    loader: ruamel.yaml.constructor.Constructor, node: ruamel.yaml.nodes.MappingNode
) -> GitLabFormTagOrderedDict:
    print("Processing !enforce tag - Setting enforce=True")
    result = GitLabFormTagOrderedDict()
    result.set_tag("enforce", True)

    for key_node, value_node in node.value:
        key: str = key_node.value
        value: Any = loader.construct_object(value_node)
        print(f"Processing enforce key-value pair: {key}: {value}")
        result[key] = value

    return result


# Constructor for !delete
def delete_constructor(
    loader: ruamel.yaml.constructor.Constructor, node: ruamel.yaml.nodes.ScalarNode
) -> GitLabFormTagScalar:
    value: bool = loader.construct_scalar(node).lower() == "true"
    print(f"Processing !delete tag - Setting delete={value}")
    return GitLabFormTagScalar(value, {"delete": value})


# Constructor for !inherit
def inherit_constructor(
    loader: ruamel.yaml.constructor.Constructor,
    node: Union[
        ruamel.yaml.nodes.MappingNode,
        ruamel.yaml.nodes.SequenceNode,
        ruamel.yaml.nodes.ScalarNode,
    ],
) -> Any:
    print("Processing !inherit tag - Checking structure")

    if isinstance(node, ruamel.yaml.nodes.SequenceNode):
        values: List[Any] = loader.construct_sequence(node, deep=True)
        main_value: Any = values[0] if values else None
        additional_tags: List[Any] = values[1:]

        print(f"Parsed inherit value: {main_value}, additional tags: {additional_tags}")
        if main_value in {e.value for e in InheritEnum}:
            result = GitLabFormTagList(values[1:])
            result.set_tag("inherit", main_value)

            for extra in additional_tags:
                if extra == "keep_existing":
                    print("Applying keep_existing with inherit")
                    result.set_tag("keep_existing", True)
                else:
                    raise ValueError(
                        f"Invalid combination of tags with inherit: {extra}"
                    )

            return result

    value: str = loader.construct_scalar(node)
    print(f"Parsed scalar inherit value: {value}")
    if value in {e.value for e in InheritEnum}:
        result = GitLabFormTagOrderedDict()
        result.set_tag("inherit", value)
        return result

    raise ValueError(f"Invalid inherit value: {value}")


# Constructor for !keep_existing
def keep_existing_constructor(
    loader: ruamel.yaml.constructor.Constructor, node: ruamel.yaml.nodes.ScalarNode
) -> GitLabFormTagScalar:
    value: bool = loader.construct_scalar(node).lower() == "true"
    print(f"Processing !keep_existing tag - Setting keep_existing={value}")
    return GitLabFormTagScalar(value, {"keep_existing": value})


# Constructor for !include
def include_constructor(
    loader: ruamel.yaml.constructor.Constructor, node: ruamel.yaml.nodes.ScalarNode
) -> Any:
    print(f"Processing !include tag for file: {node.value}")
    yaml = ruamel.yaml.YAML()
    file_path = pathlib.Path(node.value)

    if not file_path.exists():
        raise IOError(f"Included external YAML file '{file_path}' does not exist.")

    with file_path.open("r", encoding="utf-8") as f:
        return yaml.load(f)


# Register YAML Processor
yaml: ruamel.yaml.YAML = ruamel.yaml.YAML()
yaml.constructor.add_constructor("!enforce", enforce_constructor)
yaml.constructor.add_constructor("!delete", delete_constructor)
yaml.constructor.add_constructor("!inherit", inherit_constructor)
yaml.constructor.add_constructor("!keep_existing", keep_existing_constructor)
yaml.constructor.add_constructor("!include", include_constructor)


# Test Cases
def test_yaml_parsing():
    data: str = """
    project_settings: 
      !inherit force
      topics:
        - topicA
        - topicB
    """
    parsed_data: Any = yaml.load(data)
    assert isinstance(parsed_data, GitLabFormTagOrderedDict)
    assert parsed_data.get_tags()["inherit"] == "force"


def test_delete_tag():
    data: str = """
    key: !delete false
    """
    parsed_data = yaml.load(data)
    assert isinstance(parsed_data, GitLabFormTagScalar)
    assert parsed_data.tags["delete"] is False


def test_keep_existing_tag():
    data: str = """
    key: !keep_existing true
    """
    parsed_data = yaml.load(data)
    assert isinstance(parsed_data, GitLabFormTagScalar)
    assert parsed_data.tags["keep_existing"] is True


def test_inherit_with_keep_existing():
    data: str = """
    settings: !inherit always [!keep_existing true]
    """
    parsed_data = yaml.load(data)
    assert isinstance(parsed_data, GitLabFormTagList)
    assert parsed_data.get_tags()["inherit"] == "always"
    assert parsed_data.get_tags()["keep_existing"] is True


def test_invalid_inherit_combination():
    data: str = """
    settings: !inherit true [!invalid_tag]
    """
    with pytest.raises(ValueError):
        yaml.load(data)


# Run Tests if Main
if __name__ == "__main__":
    print("Running tests...")
    test_yaml_parsing()
    # test_delete_tag()
    # test_keep_existing_tag()
    # test_inherit_with_keep_existing()
    # test_invalid_inherit_combination()
    # print("All tests passed!")
1
  • My gut says you cannot do what you want by changing the constructor. The tag in someKey: !do_not_inherit someValue is available only while parsing the scalar someValue and I don't think it is possible to construct a mapping from that. Have you looked at loading (and possible round-tripping) what you want (including tags) and then walk the tree afterwards? ruamel.yaml will safely load tags without the need to have registered constructor. You have far better context access when handling tags post loading. Commented Apr 22 at 19:50

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.