Source code for spack.util.spack_yaml

# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

"""Enhanced YAML parsing for Spack.

- ``load()`` preserves YAML Marks on returned objects -- this allows
  us to access file and line information later.

- ``Our load methods use ``OrderedDict`` class instead of YAML's
  default unorderd dict.

"""
import collections
import collections.abc
import ctypes
import enum
import functools
import io
import re
from typing import IO, Any, Callable, Dict, List, Optional, Union

import ruamel.yaml
from ruamel.yaml import comments, constructor, emitter, error, representer

from llnl.util.tty.color import cextra, clen, colorize

import spack.error

# Only export load and dump
__all__ = ["load", "dump", "SpackYAMLError"]


# Make new classes so we can add custom attributes.
# Also, use OrderedDict instead of just dict.
class syaml_dict(collections.OrderedDict):
    def __repr__(self):
        mappings = (f"{k!r}: {v!r}" for k, v in self.items())
        return "{%s}" % ", ".join(mappings)


class syaml_list(list):
    __repr__ = list.__repr__


class syaml_str(str):
    __repr__ = str.__repr__


class syaml_int(int):
    __repr__ = int.__repr__


#: mapping from syaml type -> primitive type
syaml_types = {syaml_str: str, syaml_int: int, syaml_dict: dict, syaml_list: list}


markable_types = set(syaml_types) | {comments.CommentedSeq, comments.CommentedMap}


def syaml_type(obj):
    """Get the corresponding syaml wrapper type for a primitive type.

    Return:
        (object): syaml-typed copy of object, or the obj if no wrapper
    """
    for syaml_t, t in syaml_types.items():
        if type(obj) is not bool and isinstance(obj, t):
            return syaml_t(obj) if type(obj) is not syaml_t else obj
    return obj


def markable(obj):
    """Whether an object can be marked."""
    return type(obj) in markable_types


def mark(obj, node):
    """Add start and end markers to an object."""
    if hasattr(node, "start_mark"):
        obj._start_mark = node.start_mark
    elif hasattr(node, "_start_mark"):
        obj._start_mark = node._start_mark
    if hasattr(node, "end_mark"):
        obj._end_mark = node.end_mark
    elif hasattr(node, "_end_mark"):
        obj._end_mark = node._end_mark


def marked(obj):
    """Whether an object has been marked by spack_yaml."""
    return (
        hasattr(obj, "_start_mark")
        and obj._start_mark
        or hasattr(obj, "_end_mark")
        and obj._end_mark
    )


class OrderedLineConstructor(constructor.RoundTripConstructor):
    """YAML loader specifically intended for reading Spack configuration
    files. It preserves order and line numbers. It also has special-purpose
    logic for handling dictionary keys that indicate a Spack config
    override: namely any key that contains an "extra" ':' character.

    Mappings read in by this loader behave like an ordered dict.
    Sequences, mappings, and strings also have new attributes,
    ``_start_mark`` and ``_end_mark``, that preserve YAML line
    information in the output data.

    """

    #
    # Override construct_yaml_* so that we can apply _start_mark/_end_mark to
    # them. The superclass returns CommentedMap/CommentedSeq objects that we
    # can add attributes to (and we depend on their behavior to preserve
    # comments).
    #
    # The inherited sequence/dictionary constructors return empty instances
    # and fill in with mappings later.  We preserve this behavior.
    #

    def construct_yaml_str(self, node):
        value = super().construct_yaml_str(node)
        # There is no specific marker to indicate that we are parsing a key,
        # so this assumes we are talking about a Spack config override key if
        # it ends with a ':' and does not contain a '@' (which can appear
        # in config values that refer to Spack specs)
        if value and value.endswith(":") and "@" not in value:
            value = syaml_str(value[:-1])
            value.override = True
        else:
            value = syaml_str(value)
        mark(value, node)
        return value

    def construct_yaml_seq(self, node):
        gen = super().construct_yaml_seq(node)
        data = next(gen)
        if markable(data):
            mark(data, node)
        yield data
        for x in gen:
            pass

    def construct_yaml_map(self, node):
        gen = super().construct_yaml_map(node)
        data = next(gen)
        if markable(data):
            mark(data, node)
        yield data
        for x in gen:
            pass


# register above new constructors
OrderedLineConstructor.add_constructor(
    "tag:yaml.org,2002:map", OrderedLineConstructor.construct_yaml_map
)
OrderedLineConstructor.add_constructor(
    "tag:yaml.org,2002:seq", OrderedLineConstructor.construct_yaml_seq
)
OrderedLineConstructor.add_constructor(
    "tag:yaml.org,2002:str", OrderedLineConstructor.construct_yaml_str
)


class OrderedLineRepresenter(representer.RoundTripRepresenter):
    """Representer that preserves ordering and formats ``syaml_*`` objects.

    This representer preserves insertion ordering ``syaml_dict`` objects
    when they're written out.  It also has some custom formatters
    for ``syaml_*`` objects so that they are formatted like their
    regular Python equivalents, instead of ugly YAML pyobjects.
    """

    def ignore_aliases(self, _data):
        """Make the dumper NEVER print YAML aliases."""
        return True

    def represent_data(self, data):
        result = super().represent_data(data)
        if data is None:
            result.value = syaml_str("null")
        return result

    def represent_str(self, data):
        if hasattr(data, "override") and data.override:
            data = data + ":"
        return super().represent_str(data)


class SafeRepresenter(representer.RoundTripRepresenter):
    def ignore_aliases(self, _data):
        """Make the dumper NEVER print YAML aliases."""
        return True


# Make our special objects look like normal YAML ones.
representer.RoundTripRepresenter.add_representer(
    syaml_dict, representer.RoundTripRepresenter.represent_dict
)
representer.RoundTripRepresenter.add_representer(
    syaml_list, representer.RoundTripRepresenter.represent_list
)
representer.RoundTripRepresenter.add_representer(
    syaml_int, representer.RoundTripRepresenter.represent_int
)
representer.RoundTripRepresenter.add_representer(
    syaml_str, representer.RoundTripRepresenter.represent_str
)
OrderedLineRepresenter.add_representer(syaml_str, OrderedLineRepresenter.represent_str)


#: Max integer helps avoid passing too large a value to cyaml.
maxint = 2 ** (ctypes.sizeof(ctypes.c_int) * 8 - 1) - 1


def return_string_when_no_stream(func):
    @functools.wraps(func)
    def wrapper(data, stream=None, **kwargs):
        if stream:
            return func(data, stream=stream, **kwargs)
        stream = io.StringIO()
        func(data, stream=stream, **kwargs)
        return stream.getvalue()

    return wrapper


[docs] @return_string_when_no_stream def dump(data, stream=None, default_flow_style=False): handler = ConfigYAML(yaml_type=YAMLType.GENERIC_YAML) handler.yaml.default_flow_style = default_flow_style handler.yaml.width = maxint return handler.dump(data, stream=stream)
def file_line(mark): """Format a mark as <file>:<line> information.""" result = mark.name if mark.line: result += ":" + str(mark.line) return result #: Global for interactions between LineAnnotationDumper and dump_annotated(). #: This is nasty but YAML doesn't give us many ways to pass arguments -- #: yaml.dump() takes a class (not an instance) and instantiates the dumper #: itself, so we can't just pass an instance _ANNOTATIONS: List[str] = [] class LineAnnotationRepresenter(OrderedLineRepresenter): """Representer that generates per-line annotations. Annotations are stored in the ``_annotations`` global. After one dump pass, the strings in ``_annotations`` will correspond one-to-one with the lines output by the dumper. LineAnnotationDumper records blame information after each line is generated. As each line is parsed, it saves file/line info for each object printed. At the end of each line, it creates an annotation based on the saved mark and stores it in ``_annotations``. For an example of how to use this, see ``dump_annotated()``, which writes to a ``StringIO`` then joins the lines from that with annotations. """ def represent_data(self, data): """Force syaml_str to be passed through with marks.""" result = super().represent_data(data) if data is None: result.value = syaml_str("null") elif isinstance(result.value, str): result.value = syaml_str(data) if markable(result.value): mark(result.value, data) return result class LineAnnotationEmitter(emitter.Emitter): saved = None def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) del _ANNOTATIONS[:] self.colors = "KgrbmcyGRBMCY" self.filename_colors = {} def process_scalar(self): super().process_scalar() if marked(self.event.value): self.saved = self.event.value def write_line_break(self, data=None): super().write_line_break(data) if self.saved is None: _ANNOTATIONS.append(colorize("@K{---}")) return # append annotations at the end of each line if self.saved: mark = self.saved._start_mark color = self.filename_colors.get(mark.name) if not color: ncolors = len(self.colors) color = self.colors[len(self.filename_colors) % ncolors] self.filename_colors[mark.name] = color fmt = "@%s{%%s}" % color ann = fmt % mark.name if mark.line is not None: ann += ":@c{%s}" % (mark.line + 1) _ANNOTATIONS.append(colorize(ann)) else: _ANNOTATIONS.append("") def write_comment(self, comment, pre=False): pass class YAMLType(enum.Enum): """YAML configurations handled by Spack""" #: Generic YAML configuration GENERIC_YAML = enum.auto() #: A Spack config file with overrides SPACK_CONFIG_FILE = enum.auto() #: A Spack config file with line annotations ANNOTATED_SPACK_CONFIG_FILE = enum.auto() class ConfigYAML: """Handles the loading and dumping of Spack's YAML files.""" def __init__(self, yaml_type: YAMLType) -> None: self.yaml = ruamel.yaml.YAML(typ="rt", pure=True) if yaml_type == YAMLType.GENERIC_YAML: self.yaml.Representer = SafeRepresenter elif yaml_type == YAMLType.ANNOTATED_SPACK_CONFIG_FILE: self.yaml.Representer = LineAnnotationRepresenter self.yaml.Emitter = LineAnnotationEmitter self.yaml.Constructor = OrderedLineConstructor else: self.yaml.Representer = OrderedLineRepresenter self.yaml.Constructor = OrderedLineConstructor def load(self, stream: IO): """Loads the YAML data from a stream and returns it. Args: stream: stream to load from. Raises: SpackYAMLError: if anything goes wrong while loading """ try: return self.yaml.load(stream) except error.MarkedYAMLError as e: msg = "error parsing YAML" error_mark = e.context_mark if e.context_mark else e.problem_mark if error_mark: line, column = error_mark.line, error_mark.column msg += f": near {error_mark.name}, {str(line)}, {str(column)}" else: msg += f": {stream.name}" msg += f": {e.problem}" raise SpackYAMLError(msg, e) from e except Exception as e: msg = "cannot load Spack YAML configuration" raise SpackYAMLError(msg, e) from e def dump(self, data, stream: Optional[IO] = None, *, transform=None) -> None: """Dumps the YAML data to a stream. Args: data: data to be dumped stream: stream to dump the data into. Raises: SpackYAMLError: if anything goes wrong while dumping """ try: return self.yaml.dump(data, stream=stream, transform=transform) except Exception as e: msg = "cannot dump Spack YAML configuration" raise SpackYAMLError(msg, str(e)) from e def as_string(self, data) -> str: """Returns a string representing the YAML data passed as input.""" result = io.StringIO() self.dump(data, stream=result) return result.getvalue() def load_config(str_or_file): """Load but modify the loader instance so that it will add __line__ attributes to the returned object.""" handler = ConfigYAML(yaml_type=YAMLType.SPACK_CONFIG_FILE) return handler.load(str_or_file)
[docs] def load(*args, **kwargs): handler = ConfigYAML(yaml_type=YAMLType.GENERIC_YAML) return handler.load(*args, **kwargs)
@return_string_when_no_stream def dump_config(data, stream, *, default_flow_style=False, blame=False): if blame: handler = ConfigYAML(yaml_type=YAMLType.ANNOTATED_SPACK_CONFIG_FILE) handler.yaml.default_flow_style = default_flow_style handler.yaml.width = maxint return _dump_annotated(handler, data, stream) handler = ConfigYAML(yaml_type=YAMLType.SPACK_CONFIG_FILE) handler.yaml.default_flow_style = default_flow_style handler.yaml.width = maxint return handler.dump(data, stream) def _dump_annotated(handler, data, stream=None): sio = io.StringIO() handler.dump(data, sio) # write_line_break() is not called by YAML for empty lines, so we # skip empty lines here with \n+. lines = re.split(r"\n+", sio.getvalue().rstrip()) getvalue = None if stream is None: stream = io.StringIO() getvalue = stream.getvalue # write out annotations and lines, accounting for color width = max(clen(a) for a in _ANNOTATIONS) formats = ["%%-%ds %%s\n" % (width + cextra(a)) for a in _ANNOTATIONS] for f, a, l in zip(formats, _ANNOTATIONS, lines): stream.write(f % (a, l)) if getvalue: return getvalue() def sorted_dict(dict_like): """Return an ordered dict with all the fields sorted recursively. Args: dict_like (dict): dictionary to be sorted Returns: dictionary sorted recursively """ result = syaml_dict(sorted(dict_like.items())) for key, value in result.items(): if isinstance(value, collections.abc.Mapping): result[key] = sorted_dict(value) return result def extract_comments(data): """Extract and returns comments from some YAML data""" return getattr(data, comments.Comment.attrib, None) def set_comments(data, *, data_comments): """Set comments on some YAML data""" return setattr(data, comments.Comment.attrib, data_comments) def name_mark(name): """Returns a mark with just a name""" return error.StringMark(name, None, None, None, None, None) def anchorify(data: Union[dict, list], identifier: Callable[[Any], str] = repr) -> None: """Replace identical dict/list branches in tree with references to earlier instances. The YAML serializer generate anchors for them, resulting in small yaml files.""" anchors: Dict[str, Union[dict, list]] = {} stack: List[Union[dict, list]] = [data] while stack: item = stack.pop() for key, value in item.items() if isinstance(item, dict) else enumerate(item): if not isinstance(value, (dict, list)): continue id = identifier(value) anchor = anchors.get(id) if anchor is None: anchors[id] = value stack.append(value) else: item[key] = anchor # replace with reference
[docs] class SpackYAMLError(spack.error.SpackError): """Raised when there are issues with YAML parsing.""" def __init__(self, msg, yaml_error): super().__init__(msg, str(yaml_error))