Source code for configupdater.parser

"""Parser for configuration files (normally ``*.cfg/*.ini``)

A configuration file consists of sections, lead by a "[section]" header,
and followed by "name: value" entries, with continuations and such in
the style of RFC 822.

The basic idea of **ConfigUpdater** is that a configuration file consists of
three kinds of building blocks: sections, comments and spaces for separation.
A section itself consists of three kinds of blocks: options, comments and
spaces. This gives us the corresponding data structures to describe a
configuration file.

A general block object contains the lines which were parsed and make up
the block. If a block object was not changed then during writing the same
lines that were parsed will be used to express the block. In case a block,
e.g. an option, was changed, it is marked as `updated` and its values will
be transformed into a corresponding string during an update of a
configuration file.


.. note::

   ConfigUpdater is based on Python's ConfigParser source code, specially regarding the
   ``parser`` module.
   The main parsing rules and algorithm are preserved, however ConfigUpdater implements
   its own modified version of the abstract syntax tree to support retaining comments
   and whitespace in an attempt to provide format-preserving document manipulation.
   The copyright and license of the original ConfigParser code is included as an
   attachment to ConfigUpdater's own license, at the root of the source code repository;
   see the file LICENSE for details.
"""

import io
import os
import re
import sys
from configparser import (
    DuplicateOptionError,
    DuplicateSectionError,
    MissingSectionHeaderError,
    NoOptionError,
    NoSectionError,
    ParsingError,
)
from types import MappingProxyType as ReadOnlyMapping
from typing import Callable, Optional, Tuple, Type, TypeVar, Union, cast, overload

if sys.version_info[:2] >= (3, 9):  # pragma: no cover
    from collections.abc import Iterable, Mapping

    List = list
    Dict = dict
else:  # pragma: no cover
    from typing import Iterable, List, Dict, Mapping

from .block import Comment, Space
from .document import Document
from .option import Option
from .section import Section

__all__ = [
    "NoSectionError",
    "DuplicateOptionError",
    "DuplicateSectionError",
    "NoOptionError",
    "ParsingError",
    "MissingSectionHeaderError",
    "InconsistentStateError",
    "Parser",
]

T = TypeVar("T")
E = TypeVar("E", bound=Exception)
D = TypeVar("D", bound=Document)

if sys.version_info[:2] >= (3, 7):  # pragma: no cover
    PathLike = Union[str, bytes, os.PathLike]
else:  # pragma: no cover
    PathLike = Union[str, os.PathLike]

ConfigContent = Union["Section", "Comment", "Space"]



[docs]
class InconsistentStateError(Exception):  # pragma: no cover (not expected to happen)
    """Internal parser error, some of the parsing algorithm assumptions was violated,
    and the internal state machine ended up in an unpredicted state.
    """

    def __init__(self, msg, fpname="<???>", lineno: int = -1, line: str = "???"):
        super().__init__(msg)
        self.args = (msg, fpname, lineno, line)

    def __str__(self):
        (msg, fpname, lineno, line) = self.args
        return f"{msg}\n{fpname}({lineno}): {line!r}"




[docs]
class Parser:
    """Parser for updating configuration files.

    ConfigUpdater's parser follows ConfigParser with some differences:

      * inline comments are treated as part of a key's value,
      * only a single config file can be updated at a time,
      * the original case of sections and keys are kept,
      * control over the position of a new section/key.

    Following features are **deliberately not** implemented:

      * interpolation of values,
      * propagation of parameters from the default section,
      * conversions of values,
      * passing key/value-pairs with ``default`` argument,
      * non-strict mode allowing duplicate sections and keys.
    """

    # Regular expressions for parsing section headers and options
    _SECT_TMPL: str = r"""
        \[                                 # [
        (?P<header>.+)                     # very permissive!
        \]                                 # ]
        (?P<raw_comment>.*)                # match any suffix
        """
    _OPT_TMPL: str = r"""
        (?P<option>.*?)                    # very permissive!
        \s*(?P<vi>{delim})\s*              # any number of space/tab,
                                           # followed by any of the
                                           # allowed delimiters,
                                           # followed by any space/tab
        (?P<value>.*)$                     # everything up to eol
        """
    _OPT_NV_TMPL: str = r"""
        (?P<option>.*?)                    # very permissive!
        \s*(?:                             # any number of space/tab,
        (?P<vi>{delim})\s*                 # optionally followed by
                                           # any of the allowed
                                           # delimiters, followed by any
                                           # space/tab
        (?P<value>.*))?$                   # everything up to eol
        """
    # Compiled regular expression for matching sections
    SECTCRE = re.compile(_SECT_TMPL, re.VERBOSE)
    # Compiled regular expression for matching options with typical separators
    OPTCRE = re.compile(_OPT_TMPL.format(delim="=|:"), re.VERBOSE)
    # Compiled regular expression for matching options with optional values
    # delimited using typical separators
    OPTCRE_NV = re.compile(_OPT_NV_TMPL.format(delim="=|:"), re.VERBOSE)
    # Compiled regular expression for matching leading whitespace in a line
    NONSPACECRE = re.compile(r"\S")

    def __init__(
        self,
        allow_no_value=False,
        *,
        delimiters: Tuple[str, ...] = ("=", ":"),
        comment_prefixes: Tuple[str, ...] = ("#", ";"),
        inline_comment_prefixes: Optional[Tuple[str, ...]] = None,
        strict: bool = True,
        empty_lines_in_values: bool = True,
        space_around_delimiters: bool = True,
        optionxform: Callable[[str], str] = str,
    ):
        """Constructor of the Parser

        Args:
            allow_no_value (bool): allow keys without a value, default False
            delimiters (tuple): delimiters for key/value pairs, default =, :
            comment_prefixes (tuple): prefix of comments, default # and ;
            inline_comment_prefixes (tuple): prefix of inline comment,
                default None
            strict (bool): each section must be unique as well as every key
                within a section, default True
            empty_lines_in_values (bool): each empty line marks the end of an option.
                Otherwise, internal empty lines of a multiline option are kept as part
                of the value, default: True.
            space_around_delimiters (bool): add a space before and after the
                delimiter, default True
        """
        self._document: Document  # bind later
        self._optionxform_fn = optionxform
        self._lineno = -1
        self._fpname = "<???>"

        self._filename: Optional[str] = None
        self._space_around_delimiters: bool = space_around_delimiters

        self._dict = dict  # no reason to let the user change this
        # keeping _sections to keep code aligned with ConfigParser but
        # _document takes the actual role instead. Only use self._document!
        self._sections: Dict[str, Dict[str, List[str]]] = self._dict()
        self._delimiters: Tuple[str, ...] = tuple(delimiters)
        if delimiters == ("=", ":"):
            self._optcre = self.OPTCRE_NV if allow_no_value else self.OPTCRE
        else:
            d = "|".join(re.escape(d) for d in delimiters)
            if allow_no_value:
                self._optcre = re.compile(self._OPT_NV_TMPL.format(delim=d), re.VERBOSE)
            else:
                self._optcre = re.compile(self._OPT_TMPL.format(delim=d), re.VERBOSE)
        self._comment_prefixes: Tuple[str, ...] = tuple(comment_prefixes or ())
        self._inline_comment_prefixes: Tuple[str, ...] = tuple(
            inline_comment_prefixes or ()
        )
        self._strict = strict
        self._allow_no_value = allow_no_value
        self._empty_lines_in_values = empty_lines_in_values

    def _get_args(self) -> dict:
        args = (
            "allow_no_value",
            "delimiters",
            "comment_prefixes",
            "inline_comment_prefixes",
            "strict",
            "empty_lines_in_values",
            "space_around_delimiters",
        )
        return {attr: getattr(self, f"_{attr}") for attr in args}

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}: {self._get_args()!r}>"

    @property
    def syntax_options(self) -> Mapping:
        return ReadOnlyMapping(self._get_args())

    @overload
    def read(self, filename: PathLike, encoding: Optional[str] = None) -> Document: ...

    @overload
    def read(self, filename: PathLike, encoding: str, into: D) -> D: ...

    @overload
    def read(
        self, filename: PathLike, *, into: D, encoding: Optional[str] = None
    ) -> D: ...


[docs]
    def read(self, filename, encoding=None, into=None):
        """Read and parse a filename.

        Args:
            filename (str): path to file
            encoding (Optional[str]): encoding of file, default None
            into (Optional[Document]): object to be populated with the parsed config
        """
        document = Document() if into is None else into
        with open(filename, encoding=encoding) as fp:
            self._read(fp, str(filename), document)
        self._filename = os.path.abspath(filename)
        return document


    @overload
    def read_file(self, f: Iterable[str], source: Optional[str]) -> Document: ...

    @overload
    def read_file(self, f: Iterable[str], source: Optional[str], into: D) -> D: ...

    @overload
    def read_file(
        self, f: Iterable[str], *, into: D, source: Optional[str] = None
    ) -> D: ...


[docs]
    def read_file(self, f, source=None, into=None):
        """Like read() but the argument must be a file-like object.

        The ``f`` argument must be iterable, returning one line at a time.
        Optional second argument is the ``source`` specifying the name of the
        file being read. If not given, it is taken from f.name. If ``f`` has no
        ``name`` attribute, ``<???>`` is used.

        Args:
            f: file like object
            source (Optional[str]): reference name for file object, default None
            into (Optional[Document]): object to be populated with the parsed config
        """
        if isinstance(f, str):
            raise RuntimeError("f must be a file-like object, not string!")
        document = Document() if into is None else into
        if source is None:
            try:
                source = cast(str, cast(io.FileIO, f).name)
            except AttributeError:
                source = "<???>"
        self._read(f, source, document)
        return document


    @overload
    def read_string(self, string: str, source: str = "<string>") -> Document: ...

    @overload
    def read_string(self, string: str, source: str, into: D) -> D: ...

    @overload
    def read_string(self, string: str, *, into: D, source: str = "<string>") -> D: ...


[docs]
    def read_string(self, string, source="<string>", into=None):
        """Read configuration from a given string.

        Args:
            string (str): string containing a configuration
            source (str): reference name for file object, default '<string>'
            into (Optional[Document]): object to be populated with the parsed config
        """
        sfile = io.StringIO(string)
        return self.read_file(sfile, source, into)


    def optionxform(self, string: str) -> str:
        fn = self._optionxform_fn
        return fn(string)

    @property
    def _last_block(self):
        return self._document.last_block

    def _update_curr_block(
        self, block_type: Type[Union[Comment, Space]]
    ) -> Union[Comment, Space]:
        if isinstance(self._last_block, block_type):
            return self._last_block
        else:
            new_block = block_type(container=self._document)
            self._document.append(new_block)
            return new_block

    def _add_comment(self, line: str):
        if isinstance(self._last_block, Section):
            self._last_block.add_comment(line)
        else:
            self._update_curr_block(Comment).add_line(line)

    def _add_section(self, sectname: str, raw_comment: str, line: str):
        new_section = Section(
            sectname, container=self._document, raw_comment=raw_comment
        )
        new_section.add_line(line)
        self._document.append(new_section)

    def _add_option(self, key: str, vi: str, value: Optional[str], line: str):
        if not isinstance(self._last_block, Section):  # pragma: no cover
            msg = f"{self._last_block!r} should be Section"
            raise InconsistentStateError(msg, self._fpname, self._lineno, line)
        entry = Option(
            key,
            value=None,
            delimiter=vi,
            container=self._last_block,
            space_around_delimiters=self._space_around_delimiters,
            line=line,
        )
        # Initially add the value as further lines might follow
        entry.add_value(value)
        self._last_block.add_option(entry)

    def _add_option_line(self, line: str):
        last_section = self._last_block
        if not isinstance(last_section, Section):  # pragma: no cover
            msg = f"{last_section!r} should be Section"
            raise InconsistentStateError(msg, self._fpname, self._lineno, line)
        # if empty_lines_in_values is true, we later will merge options and whitespace
        # (in the _check_values_with_blank_lines function called at the end).
        # This allows option values to have empty new lines inside them
        # So for now we can add parts of option values to Space nodes, than we check if
        # that is an error or not.
        last_option = last_section.last_block
        # handle special case of unindented comment in multi-line value
        if isinstance(last_option, Comment):
            last_option, comment = (
                cast(Option, last_option.previous_block),
                last_option.detach(),
            )
            # move lines from comment to last option to keep it.
            for comment_line in comment.lines:
                last_option.add_line(comment_line)
        if not isinstance(last_option, (Option, Space)):  # pragma: no cover
            msg = f"{last_option!r} should be Option or Space"
            raise InconsistentStateError(msg, self._fpname, self._lineno, line)
        last_option.add_line(line)

    def _add_space(self, line: str):
        if isinstance(self._last_block, Section):
            self._last_block.add_space(line)
        else:
            self._update_curr_block(Space).add_line(line)

    def _read(self, fp: Iterable[str], fpname: str, into: Document):
        """Parse a sectioned configuration file.

        Each section in a configuration file contains a header, indicated by
        a name in square brackets (`[]`), plus key/value options, indicated by
        `name` and `value` delimited with a specific substring (`=` or `:` by
        default).

        Values can span multiple lines, as long as they are indented deeper
        than the first line of the value. Depending on the parser's mode, blank
        lines may be treated as parts of multiline values or ignored.

        Configuration files may include comments, prefixed by specific
        characters (`#` and `;` by default). Comments may appear on their own
        in an otherwise empty line or may be entered in lines holding values or
        section names.

        Note: This method was borrowed from ConfigParser and we keep this
        mess here as close as possible to the original messod (pardon
        this german pun) for consistency reasons and later upgrades.
        """
        self._document = into
        elements_added: set = set()
        cursect: Optional[Dict[str, List[str]]] = None  # None or dict
        sectname: Optional[str] = None
        optname: Optional[str] = None
        lineno = 0
        indent_level = 0
        e: Optional[Exception] = None  # None, or an exception
        self._fpname = fpname
        for lineno, line in enumerate(fp, start=1):
            self._lineno = lineno
            comment_start: Optional[int] = sys.maxsize
            # strip inline comments
            inline_prefixes = {p: -1 for p in self._inline_comment_prefixes}
            while comment_start == sys.maxsize and inline_prefixes:
                next_prefixes = {}
                for prefix, index in inline_prefixes.items():
                    index = line.find(prefix, index + 1)
                    if index == -1:
                        continue
                    next_prefixes[prefix] = index
                    if index == 0 or (index > 0 and line[index - 1].isspace()):
                        comment_start = min(comment_start, index)
                inline_prefixes = next_prefixes
            # strip full line comments
            for prefix in self._comment_prefixes:
                # configparser would do line.strip() here,
                # we do rstrip() to allow comments in multi-line options
                if line.rstrip().startswith(prefix):
                    comment_start = 0
                    self._add_comment(line)  # HOOK
                    break
            if comment_start == sys.maxsize:
                comment_start = None
            value = line[:comment_start].strip()
            if not value:
                if self._empty_lines_in_values:
                    # add empty line to the value, but only if there was no
                    # comment on the line
                    if (
                        comment_start is None
                        and cursect is not None
                        and optname
                        and cursect[optname] is not None
                    ):
                        cursect[optname].append("")  # newlines added at join
                        if line.strip():
                            self._add_option_line(line)  # HOOK
                else:
                    # empty line marks end of value
                    indent_level = sys.maxsize
                if comment_start is None:
                    self._add_space(line)
                continue
            # continuation line?
            first_nonspace = self.NONSPACECRE.search(line)
            cur_indent_level = first_nonspace.start() if first_nonspace else 0
            if cursect is not None and optname and cur_indent_level > indent_level:
                cursect[optname].append(value)
                self._add_option_line(line)  # HOOK
            # a section header or option header?
            else:
                indent_level = cur_indent_level
                # is it a section header?
                mo = self.SECTCRE.match(value)
                if mo:
                    sectname = mo.group("header")
                    if sectname in self._sections:
                        if self._strict and sectname in elements_added:
                            raise DuplicateSectionError(sectname, fpname, lineno)
                        cursect = self._sections[sectname]
                        elements_added.add(sectname)
                    else:
                        cursect = self._dict()
                        self._sections[sectname] = cursect
                        elements_added.add(sectname)
                    # So sections can't start with a continuation line
                    optname = None
                    self._add_section(sectname, mo.group("raw_comment"), line)  # HOOK
                # no section header in the file?
                elif cursect is None:
                    raise MissingSectionHeaderError(fpname, lineno, line)
                # an option line?
                else:
                    mo = self._optcre.match(value)
                    if mo:
                        optname, vi, optval = mo.group("option", "vi", "value")
                        if not optname:
                            e = self._handle_error(e, fpname, lineno, line)
                        # optname = self.optionxform(optname.rstrip())
                        # keep original case of key
                        optname = optname.rstrip()
                        if sectname is None:  # pragma: no cover
                            msg = f"Could not find the section name for {optname}"
                            raise InconsistentStateError(msg, fpname, lineno, line)
                        if self._strict and (sectname, optname) in elements_added:
                            args = (sectname, optname, fpname, lineno)
                            raise DuplicateOptionError(*args)
                        elements_added.add((sectname, optname))
                        # This check is fine because the OPTCRE cannot
                        # match if it would set optval to None
                        if optval is not None:
                            optval = optval.strip()
                            cursect[optname] = [optval]
                        else:
                            # valueless option handling
                            cursect[optname] = []  # None in Configparser
                        self._add_option(optname, vi, optval, line)  # HOOK
                    # handle indented comment
                    elif (
                        first_nonspace is not None
                        and first_nonspace.group(0) in self._comment_prefixes
                    ):
                        self._add_comment(line)  # HOOK
                    else:
                        # a non-fatal parsing error occurred. set up the
                        # exception but keep going. the exception will be
                        # raised at the end of the file and will contain a
                        # list of all bogus lines
                        e = self._handle_error(e, fpname, lineno, line)
        # if any parsing errors occurred, raise an exception
        if e:
            raise e
        # if empty_lines_in_values is true, we have to eliminate spurious newlines
        if self._empty_lines_in_values:
            self._check_values_with_blank_lines()

    def _handle_error(
        self, exc: Optional[E], fpname: str, lineno: int, line: str
    ) -> Union[ParsingError, E]:
        e = exc or ParsingError(fpname)
        if hasattr(e, "append"):
            e.append(lineno, repr(line))
            # ^  the typechecker cannot handle hasattr
        return e

    def _check_values_with_blank_lines(self):
        for section in self._document.section_blocks():
            for option in section.option_blocks():
                next_block = option.next_block
                if isinstance(next_block, Space):
                    # check if space is part of a multi-line value with blank lines
                    if "".join(next_block.lines).strip():
                        self._merge_option_with_space(option, next_block)

    def _merge_option_with_space(self, option: Option, space: Space):
        last_val_idx = max(i for i, line in enumerate(space.lines) if line.strip())
        value_lines = space.lines[: last_val_idx + 1]
        merge_vals = "".join(line.lstrip(" ") for line in value_lines)
        option._values.append(merge_vals)
        option._multiline_value_joined = False
        option.lines.extend(space.lines[: last_val_idx + 1])
        del space.lines[: last_val_idx + 1]
Source code for configupdater.parser

ConfigUpdater

Navigation

Related Topics