"""Parser for configuration files (normally ``*.cfg/*.ini``)
A configuration file consists of sections, led by a "[section]" header,
and followed by "name: value" entries, with continuations and such in
the style of RFC 822.
The basic idea of **ConfigUpdater** is that a configuration file consists of
three kinds of building blocks: sections, comments and spaces for separation.
A section itself consists of three kinds of blocks: options, comments and
spaces. This gives us the corresponding data structures to describe a
configuration file.
A general block object contains the lines which were parsed and make up
the block. If a block object was not changed then during writing the same
lines that were parsed will be used to express the block. In case a block,
e.g. an option, was changed, it is marked as `updated` and its values will
be transformed into a corresponding string during an update of a
configuration file.
.. note::
ConfigUpdater is based on Python's ConfigParser source code, especially regarding the
``parser`` module.
The main parsing rules and algorithm are preserved, however ConfigUpdater implements
its own modified version of the abstract syntax tree to support retaining comments
and whitespace in an attempt to provide format-preserving document manipulation.
The copyright and license of the original ConfigParser code is included as an
attachment to ConfigUpdater's own license, at the root of the source code repository;
see the file LICENSE for details.
"""
import io
import os
import re
import sys
from configparser import (
DuplicateOptionError,
DuplicateSectionError,
MissingSectionHeaderError,
NoOptionError,
NoSectionError,
ParsingError,
)
from types import MappingProxyType as ReadOnlyMapping
from typing import Callable, Optional, Tuple, Type, TypeVar, Union, cast, overload
if sys.version_info[:2] >= (3, 9): # pragma: no cover
from collections.abc import Iterable, Mapping
List = list
Dict = dict
else: # pragma: no cover
from typing import Iterable, List, Dict, Mapping
from .block import Comment, Space
from .document import Document
from .option import Option
from .section import Section
# Public names of this module: the ``configparser`` exception types imported
# above plus the two classes defined below.
__all__ = [
    "NoSectionError",
    "DuplicateOptionError",
    "DuplicateSectionError",
    "NoOptionError",
    "ParsingError",
    "MissingSectionHeaderError",
    "InconsistentStateError",
    "Parser",
]

# Type variables used in the annotations of this module.
T = TypeVar("T")
# Exception type threaded through ``Parser._handle_error``.
E = TypeVar("E", bound=Exception)
# Concrete ``Document`` (sub)class handed to the ``read*`` methods as ``into``.
D = TypeVar("D", bound=Document)

# Types accepted for the ``filename`` argument of ``Parser.read``.
# ``bytes`` is only part of the alias on Python 3.7 and newer.
if sys.version_info[:2] >= (3, 7):  # pragma: no cover
    PathLike = Union[str, bytes, os.PathLike]
else:  # pragma: no cover
    PathLike = Union[str, os.PathLike]

# Alias covering the three block kinds named here: sections, comments, spaces.
ConfigContent = Union["Section", "Comment", "Space"]
[docs]
class InconsistentStateError(Exception):  # pragma: no cover (not expected to happen)
    """Internal parser error: one of the parsing algorithm's assumptions was
    violated and the internal state machine ended up in an unpredicted state.

    Args:
        msg: description of the violated assumption
        fpname: name of the source being parsed, default ``<???>``
        lineno (int): number of the line being processed, default ``-1``
        line (str): text of the line being processed, default ``???``
    """

    def __init__(self, msg, fpname="<???>", lineno: int = -1, line: str = "???"):
        # BaseException stores its positional arguments as ``self.args``,
        # which is exactly the 4-tuple ``__str__`` unpacks again.
        super().__init__(msg, fpname, lineno, line)

    def __str__(self):
        message, source, number, text = self.args
        return "{}\n{}({}): {!r}".format(message, source, number, text)
[docs]
class Parser:
    """Parser for updating configuration files.

    ConfigUpdater's parser follows ConfigParser with some differences:

    * inline comments are treated as part of a key's value,
    * only a single config file can be updated at a time,
    * the original case of sections and keys are kept,
    * control over the position of a new section/key.

    Following features are **deliberately not** implemented:

    * interpolation of values,
    * propagation of parameters from the default section,
    * conversions of values,
    * passing key/value-pairs with ``default`` argument,
    * non-strict mode allowing duplicate sections and keys.
    """

    # Regular expressions for parsing section headers and options
    # All templates are written for ``re.VERBOSE``: whitespace and ``#`` comments
    # inside the pattern text are ignored by the regex engine.

    # Section header: captures the text between the brackets as ``header`` and
    # everything after the closing bracket as ``raw_comment``.
    _SECT_TMPL: str = r"""
\[ # [
(?P<header>.+) # very permissive!
\] # ]
(?P<raw_comment>.*) # match any suffix
"""
    # Option with a mandatory delimiter: captures ``option``, the delimiter as
    # ``vi`` and the rest of the line as ``value``. ``{delim}`` is filled in
    # through ``str.format`` with a ``|``-separated list of delimiters.
    _OPT_TMPL: str = r"""
(?P<option>.*?) # very permissive!
\s*(?P<vi>{delim})\s* # any number of space/tab,
# followed by any of the
# allowed delimiters,
# followed by any space/tab
(?P<value>.*)$ # everything up to eol
"""
    # Option whose delimiter and value are optional (used for ``allow_no_value``);
    # ``vi`` and ``value`` are ``None`` when the line holds only a key.
    _OPT_NV_TMPL: str = r"""
(?P<option>.*?) # very permissive!
\s*(?: # any number of space/tab,
(?P<vi>{delim})\s* # optionally followed by
# any of the allowed
# delimiters, followed by any
# space/tab
(?P<value>.*))?$ # everything up to eol
"""
    # Compiled regular expression for matching sections
    SECTCRE = re.compile(_SECT_TMPL, re.VERBOSE)
    # Compiled regular expression for matching options with typical separators
    OPTCRE = re.compile(_OPT_TMPL.format(delim="=|:"), re.VERBOSE)
    # Compiled regular expression for matching options with optional values
    # delimited using typical separators
    OPTCRE_NV = re.compile(_OPT_NV_TMPL.format(delim="=|:"), re.VERBOSE)
    # Compiled regular expression for matching leading whitespace in a line
    # (it finds the first non-whitespace character; ``_read`` uses the position
    # of that match as the indentation level of the line)
    NONSPACECRE = re.compile(r"\S")
def __init__(
self,
allow_no_value=False,
*,
delimiters: Tuple[str, ...] = ("=", ":"),
comment_prefixes: Tuple[str, ...] = ("#", ";"),
inline_comment_prefixes: Optional[Tuple[str, ...]] = None,
strict: bool = True,
empty_lines_in_values: bool = True,
space_around_delimiters: bool = True,
optionxform: Callable[[str], str] = str,
):
"""Constructor of the Parser
Args:
allow_no_value (bool): allow keys without a value, default False
delimiters (tuple): delimiters for key/value pairs, default =, :
comment_prefixes (tuple): prefix of comments, default # and ;
inline_comment_prefixes (tuple): prefix of inline comment,
default None
strict (bool): each section must be unique as well as every key
within a section, default True
empty_lines_in_values (bool): each empty line marks the end of an option.
Otherwise, internal empty lines of a multiline option are kept as part
of the value, default: True.
space_around_delimiters (bool): add a space before and after the
delimiter, default True
"""
self._document: Document # bind later
self._optionxform_fn = optionxform
self._lineno = -1
self._fpname = "<???>"
self._filename: Optional[str] = None
self._space_around_delimiters: bool = space_around_delimiters
self._dict = dict # no reason to let the user change this
# keeping _sections to keep code aligned with ConfigParser but
# _document takes the actual role instead. Only use self._document!
self._sections: Dict[str, Dict[str, List[str]]] = self._dict()
self._delimiters: Tuple[str, ...] = tuple(delimiters)
if delimiters == ("=", ":"):
self._optcre = self.OPTCRE_NV if allow_no_value else self.OPTCRE
else:
d = "|".join(re.escape(d) for d in delimiters)
if allow_no_value:
self._optcre = re.compile(self._OPT_NV_TMPL.format(delim=d), re.VERBOSE)
else:
self._optcre = re.compile(self._OPT_TMPL.format(delim=d), re.VERBOSE)
self._comment_prefixes: Tuple[str, ...] = tuple(comment_prefixes or ())
self._inline_comment_prefixes: Tuple[str, ...] = tuple(
inline_comment_prefixes or ()
)
self._strict = strict
self._allow_no_value = allow_no_value
self._empty_lines_in_values = empty_lines_in_values
def _get_args(self) -> dict:
args = (
"allow_no_value",
"delimiters",
"comment_prefixes",
"inline_comment_prefixes",
"strict",
"empty_lines_in_values",
"space_around_delimiters",
)
return {attr: getattr(self, f"_{attr}") for attr in args}
def __repr__(self) -> str:
return f"<{self.__class__.__name__}: {self._get_args()!r}>"
@property
def syntax_options(self) -> Mapping:
return ReadOnlyMapping(self._get_args())
@overload
def read(self, filename: PathLike, encoding: Optional[str] = None) -> Document:
...
@overload
def read(self, filename: PathLike, encoding: str, into: D) -> D:
...
@overload
def read(self, filename: PathLike, *, into: D, encoding: Optional[str] = None) -> D:
...
[docs]
def read(self, filename, encoding=None, into=None):
"""Read and parse a filename.
Args:
filename (str): path to file
encoding (Optional[str]): encoding of file, default None
into (Optional[Document]): object to be populated with the parsed config
"""
document = Document() if into is None else into
with open(filename, encoding=encoding) as fp:
self._read(fp, str(filename), document)
self._filename = os.path.abspath(filename)
return document
@overload
def read_file(self, f: Iterable[str], source: Optional[str]) -> Document:
...
@overload
def read_file(self, f: Iterable[str], source: Optional[str], into: D) -> D:
...
@overload
def read_file(
self, f: Iterable[str], *, into: D, source: Optional[str] = None
) -> D:
...
[docs]
def read_file(self, f, source=None, into=None):
"""Like read() but the argument must be a file-like object.
The ``f`` argument must be iterable, returning one line at a time.
Optional second argument is the ``source`` specifying the name of the
file being read. If not given, it is taken from f.name. If ``f`` has no
``name`` attribute, ``<???>`` is used.
Args:
f: file like object
source (Optional[str]): reference name for file object, default None
into (Optional[Document]): object to be populated with the parsed config
"""
if isinstance(f, str):
raise RuntimeError("f must be a file-like object, not string!")
document = Document() if into is None else into
if source is None:
try:
source = cast(str, cast(io.FileIO, f).name)
except AttributeError:
source = "<???>"
self._read(f, source, document)
return document
@overload
def read_string(self, string: str, source: str = "<string>") -> Document:
...
@overload
def read_string(self, string: str, source: str, into: D) -> D:
...
@overload
def read_string(self, string: str, *, into: D, source: str = "<string>") -> D:
...
[docs]
def read_string(self, string, source="<string>", into=None):
"""Read configuration from a given string.
Args:
string (str): string containing a configuration
source (str): reference name for file object, default '<string>'
into (Optional[Document]): object to be populated with the parsed config
"""
sfile = io.StringIO(string)
return self.read_file(sfile, source, into)
def optionxform(self, string: str) -> str:
fn = self._optionxform_fn
return fn(string)
@property
def _last_block(self):
return self._document.last_block
def _update_curr_block(
self, block_type: Type[Union[Comment, Space]]
) -> Union[Comment, Space]:
if isinstance(self._last_block, block_type):
return self._last_block
else:
new_block = block_type(container=self._document)
self._document.append(new_block)
return new_block
def _add_comment(self, line: str):
    """Store a comment line.

    When the document's last block is a section, the section receives the
    comment; otherwise the line goes into the trailing ``Comment`` block of
    the document (created on demand).
    """
    tail = self._last_block
    if isinstance(tail, Section):
        tail.add_comment(line)
        return
    self._update_curr_block(Comment).add_line(line)
def _add_section(self, sectname: str, raw_comment: str, line: str):
    """Create a ``Section`` for a parsed header line and append it to the document.

    Args:
        sectname (str): name found between the brackets
        raw_comment (str): text following the closing bracket
        line (str): the complete header line as read from the input
    """
    document = self._document
    section = Section(sectname, container=document, raw_comment=raw_comment)
    section.add_line(line)
    document.append(section)
def _add_option(self, key: str, vi: str, value: Optional[str], line: str):
    """Create an ``Option`` from a parsed line and add it to the current section.

    Args:
        key (str): option name
        vi (str): delimiter matched between key and value
        value (Optional[str]): first line of the value, if any
        line (str): the complete line as read from the input

    Raises:
        InconsistentStateError: if the document's last block is not a section.
    """
    section = self._last_block
    if not isinstance(section, Section):  # pragma: no cover
        msg = f"{section!r} should be Section"
        raise InconsistentStateError(msg, self._fpname, self._lineno, line)
    option = Option(
        key,
        value=None,
        delimiter=vi,
        container=section,
        space_around_delimiters=self._space_around_delimiters,
        line=line,
    )
    # The value is added separately, as an initial entry that continuation
    # lines may still follow.
    option.add_value(value)
    section.add_option(option)
def _add_option_line(self, line: str):
    """Attach a continuation line to the last block of the current section.

    Raises:
        InconsistentStateError: if the document's last block is not a section,
            or the block receiving the line is neither an option nor a space.
    """
    section = self._last_block
    if not isinstance(section, Section):  # pragma: no cover
        msg = f"{section!r} should be Section"
        raise InconsistentStateError(msg, self._fpname, self._lineno, line)
    # With empty_lines_in_values enabled, options and whitespace are merged
    # later on (in _check_values_with_blank_lines, called at the end of
    # parsing), which is what lets option values contain empty lines.
    # Until then parts of an option value may be added to Space nodes; whether
    # that is an error is checked afterwards.
    target = section.last_block
    # special case: unindented comment inside a multi-line value
    if isinstance(target, Comment):
        stray_comment = target
        # the predecessor has to be looked up before the comment is detached
        target = cast(Option, stray_comment.previous_block)
        detached = stray_comment.detach()
        # keep the comment by moving its lines into the preceding option
        for comment_line in detached.lines:
            target.add_line(comment_line)
    if not isinstance(target, (Option, Space)):  # pragma: no cover
        msg = f"{target!r} should be Option or Space"
        raise InconsistentStateError(msg, self._fpname, self._lineno, line)
    target.add_line(line)
def _add_space(self, line: str):
    """Store a blank line.

    When the document's last block is a section, the section receives the
    line; otherwise it goes into the trailing ``Space`` block of the
    document (created on demand).
    """
    tail = self._last_block
    if isinstance(tail, Section):
        tail.add_space(line)
        return
    self._update_curr_block(Space).add_line(line)
def _read(self, fp: Iterable[str], fpname: str, into: Document):
    """Parse a sectioned configuration file.

    Each section in a configuration file contains a header, indicated by
    a name in square brackets (`[]`), plus key/value options, indicated by
    `name` and `value` delimited with a specific substring (`=` or `:` by
    default).

    Values can span multiple lines, as long as they are indented deeper
    than the first line of the value. Depending on the parser's mode, blank
    lines may be treated as parts of multiline values or ignored.

    Configuration files may include comments, prefixed by specific
    characters (`#` and `;` by default). Comments may appear on their own
    in an otherwise empty line or may be entered in lines holding values or
    section names.

    Note: This method was borrowed from ConfigParser and we keep this
    mess here as close as possible to the original messod (pardon
    this german pun) for consistency reasons and later upgrades.

    The ConfigParser-style bookkeeping lives in ``self._sections``; the
    document tree is built by the calls marked ``# HOOK``.

    Args:
        fp: iterable yielding the input one line at a time
        fpname (str): name of the source, used in error reports
        into (Document): document that receives the parsed blocks

    Raises:
        DuplicateSectionError: in strict mode, when a section name occurs
            twice in ``fp``.
        DuplicateOptionError: in strict mode, when an option occurs twice
            in the same section.
        MissingSectionHeaderError: when content appears before any section
            header.
        ParsingError: after the whole input was read, if any line could not
            be parsed.
        InconsistentStateError: if an option is found while no section name
            is known.
    """
    self._document = into
    # section names and (section, option) pairs seen during this call;
    # consulted for the duplicate checks in strict mode
    elements_added: set = set()
    cursect: Optional[Dict[str, List[str]]] = None  # None or dict
    sectname: Optional[str] = None
    optname: Optional[str] = None
    lineno = 0
    indent_level = 0
    e: Optional[Exception] = None  # None, or an exception
    self._fpname = fpname
    for lineno, line in enumerate(fp, start=1):
        self._lineno = lineno
        # sys.maxsize serves as the "no comment found yet" marker
        comment_start: Optional[int] = sys.maxsize
        # strip inline comments
        inline_prefixes = {p: -1 for p in self._inline_comment_prefixes}
        while comment_start == sys.maxsize and inline_prefixes:
            next_prefixes = {}
            for prefix, index in inline_prefixes.items():
                index = line.find(prefix, index + 1)
                if index == -1:
                    continue
                next_prefixes[prefix] = index
                # a prefix only starts a comment at column 0 or after whitespace
                if index == 0 or (index > 0 and line[index - 1].isspace()):
                    comment_start = min(comment_start, index)
            # only prefixes that occurred are searched again, further right
            inline_prefixes = next_prefixes
        # strip full line comments
        for prefix in self._comment_prefixes:
            # configparser would do line.strip() here,
            # we do rstrip() to allow comments in multi-line options
            if line.rstrip().startswith(prefix):
                comment_start = 0
                self._add_comment(line)  # HOOK
                break
        if comment_start == sys.maxsize:
            comment_start = None
        # the line's content with any comment and surrounding whitespace removed
        value = line[:comment_start].strip()
        if not value:
            if self._empty_lines_in_values:
                # add empty line to the value, but only if there was no
                # comment on the line
                if (
                    comment_start is None
                    and cursect is not None
                    and optname
                    and cursect[optname] is not None
                ):
                    cursect[optname].append("")  # newlines added at join
                    if line.strip():
                        self._add_option_line(line)  # HOOK
            else:
                # empty line marks end of value
                # (no later line can be indented deeper than sys.maxsize)
                indent_level = sys.maxsize
            if comment_start is None:
                self._add_space(line)
            continue
        # continuation line?
        first_nonspace = self.NONSPACECRE.search(line)
        cur_indent_level = first_nonspace.start() if first_nonspace else 0
        if cursect is not None and optname and cur_indent_level > indent_level:
            cursect[optname].append(value)
            self._add_option_line(line)  # HOOK
        # a section header or option header?
        else:
            indent_level = cur_indent_level
            # is it a section header?
            mo = self.SECTCRE.match(value)
            if mo:
                sectname = mo.group("header")
                if sectname in self._sections:
                    if self._strict and sectname in elements_added:
                        raise DuplicateSectionError(sectname, fpname, lineno)
                    cursect = self._sections[sectname]
                    elements_added.add(sectname)
                else:
                    cursect = self._dict()
                    self._sections[sectname] = cursect
                    elements_added.add(sectname)
                # So sections can't start with a continuation line
                optname = None
                self._add_section(sectname, mo.group("raw_comment"), line)  # HOOK
            # no section header in the file?
            elif cursect is None:
                raise MissingSectionHeaderError(fpname, lineno, line)
            # an option line?
            else:
                mo = self._optcre.match(value)
                if mo:
                    optname, vi, optval = mo.group("option", "vi", "value")
                    if not optname:
                        # recorded as a parsing error; processing of the
                        # line continues below
                        e = self._handle_error(e, fpname, lineno, line)
                    # optname = self.optionxform(optname.rstrip())
                    # keep original case of key
                    optname = optname.rstrip()
                    if sectname is None:  # pragma: no cover
                        msg = f"Could not find the section name for {optname}"
                        raise InconsistentStateError(msg, fpname, lineno, line)
                    if self._strict and (sectname, optname) in elements_added:
                        args = (sectname, optname, fpname, lineno)
                        raise DuplicateOptionError(*args)
                    elements_added.add((sectname, optname))
                    # This check is fine because the OPTCRE cannot
                    # match if it would set optval to None
                    if optval is not None:
                        optval = optval.strip()
                        cursect[optname] = [optval]
                    else:
                        # valueless option handling
                        cursect[optname] = []  # None in Configparser
                    self._add_option(optname, vi, optval, line)  # HOOK
                # handle indented comment
                elif (
                    first_nonspace is not None
                    and first_nonspace.group(0) in self._comment_prefixes
                ):
                    self._add_comment(line)  # HOOK
                else:
                    # a non-fatal parsing error occurred. set up the
                    # exception but keep going. the exception will be
                    # raised at the end of the file and will contain a
                    # list of all bogus lines
                    e = self._handle_error(e, fpname, lineno, line)
    # if any parsing errors occurred, raise an exception
    if e:
        raise e
    # if empty_lines_in_values is true, we have to eliminate spurious newlines
    if self._empty_lines_in_values:
        self._check_values_with_blank_lines()
def _handle_error(
self, exc: Optional[E], fpname: str, lineno: int, line: str
) -> Union[ParsingError, E]:
e = exc or ParsingError(fpname)
if hasattr(e, "append"):
e.append(lineno, repr(line))
# ^ the typechecker cannot handle hasattr
return e
def _check_values_with_blank_lines(self):
    """Merge value lines that ended up in ``Space`` blocks back into options.

    For every option directly followed by a ``Space`` block that contains
    any non-whitespace text, the two are handed to
    ``_merge_option_with_space``.
    """
    for section in self._document.section_blocks():
        for option in section.option_blocks():
            follower = option.next_block
            if not isinstance(follower, Space):
                continue
            # non-blank content means the space is part of a multi-line
            # value that contains blank lines
            if "".join(follower.lines).strip():
                self._merge_option_with_space(option, follower)
def _merge_option_with_space(self, option: Option, space: Space):
last_val_idx = max(i for i, line in enumerate(space.lines) if line.strip())
value_lines = space.lines[: last_val_idx + 1]
merge_vals = "".join(line.lstrip(" ") for line in value_lines)
option._values.append(merge_vals)
option._multiline_value_joined = False
option.lines.extend(space.lines[: last_val_idx + 1])
del space.lines[: last_val_idx + 1]