# SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2019 Stefan Bakker <s.bakker777@gmail.com>
# SPDX-FileCopyrightText: 2019 Kirill Elagin <kirelagin@gmail.com>
# SPDX-FileCopyrightText: 2020 Dmitry Bogatov
# SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
# SPDX-FileCopyrightText: 2021 Alvar Penning
# SPDX-FileCopyrightText: 2021 Alliander N.V. <https://alliander.com>
# SPDX-FileCopyrightText: 2021 Robin Vobruba <hoijui.quaero@gmail.com>
# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
# SPDX-FileCopyrightText: 2022 Yaman Qalieh
# SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <carmenbianca@fsfe.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Functions for manipulating the comment headers of files."""
import logging
import re
from gettext import gettext as _
from typing import NamedTuple, Optional, Sequence, Tuple, Type, cast
from boolean.boolean import ParseError
from jinja2 import Environment, PackageLoader, Template
from license_expression import ExpressionError
from . import ReuseInfo
from ._util import (
contains_reuse_info,
extract_reuse_info,
merge_copyright_lines,
)
from .comment import (
CommentCreateError,
CommentParseError,
CommentStyle,
EmptyCommentStyle,
PythonCommentStyle,
)
# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)
# Jinja2 environment that loads templates from the "templates" directory of
# the "reuse" package, with ``trim_blocks`` enabled.
_ENV = Environment(loader=PackageLoader("reuse", "templates"), trim_blocks=True)
# Template used for new headers when the caller does not supply one.
DEFAULT_TEMPLATE = _ENV.get_template("default_template.jinja2")
# Matches a single line feed character.
_NEWLINE_PATTERN = re.compile(r"\n", re.MULTILINE)
class _TextSections(NamedTuple):
"""Used to split up text in three parts."""
before: str
middle: str
after: str
class MissingReuseInfo(Exception):
    """Some REUSE information is missing from the result.

    Raised when a freshly generated header does not contain the expected
    copyright lines or license expressions, and when no comment containing
    REUSE information can be found in a text.
    """
def _create_new_header(
    reuse_info: ReuseInfo,
    template: Optional[Template] = None,
    template_is_commented: bool = False,
    style: Optional[Type[CommentStyle]] = None,
    force_multi: bool = False,
) -> str:
    """Format a new header from scratch.

    The template is rendered with the sorted copyright lines, contributor
    lines and SPDX expressions of *reuse_info*. Unless
    *template_is_commented* is true, the rendered text is then turned into a
    comment using *style*.

    Args:
        reuse_info: the REUSE information to put in the header.
        template: template to render. Defaults to ``DEFAULT_TEMPLATE``.
        template_is_commented: if true, the rendered template is used as-is
            and *style* is not applied to it.
        style: comment style used to wrap the rendered template. Defaults to
            ``PythonCommentStyle``.
        force_multi: passed through to ``style.create_comment``.

    Returns:
        The header text, stripped of leading and trailing newlines.

    Raises:
        CommentCreateError: if a comment could not be created.
        MissingReuseInfo: if the generated comment is missing SPDX information.
    """
    if template is None:
        template = DEFAULT_TEMPLATE
    if style is None:
        style = cast(Type[CommentStyle], PythonCommentStyle)

    rendered = template.render(
        copyright_lines=sorted(reuse_info.copyright_lines),
        contributor_lines=sorted(reuse_info.contributor_lines),
        spdx_expressions=sorted(map(str, reuse_info.spdx_expressions)),
    ).strip("\n")

    if template_is_commented:
        result = rendered
    else:
        result = style.create_comment(rendered, force_multi=force_multi).strip(
            "\n"
        )

    # Verify that the result contains all ReuseInfo. Either kind of
    # information going missing is an error, so the two checks are joined
    # with ``or``: a header that lost only its copyright lines, or only its
    # license expressions, must not be accepted.
    new_reuse_info = extract_reuse_info(result)
    if (
        reuse_info.copyright_lines != new_reuse_info.copyright_lines
        or reuse_info.spdx_expressions != new_reuse_info.spdx_expressions
    ):
        _LOGGER.debug(
            _(
                "generated comment is missing copyright lines or license"
                " expressions"
            )
        )
        _LOGGER.debug(result)
        raise MissingReuseInfo()

    return result
# pylint: disable=too-many-arguments
def _indices_of_newlines(text: str) -> Sequence[int]:
indices = [0]
start = 0
while True:
match = _NEWLINE_PATTERN.search(text, start)
if match:
start = match.span()[1]
indices.append(start)
else:
break
return indices
def _find_first_spdx_comment(
    text: str, style: Optional[Type[CommentStyle]] = None
) -> _TextSections:
    """Locate the first comment in *text* that carries REUSE information.

    Every line start in *text* is tried in order. At each one, *style* is
    asked to parse a comment beginning at that position; positions where no
    comment can be parsed, or where the comment holds no REUSE information,
    are skipped.

    Args:
        text: the text to search.
        style: comment style used for parsing. Defaults to
            ``PythonCommentStyle``.

    Returns:
        A :class:`_TextSections` with the text preceding the comment, the
        comment itself followed by a newline, and the text following it.

    Raises:
        MissingReuseInfo: if no REUSE info can be found in any comment
    """
    comment_style = PythonCommentStyle if style is None else style

    for offset in _indices_of_newlines(text):
        try:
            candidate = comment_style.comment_at_first_character(text[offset:])
        except CommentParseError:
            # No comment starts on this line; try the next one.
            continue
        if not contains_reuse_info(candidate):
            continue
        # Skip one extra character after the comment: the line break that
        # is re-added to the middle section.
        resume_at = offset + len(candidate) + 1
        return _TextSections(
            text[:offset], candidate + "\n", text[resume_at:]
        )

    raise MissingReuseInfo()
def _extract_shebang(prefix: str, text: str) -> Tuple[str, str]:
"""Remove all lines that start with the shebang prefix from *text*. Return a
tuple of (shebang, reduced_text).
"""
shebang_lines = []
for line in text.splitlines():
if line.startswith(prefix):
shebang_lines.append(line)
text = text.replace(line, "", 1)
else:
break
shebang = "\n".join(shebang_lines)
return (shebang, text)
# pylint: disable=too-many-arguments
def find_and_replace_header(
    text: str,
    reuse_info: ReuseInfo,
    template: Optional[Template] = None,
    template_is_commented: bool = False,
    style: Optional[Type[CommentStyle]] = None,
    force_multi: bool = False,
    merge_copyrights: bool = False,
) -> str:
    """Find the first SPDX comment block in *text*. That comment block is
    replaced by a new comment block containing *reuse_info*. It is formatted
    according to *template*. The template is normally uncommented, but if it is
    already commented, *template_is_commented* should be :const:`True`.

    If both *style* and *template_is_commented* are provided, *style* is only
    used to find the header comment.

    If the comment block already contained some REUSE information, that
    information is merged into *reuse_info*.

    If no header exists, one is simply created.

    *text* is returned with a new header.

    Args:
        text: the full text in which the header is replaced.
        reuse_info: the REUSE information to put in the header.
        template: template handed to ``create_header``.
        template_is_commented: whether *template* already contains comment
            markers.
        style: comment style used to find the header. Defaults to
            ``PythonCommentStyle``.
        force_multi: passed through to ``create_header``.
        merge_copyrights: passed through to ``create_header``.

    Returns:
        The new text: any shebang or preceding text, then the new header,
        then the remainder of the original text.

    Raises:
        CommentCreateError: if a comment could not be created.
        MissingReuseInfo: if the generated comment is missing SPDX information.
    """
    if style is None:
        style = PythonCommentStyle

    try:
        before, header, after = _find_first_spdx_comment(text, style=style)
    except MissingReuseInfo:
        # No existing header: the whole text follows the header to be created.
        before, header, after = "", "", text

    # Workaround. EmptyCommentStyle should always be completely replaced.
    if style is EmptyCommentStyle:
        after = ""

    # Lazy %-style arguments avoid building the repr when debug logging is
    # disabled; the emitted message is unchanged.
    _LOGGER.debug("before = %r", before)
    _LOGGER.debug("header = %r", header)
    _LOGGER.debug("after = %r", after)

    # Keep special first-line-of-file lines as the first line in the file,
    # or say, move our comments after it.
    if style.SHEBANGS:
        for shebang in style.SHEBANGS:
            # Extract shebang from header and put it in before. It's a bit
            # messy, but it ends up working.
            if header.startswith(shebang) and not before.strip():
                before, header = _extract_shebang(shebang, header)
            elif after.startswith(shebang) and not any((before, header)):
                before, after = _extract_shebang(shebang, after)
            else:
                continue
            # Only the first matching shebang prefix is handled.
            break

    header = create_header(
        reuse_info,
        header,
        template=template,
        template_is_commented=template_is_commented,
        style=style,
        force_multi=force_multi,
        merge_copyrights=merge_copyrights,
    )

    # Reassemble: preceding text, a blank line, the header, a blank line, and
    # the remaining text. Parts that are only whitespace are left out.
    new_text = f"{header}\n"
    if before.strip():
        new_text = f"{before.rstrip()}\n\n{new_text}"
    if after.strip():
        new_text = f"{new_text}\n{after.lstrip()}"
    return new_text
# pylint: disable=too-many-arguments