Source code for reuse.header

# SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2019 Stefan Bakker <s.bakker777@gmail.com>
# SPDX-FileCopyrightText: 2019 Kirill Elagin <kirelagin@gmail.com>
# SPDX-FileCopyrightText: 2020 Dmitry Bogatov
# SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
# SPDX-FileCopyrightText: 2021 Alvar Penning
# SPDX-FileCopyrightText: 2021 Alliander N.V. <https://alliander.com>
# SPDX-FileCopyrightText: 2021 Robin Vobruba <hoijui.quaero@gmail.com>
# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
# SPDX-FileCopyrightText: 2022 Yaman Qalieh
# SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <carmenbianca@fsfe.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""Functions for manipulating the comment headers of files."""

import logging
import re
from gettext import gettext as _
from typing import NamedTuple, Optional, Sequence, Tuple, Type, cast

from boolean.boolean import ParseError
from jinja2 import Environment, PackageLoader, Template
from license_expression import ExpressionError

from . import ReuseInfo
from ._util import (
    contains_reuse_info,
    extract_reuse_info,
    merge_copyright_lines,
)
from .comment import (
    CommentCreateError,
    CommentParseError,
    CommentStyle,
    EmptyCommentStyle,
    PythonCommentStyle,
)

_LOGGER = logging.getLogger(__name__)

_ENV = Environment(loader=PackageLoader("reuse", "templates"), trim_blocks=True)
DEFAULT_TEMPLATE = _ENV.get_template("default_template.jinja2")

_NEWLINE_PATTERN = re.compile(r"\n", re.MULTILINE)


class _TextSections(NamedTuple):
    """Used to split up text in three parts."""

    before: str
    middle: str
    after: str


[docs]class MissingReuseInfo(Exception): """Some REUSE information is missing from the result."""
def _create_new_header( reuse_info: ReuseInfo, template: Optional[Template] = None, template_is_commented: bool = False, style: Optional[Type[CommentStyle]] = None, force_multi: bool = False, ) -> str: """Format a new header from scratch. Raises: CommentCreateError: if a comment could not be created. MissingReuseInfo: if the generated comment is missing SPDX information. """ if template is None: template = DEFAULT_TEMPLATE if style is None: style = cast(Type[CommentStyle], PythonCommentStyle) rendered = template.render( copyright_lines=sorted(reuse_info.copyright_lines), contributor_lines=sorted(reuse_info.contributor_lines), spdx_expressions=sorted(map(str, reuse_info.spdx_expressions)), ).strip("\n") if template_is_commented: result = rendered else: result = style.create_comment(rendered, force_multi=force_multi).strip( "\n" ) # Verify that the result contains all ReuseInfo. new_reuse_info = extract_reuse_info(result) if ( reuse_info.copyright_lines != new_reuse_info.copyright_lines and reuse_info.spdx_expressions != new_reuse_info.spdx_expressions ): _LOGGER.debug( _( "generated comment is missing copyright lines or license" " expressions" ) ) _LOGGER.debug(result) raise MissingReuseInfo() return result # pylint: disable=too-many-arguments
[docs]def create_header( reuse_info: ReuseInfo, header: Optional[str] = None, template: Optional[Template] = None, template_is_commented: bool = False, style: Optional[Type[CommentStyle]] = None, force_multi: bool = False, merge_copyrights: bool = False, ) -> str: """Create a header containing *reuse_info*. *header* is an optional argument containing a header which should be modified to include *reuse_info*. If *header* is not given, a brand new header is created. *template*, *template_is_commented*, and *style* determine what the header will look like, and whether it will be commented or not. Raises: CommentCreateError: if a comment could not be created. MissingReuseInfo: if the generated comment is missing SPDX information. """ if template is None: template = DEFAULT_TEMPLATE if style is None: style = PythonCommentStyle new_header = "" if header: try: existing_spdx = extract_reuse_info(header) except (ExpressionError, ParseError) as err: raise CommentCreateError( "existing header contains an erroneous SPDX expression" ) from err if merge_copyrights: spdx_copyrights = merge_copyright_lines( reuse_info.copyright_lines.union(existing_spdx.copyright_lines), ) else: spdx_copyrights = reuse_info.copyright_lines.union( existing_spdx.copyright_lines ) # TODO: This behaviour does not match the docstring. reuse_info = existing_spdx | reuse_info reuse_info = reuse_info.copy(copyright_lines=spdx_copyrights) new_header += _create_new_header( reuse_info, template=template, template_is_commented=template_is_commented, style=style, force_multi=force_multi, ) return new_header
def _indices_of_newlines(text: str) -> Sequence[int]: indices = [0] start = 0 while True: match = _NEWLINE_PATTERN.search(text, start) if match: start = match.span()[1] indices.append(start) else: break return indices def _find_first_spdx_comment( text: str, style: Optional[Type[CommentStyle]] = None ) -> _TextSections: """Find the first SPDX comment in the file. Return a tuple with everything preceding the comment, the comment itself, and everything following it. Raises: MissingReuseInfo: if no REUSE info can be found in any comment """ if style is None: style = PythonCommentStyle indices = _indices_of_newlines(text) for index in indices: try: comment = style.comment_at_first_character(text[index:]) except CommentParseError: continue if contains_reuse_info(comment): return _TextSections( text[:index], comment + "\n", text[index + len(comment) + 1 :] ) raise MissingReuseInfo() def _extract_shebang(prefix: str, text: str) -> Tuple[str, str]: """Remove all lines that start with the shebang prefix from *text*. Return a tuple of (shebang, reduced_text). """ shebang_lines = [] for line in text.splitlines(): if line.startswith(prefix): shebang_lines.append(line) text = text.replace(line, "", 1) else: break shebang = "\n".join(shebang_lines) return (shebang, text) # pylint: disable=too-many-arguments
[docs]def find_and_replace_header( text: str, reuse_info: ReuseInfo, template: Optional[Template] = None, template_is_commented: bool = False, style: Optional[Type[CommentStyle]] = None, force_multi: bool = False, merge_copyrights: bool = False, ) -> str: """Find the first SPDX comment block in *text*. That comment block is replaced by a new comment block containing *reuse_info*. It is formatted as according to *template*. The template is normally uncommented, but if it is already commented, *template_is_commented* should be :const:`True`. If both *style* and *template_is_commented* are provided, *style* is only used to find the header comment. If the comment block already contained some REUSE information, that information is merged into *reuse_info*. If no header exists, one is simply created. *text* is returned with a new header. Raises: CommentCreateError: if a comment could not be created. MissingReuseInfo: if the generated comment is missing SPDX information. """ if style is None: style = PythonCommentStyle try: before, header, after = _find_first_spdx_comment(text, style=style) except MissingReuseInfo: before, header, after = "", "", text # Workaround. EmptyCommentStyle should always be completely replaced. if style is EmptyCommentStyle: after = "" _LOGGER.debug(f"before = {repr(before)}") _LOGGER.debug(f"header = {repr(header)}") _LOGGER.debug(f"after = {repr(after)}") # Keep special first-line-of-file lines as the first line in the file, # or say, move our comments after it. if style.SHEBANGS: for shebang in style.SHEBANGS: # Extract shebang from header and put it in before. It's a bit # messy, but it ends up working. if header.startswith(shebang) and not before.strip(): before, header = _extract_shebang(shebang, header) elif after.startswith(shebang) and not any((before, header)): before, after = _extract_shebang(shebang, after) else: continue break header = create_header( reuse_info, header, template=template, template_is_commented=template_is_commented, style=style, force_multi=force_multi, merge_copyrights=merge_copyrights, ) new_text = f"{header}\n" if before.strip(): new_text = f"{before.rstrip()}\n\n{new_text}" if after.strip(): new_text = f"{new_text}\n{after.lstrip()}" return new_text
# pylint: disable=too-many-arguments
[docs]def add_new_header( text: str, reuse_info: ReuseInfo, template: Optional[Template] = None, template_is_commented: bool = False, style: Optional[Type[CommentStyle]] = None, force_multi: bool = False, merge_copyrights: bool = False, ) -> str: """Add a new header at the very top of *text*, similar to find_and_replace_header. But in this function, do not replace any headers or search for any existing REUSE information. Raises: CommentCreateError: if a comment could not be created. """ if style is None: style = PythonCommentStyle shebang = "" if style.SHEBANGS: for shebang_prefix in style.SHEBANGS: if text.startswith(shebang_prefix): shebang, text = _extract_shebang(shebang_prefix, text) break header = create_header( reuse_info, None, template=template, template_is_commented=template_is_commented, style=style, force_multi=force_multi, merge_copyrights=merge_copyrights, ) new_text = f"{header}\n" if shebang.strip(): new_text = f"{shebang.rstrip()}\n\n{new_text}" if text.strip(): new_text = f"{new_text}\n{text.lstrip()}" return new_text