diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py
index 416f7d944..720c52710 100644
--- a/src/reuse/__init__.py
+++ b/src/reuse/__init__.py
@@ -22,13 +22,13 @@
 import gettext
 import logging
 import os
-import re
 from dataclasses import dataclass, field
 from enum import Enum
 from importlib.metadata import PackageNotFoundError, version
 from typing import Any, Optional
 
 from boolean.boolean import Expression
+from license_expression import Licensing
 
 try:
     __version__ = version("reuse")
@@ -43,6 +43,8 @@
 
 _LOGGER = logging.getLogger(__name__)
 
+_LICENSING = Licensing()
+
 _PACKAGE_PATH = os.path.dirname(__file__)
 _LOCALE_DIR = os.path.join(_PACKAGE_PATH, "locale")
 
@@ -54,43 +56,6 @@
     _LOGGER.debug("no translations found at %s", _LOCALE_DIR)
 
 
-_IGNORE_DIR_PATTERNS = [
-    re.compile(r"^\.git$"),
-    re.compile(r"^\.hg$"),
-    re.compile(r"^\.sl$"),  # Used by Sapling SCM
-    re.compile(r"^LICENSES$"),
-    re.compile(r"^\.reuse$"),
-]
-
-_IGNORE_MESON_PARENT_DIR_PATTERNS = [
-    re.compile(r"^subprojects$"),
-]
-
-_IGNORE_FILE_PATTERNS = [
-    # LICENSE, LICENSE-MIT, LICENSE.txt
-    re.compile(r"^LICEN[CS]E([-\.].*)?$"),
-    re.compile(r"^COPYING([-\.].*)?$"),
-    # ".git" as file happens in submodules
-    re.compile(r"^\.git$"),
-    re.compile(r"^\.hgtags$"),
-    re.compile(r".*\.license$"),
-    re.compile(r"^REUSE\.toml$"),
-    # Workaround for https://github.com/fsfe/reuse-tool/issues/229
-    re.compile(r"^CAL-1.0(-Combined-Work-Exception)?(\..+)?$"),
-    re.compile(r"^SHL-2.1(\..+)?$"),
-]
-
-_IGNORE_SPDX_PATTERNS = [
-    # SPDX files from
-    # https://spdx.github.io/spdx-spec/conformance/#44-standard-data-format-requirements
-    re.compile(r".*\.spdx$"),
-    re.compile(r".*\.spdx.(rdf|json|xml|ya?ml)$"),
-]
-
-# Combine SPDX patterns into file patterns to ease default ignore usage
-_IGNORE_FILE_PATTERNS.extend(_IGNORE_SPDX_PATTERNS)
-
-
 class SourceType(Enum):
     """
     An enumeration representing the types of sources for license information.
diff --git a/src/reuse/_annotate.py b/src/reuse/_annotate.py
index 564c0341b..2c49d6ac2 100644
--- a/src/reuse/_annotate.py
+++ b/src/reuse/_annotate.py
@@ -23,11 +23,7 @@
 from jinja2.exceptions import TemplateNotFound
 
 from . import ReuseInfo
-from ._util import (
-    _determine_license_suffix_path,
-    contains_reuse_info,
-    detect_line_endings,
-)
+from ._util import _determine_license_suffix_path
 from .comment import (
     NAME_STYLE_MAP,
     CommentStyle,
@@ -35,6 +31,7 @@
     get_comment_style,
 )
 from .exceptions import CommentCreateError, MissingReuseInfoError
+from .extract import contains_reuse_info, detect_line_endings
 from .header import add_new_header, find_and_replace_header
 from .i18n import _
 from .project import Project
diff --git a/src/reuse/_util.py b/src/reuse/_util.py
index 04af137d5..af226cf1d 100644
--- a/src/reuse/_util.py
+++ b/src/reuse/_util.py
@@ -16,116 +16,16 @@
 
 import logging
 import os
-import re
-import shutil
 import subprocess
-from collections import Counter
 from hashlib import sha1
 from inspect import cleandoc
-from itertools import chain
 from pathlib import Path
-from typing import IO, Any, BinaryIO, Iterator, Optional, Union
+from typing import IO, Any, Optional, Union
 
-from boolean.boolean import ParseError
-from license_expression import ExpressionError, Licensing
-
-from . import ReuseInfo, SourceType
-from .comment import _all_style_classes  # TODO: This import is not ideal here.
-from .i18n import _
 from .types import StrPath
 
-GIT_EXE = shutil.which("git")
-HG_EXE = shutil.which("hg")
-JUJUTSU_EXE = shutil.which("jj")
-PIJUL_EXE = shutil.which("pijul")
-
-REUSE_IGNORE_START = "REUSE-IgnoreStart"
-REUSE_IGNORE_END = "REUSE-IgnoreEnd"
-
-SPDX_SNIPPET_INDICATOR = b"SPDX-SnippetBegin"
-
-_LOGGER = logging.getLogger(__name__)
-_LICENSING = Licensing()
-
 # REUSE-IgnoreStart
 
-_END_PATTERN = r"{}$".format(
-    "".join(
-        {
-            r"(?:{})*".format(item)  # pylint: disable=consider-using-f-string
-            for item in chain(
-                (
-                    re.escape(style.MULTI_LINE.end)
-                    for style in _all_style_classes()
-                    if style.MULTI_LINE.end
-                ),
-                # These are special endings which do not belong to specific
-                # comment styles, but which we want to nonetheless strip away
-                # while parsing.
-                (
-                    ending
-                    for ending in [
-                        # ex: <tag value="Copyright Jane Doe">
-                        r'"\s*/*>',
-                        r"'\s*/*>",
-                        # ex: [SPDX-License-Identifier: GPL-3.0-or-later] ::
-                        r"\]\s*::",
-                    ]
-                ),
-            )
-        }
-    )
-)
-_LICENSE_IDENTIFIER_PATTERN = re.compile(
-    r"^(.*?)SPDX-License-Identifier:[ \t]+(.*?)" + _END_PATTERN, re.MULTILINE
-)
-_CONTRIBUTOR_PATTERN = re.compile(
-    r"^(.*?)SPDX-FileContributor:[ \t]+(.*?)" + _END_PATTERN, re.MULTILINE
-)
-# The keys match the relevant attributes of ReuseInfo.
-_SPDX_TAGS: dict[str, re.Pattern] = {
-    "spdx_expressions": _LICENSE_IDENTIFIER_PATTERN,
-    "contributor_lines": _CONTRIBUTOR_PATTERN,
-}
-
-_COPYRIGHT_PATTERNS = [
-    re.compile(
-        r"(?P<copyright>(?P<prefix>SPDX-(File|Snippet)CopyrightText:"
-        r"(\s(\([Cc]\)|©|Copyright(\s(©|\([Cc]\)))?))?)\s+"
-        r"((?P<year>\d{4} ?- ?\d{4}|\d{4}),?\s+)?"
-        r"(?P<statement>.*?))" + _END_PATTERN
-    ),
-    re.compile(
-        r"(?P<copyright>(?P<prefix>Copyright(\s(\([Cc]\)|©))?)\s+"
-        r"((?P<year>\d{4} ?- ?\d{4}|\d{4}),?\s+)?"
-        r"(?P<statement>.*?))" + _END_PATTERN
-    ),
-    re.compile(
-        r"(?P<copyright>(?P<prefix>©)\s+"
-        r"((?P<year>\d{4} ?- ?\d{4}|\d{4}),?\s+)?"
-        r"(?P<statement>.*?))" + _END_PATTERN
-    ),
-]
-_COPYRIGHT_PREFIXES = {
-    "spdx": "SPDX-FileCopyrightText:",
-    "spdx-c": "SPDX-FileCopyrightText: (C)",
-    "spdx-string-c": "SPDX-FileCopyrightText: Copyright (C)",
-    "spdx-string": "SPDX-FileCopyrightText: Copyright",
-    "spdx-string-symbol": "SPDX-FileCopyrightText: Copyright ©",
-    "spdx-symbol": "SPDX-FileCopyrightText: ©",
-    "string": "Copyright",
-    "string-c": "Copyright (C)",
-    "string-symbol": "Copyright ©",
-    "symbol": "©",
-}
-
-_LICENSEREF_PATTERN = re.compile("LicenseRef-[a-zA-Z0-9-.]+$")
-
-# Amount of bytes that we assume will be big enough to contain the entire
-# comment header (including SPDX tags), so that we don't need to read the
-# entire file.
-_HEADER_BYTES = 4096
-
 
 def setup_logging(level: int = logging.WARNING) -> None:
     """Configure logging for reuse.
@@ -191,22 +91,6 @@ def find_licenses_directory(root: Optional[StrPath] = None) -> Path:
     return licenses_path
 
 
-def decoded_text_from_binary(
-    binary_file: BinaryIO, size: Optional[int] = None
-) -> str:
-    """Given a binary file object, detect its encoding and return its contents
-    as a decoded string. Do not throw any errors if the encoding contains
-    errors:  Just replace the false characters.
-
-    If *size* is specified, only read so many bytes.
-    """
-    if size is None:
-        size = -1
-    rawdata = binary_file.read(size)
-    result = rawdata.decode("utf-8", errors="replace")
-    return result.replace("\r\n", "\n")
-
-
 def _determine_license_path(path: StrPath) -> Path:
     """Given a path FILE, return FILE.license if it exists, otherwise return
     FILE.
@@ -225,169 +109,6 @@ def _determine_license_suffix_path(path: StrPath) -> Path:
     return Path(f"{path}.license")
 
 
-def _parse_copyright_year(year: Optional[str]) -> list[str]:
-    """Parse copyright years and return list."""
-    ret: list[str] = []
-    if not year:
-        return ret
-    if re.match(r"\d{4}$", year):
-        ret = [year]
-    elif re.match(r"\d{4} ?- ?\d{4}$", year):
-        ret = [year[:4], year[-4:]]
-    return ret
-
-
-def _contains_snippet(binary_file: BinaryIO) -> bool:
-    """Check if a file seems to contain a SPDX snippet"""
-    # Assumes that if SPDX_SNIPPET_INDICATOR (SPDX-SnippetBegin) is found in a
-    # file, the file contains a snippet
-    content = binary_file.read()
-    if SPDX_SNIPPET_INDICATOR in content:
-        return True
-    return False
-
-
-def merge_copyright_lines(copyright_lines: set[str]) -> set[str]:
-    """Parse all copyright lines and merge identical statements making years
-    into a range.
-
-    If a same statement uses multiple prefixes, use only the most frequent one.
-    """
-    # pylint: disable=too-many-locals
-    # TODO: Rewrite this function. It's a bit of a mess.
-    copyright_in = []
-    for line in copyright_lines:
-        for pattern in _COPYRIGHT_PATTERNS:
-            match = pattern.search(line)
-            if match is not None:
-                copyright_in.append(
-                    {
-                        "statement": match.groupdict()["statement"],
-                        "year": _parse_copyright_year(
-                            match.groupdict()["year"]
-                        ),
-                        "prefix": match.groupdict()["prefix"],
-                    }
-                )
-                break
-
-    copyright_out = set()
-    for line_info in copyright_in:
-        statement = str(line_info["statement"])
-        copyright_list = [
-            item for item in copyright_in if item["statement"] == statement
-        ]
-
-        # Get the most common prefix.
-        most_common = str(
-            Counter([item["prefix"] for item in copyright_list]).most_common(1)[
-                0
-            ][0]
-        )
-        prefix = "spdx"
-        for key, value in _COPYRIGHT_PREFIXES.items():
-            if most_common == value:
-                prefix = key
-                break
-
-        # get year range if any
-        years: list[str] = []
-        for copy in copyright_list:
-            years += copy["year"]
-
-        year: Optional[str] = None
-        if years:
-            if min(years) == max(years):
-                year = min(years)
-            else:
-                year = f"{min(years)} - {max(years)}"
-
-        copyright_out.add(make_copyright_line(statement, year, prefix))
-    return copyright_out
-
-
-def extract_reuse_info(text: str) -> ReuseInfo:
-    """Extract REUSE information from comments in a string.
-
-    Raises:
-        ExpressionError: if an SPDX expression could not be parsed.
-        ParseError: if an SPDX expression could not be parsed.
-    """
-    text = filter_ignore_block(text)
-    spdx_tags: dict[str, set[str]] = {}
-    for tag, pattern in _SPDX_TAGS.items():
-        spdx_tags[tag] = set(find_spdx_tag(text, pattern))
-    # License expressions and copyright matches are special cases.
-    expressions = set()
-    copyright_matches = set()
-    for expression in spdx_tags.pop("spdx_expressions"):
-        try:
-            expressions.add(_LICENSING.parse(expression))
-        except (ExpressionError, ParseError):
-            _LOGGER.error(
-                _("Could not parse '{expression}'").format(
-                    expression=expression
-                )
-            )
-            raise
-    for line in text.splitlines():
-        for pattern in _COPYRIGHT_PATTERNS:
-            match = pattern.search(line)
-            if match is not None:
-                copyright_matches.add(match.groupdict()["copyright"].strip())
-                break
-
-    return ReuseInfo(
-        spdx_expressions=expressions,
-        copyright_lines=copyright_matches,
-        **spdx_tags,  # type: ignore
-    )
-
-
-def reuse_info_of_file(
-    path: StrPath, original_path: StrPath, root: StrPath
-) -> ReuseInfo:
-    """Open *path* and return its :class:`ReuseInfo`.
-
-    Normally only the first few :const:`_HEADER_BYTES` are read. But if a
-    snippet was detected, the entire file is read.
-    """
-    path = Path(path)
-    with path.open("rb") as fp:
-        try:
-            read_limit: Optional[int] = _HEADER_BYTES
-            # Completely read the file once
-            # to search for possible snippets
-            if _contains_snippet(fp):
-                _LOGGER.debug(f"'{path}' seems to contain an SPDX Snippet")
-                read_limit = None
-            # Reset read position
-            fp.seek(0)
-            # Scan the file for REUSE info, possibly limiting the read
-            # length
-            file_result = extract_reuse_info(
-                decoded_text_from_binary(fp, size=read_limit)
-            )
-            if file_result.contains_copyright_or_licensing():
-                source_type = SourceType.FILE_HEADER
-                if path.suffix == ".license":
-                    source_type = SourceType.DOT_LICENSE
-                return file_result.copy(
-                    path=relative_from_root(original_path, root).as_posix(),
-                    source_path=relative_from_root(path, root).as_posix(),
-                    source_type=source_type,
-                )
-
-        except (ExpressionError, ParseError):
-            _LOGGER.error(
-                _(
-                    "'{path}' holds an SPDX expression that cannot be"
-                    " parsed, skipping the file"
-                ).format(path=path)
-            )
-    return ReuseInfo()
-
-
 def relative_from_root(path: StrPath, root: StrPath) -> Path:
     """A helper function to get *path* relative to *root*."""
     path = Path(path)
@@ -397,88 +118,6 @@ def relative_from_root(path: StrPath, root: StrPath) -> Path:
         return Path(os.path.relpath(path, start=root))
 
 
-def find_spdx_tag(text: str, pattern: re.Pattern) -> Iterator[str]:
-    """Extract all the values in *text* matching *pattern*'s regex, taking care
-    of stripping extraneous whitespace of formatting.
-    """
-    for prefix, value in pattern.findall(text):
-        prefix, value = prefix.strip(), value.strip()
-
-        # Some comment headers have ASCII art to "frame" the comment, like this:
-        #
-        # /***********************\
-        # |*  This is a comment  *|
-        # \***********************/
-        #
-        # To ensure we parse them correctly, if the line ends with the inverse
-        # of the comment prefix, we strip that suffix. See #343 for a real
-        # world example of a project doing this (LLVM).
-        suffix = prefix[::-1]
-        if suffix and value.endswith(suffix):
-            value = value[: -len(suffix)]
-
-        yield value.strip()
-
-
-def filter_ignore_block(text: str) -> str:
-    """Filter out blocks beginning with REUSE_IGNORE_START and ending with
-    REUSE_IGNORE_END to remove lines that should not be treated as copyright and
-    licensing information.
-    """
-    ignore_start = None
-    ignore_end = None
-    if REUSE_IGNORE_START in text:
-        ignore_start = text.index(REUSE_IGNORE_START)
-    if REUSE_IGNORE_END in text:
-        ignore_end = text.index(REUSE_IGNORE_END) + len(REUSE_IGNORE_END)
-    if not ignore_start:
-        return text
-    if not ignore_end:
-        return text[:ignore_start]
-    if ignore_end > ignore_start:
-        return text[:ignore_start] + filter_ignore_block(text[ignore_end:])
-    rest = text[ignore_start + len(REUSE_IGNORE_START) :]
-    if REUSE_IGNORE_END in rest:
-        ignore_end = rest.index(REUSE_IGNORE_END) + len(REUSE_IGNORE_END)
-        return text[:ignore_start] + filter_ignore_block(rest[ignore_end:])
-    return text[:ignore_start]
-
-
-def contains_reuse_info(text: str) -> bool:
-    """The text contains REUSE info."""
-    try:
-        return bool(extract_reuse_info(text))
-    except (ExpressionError, ParseError):
-        return False
-
-
-def make_copyright_line(
-    statement: str, year: Optional[str] = None, copyright_prefix: str = "spdx"
-) -> str:
-    """Given a statement, prefix it with ``SPDX-FileCopyrightText:`` if it is
-    not already prefixed with some manner of copyright tag.
-    """
-    if "\n" in statement:
-        raise RuntimeError(f"Unexpected newline in '{statement}'")
-
-    prefix = _COPYRIGHT_PREFIXES.get(copyright_prefix)
-    if prefix is None:
-        # TODO: Maybe translate this. Also maybe reduce DRY here.
-        raise RuntimeError(
-            "Unexpected copyright prefix: Need 'spdx', 'spdx-c', "
-            "'spdx-symbol', 'string', 'string-c', "
-            "'string-symbol', or 'symbol'"
-        )
-
-    for pattern in _COPYRIGHT_PATTERNS:
-        match = pattern.search(statement)
-        if match is not None:
-            return statement
-    if year is not None:
-        return f"{prefix} {year} {statement}"
-    return f"{prefix} {statement}"
-
-
 def _checksum(path: StrPath) -> str:
     path = Path(path)
 
@@ -490,17 +129,6 @@ def _checksum(path: StrPath) -> str:
     return file_sha1.hexdigest()
 
 
-def detect_line_endings(text: str) -> str:
-    """Return one of '\n', '\r' or '\r\n' depending on the line endings used in
-    *text*. Return os.linesep if there are no line endings.
-    """
-    line_endings = ["\r\n", "\r", "\n"]
-    for line_ending in line_endings:
-        if line_ending in text:
-            return line_ending
-    return os.linesep
-
-
 def cleandoc_nl(text: str) -> str:
     """Like :func:`inspect.cleandoc`, but with a newline at the end."""
     return cleandoc(text) + "\n"
diff --git a/src/reuse/cli/annotate.py b/src/reuse/cli/annotate.py
index 4d3dd3904..ff40bfd01 100644
--- a/src/reuse/cli/annotate.py
+++ b/src/reuse/cli/annotate.py
@@ -29,12 +29,7 @@
 
 from .. import ReuseInfo
 from .._annotate import add_header_to_file
-from .._util import (
-    _COPYRIGHT_PREFIXES,
-    _determine_license_path,
-    _determine_license_suffix_path,
-    make_copyright_line,
-)
+from .._util import _determine_license_path, _determine_license_suffix_path
 from ..comment import (
     NAME_STYLE_MAP,
     CommentStyle,
@@ -42,6 +37,7 @@
     has_style,
     is_uncommentable,
 )
+from ..copyright import _COPYRIGHT_PREFIXES, make_copyright_line
 from ..i18n import _
 from ..project import Project
 from .common import ClickObj, MutexOption, spdx_identifier
diff --git a/src/reuse/cli/common.py b/src/reuse/cli/common.py
index c2ce22132..0345cf24f 100644
--- a/src/reuse/cli/common.py
+++ b/src/reuse/cli/common.py
@@ -12,7 +12,7 @@
 from boolean.boolean import Expression, ParseError
 from license_expression import ExpressionError
 
-from .._util import _LICENSING
+from .. import _LICENSING
 from ..exceptions import GlobalLicensingConflictError, GlobalLicensingParseError
 from ..i18n import _
 from ..project import Project
diff --git a/src/reuse/cli/spdx.py b/src/reuse/cli/spdx.py
index 11a9933b5..1f29e4e0a 100644
--- a/src/reuse/cli/spdx.py
+++ b/src/reuse/cli/spdx.py
@@ -12,7 +12,7 @@
 
 import click
 
-from .. import _IGNORE_SPDX_PATTERNS
+from ..covered_files import _IGNORE_SPDX_PATTERNS
 from ..i18n import _
 from ..report import ProjectReport
 from .common import ClickObj
diff --git a/src/reuse/copyright.py b/src/reuse/copyright.py
new file mode 100644
index 000000000..4f4df9ff2
--- /dev/null
+++ b/src/reuse/copyright.py
@@ -0,0 +1,122 @@
+# SPDX-FileCopyrightText: 2024 Free Software Foundation Europe e.V. <https://fsfe.org>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Utilities related to the parsing and storing of copyright notices."""
+
+import re
+from collections import Counter
+from typing import Optional
+
+from .extract import _COPYRIGHT_PATTERNS  # TODO: Get rid of this import.
+
+_COPYRIGHT_PREFIXES = {
+    "spdx": "SPDX-FileCopyrightText:",
+    "spdx-c": "SPDX-FileCopyrightText: (C)",
+    "spdx-string-c": "SPDX-FileCopyrightText: Copyright (C)",
+    "spdx-string": "SPDX-FileCopyrightText: Copyright",
+    "spdx-string-symbol": "SPDX-FileCopyrightText: Copyright ©",
+    "spdx-symbol": "SPDX-FileCopyrightText: ©",
+    "string": "Copyright",
+    "string-c": "Copyright (C)",
+    "string-symbol": "Copyright ©",
+    "symbol": "©",
+}
+
+
+def merge_copyright_lines(copyright_lines: set[str]) -> set[str]:
+    """Parse all copyright lines and merge identical statements making years
+    into a range.
+
+    If one statement uses multiple prefixes, use only the most frequent one.
+    """
+    # pylint: disable=too-many-locals
+    # TODO: Rewrite this function. It's a bit of a mess.
+    copyright_in = []
+    for line in copyright_lines:
+        for pattern in _COPYRIGHT_PATTERNS:
+            match = pattern.search(line)
+            if match is not None:
+                copyright_in.append(
+                    {
+                        "statement": match.groupdict()["statement"],
+                        "year": _parse_copyright_year(
+                            match.groupdict()["year"]
+                        ),
+                        "prefix": match.groupdict()["prefix"],
+                    }
+                )
+                break
+
+    copyright_out = set()
+    for line_info in copyright_in:
+        statement = str(line_info["statement"])
+        copyright_list = [
+            item for item in copyright_in if item["statement"] == statement
+        ]
+
+        # Get the most common prefix.
+        most_common = str(
+            Counter([item["prefix"] for item in copyright_list]).most_common(1)[
+                0
+            ][0]
+        )
+        prefix = "spdx"
+        for key, value in _COPYRIGHT_PREFIXES.items():
+            if most_common == value:
+                prefix = key
+                break
+
+        # get year range if any
+        years: list[str] = []
+        for copy in copyright_list:
+            years += copy["year"]
+
+        year: Optional[str] = None
+        if years:
+            if min(years) == max(years):
+                year = min(years)
+            else:
+                year = f"{min(years)} - {max(years)}"
+
+        copyright_out.add(make_copyright_line(statement, year, prefix))
+    return copyright_out
+
+
+def make_copyright_line(
+    statement: str, year: Optional[str] = None, copyright_prefix: str = "spdx"
+) -> str:
+    """Given a statement, prefix it with ``SPDX-FileCopyrightText:`` if it is
+    not already prefixed with some manner of copyright tag.
+    """
+    if "\n" in statement:
+        raise RuntimeError(f"Unexpected newline in '{statement}'")
+
+    prefix = _COPYRIGHT_PREFIXES.get(copyright_prefix)
+    if prefix is None:
+        # TODO: Maybe translate this. Also avoid repeating the prefixes (DRY).
+        raise RuntimeError(
+            "Unexpected copyright prefix: Need 'spdx', 'spdx-c', "
+            "'spdx-symbol', 'string', 'string-c', "
+            "'string-symbol', or 'symbol'"
+        )
+
+    for pattern in _COPYRIGHT_PATTERNS:
+        match = pattern.search(statement)
+        if match is not None:
+            return statement
+    if year is not None:
+        return f"{prefix} {year} {statement}"
+    return f"{prefix} {statement}"
+
+
+def _parse_copyright_year(year: Optional[str]) -> list[str]:
+    """Parse copyright years and return list."""
+    ret: list[str] = []
+    if not year:
+        return ret
+    if re.match(r"\d{4}$", year):
+        ret = [year]
+    elif re.match(r"\d{4} ?- ?\d{4}$", year):
+        ret = [year[:4], year[-4:]]
+    return ret
diff --git a/src/reuse/covered_files.py b/src/reuse/covered_files.py
index ad4e019b6..cbe143550 100644
--- a/src/reuse/covered_files.py
+++ b/src/reuse/covered_files.py
@@ -11,19 +11,51 @@
 import contextlib
 import logging
 import os
+import re
 from pathlib import Path
 from typing import Collection, Generator, Optional, cast
 
-from . import (
-    _IGNORE_DIR_PATTERNS,
-    _IGNORE_FILE_PATTERNS,
-    _IGNORE_MESON_PARENT_DIR_PATTERNS,
-)
 from .types import StrPath
 from .vcs import VCSStrategy
 
 _LOGGER = logging.getLogger(__name__)
 
+_IGNORE_DIR_PATTERNS = [
+    re.compile(r"^\.git$"),
+    re.compile(r"^\.hg$"),
+    re.compile(r"^\.sl$"),  # Used by Sapling SCM
+    re.compile(r"^LICENSES$"),
+    re.compile(r"^\.reuse$"),
+]
+
+_IGNORE_MESON_PARENT_DIR_PATTERNS = [
+    re.compile(r"^subprojects$"),
+]
+
+_IGNORE_FILE_PATTERNS = [
+    # LICENSE, LICENSE-MIT, LICENSE.txt
+    re.compile(r"^LICEN[CS]E([-\.].*)?$"),
+    re.compile(r"^COPYING([-\.].*)?$"),
+    # ".git" as file happens in submodules
+    re.compile(r"^\.git$"),
+    re.compile(r"^\.hgtags$"),
+    re.compile(r".*\.license$"),
+    re.compile(r"^REUSE\.toml$"),
+    # Workaround for https://github.com/fsfe/reuse-tool/issues/229
+    re.compile(r"^CAL-1.0(-Combined-Work-Exception)?(\..+)?$"),
+    re.compile(r"^SHL-2.1(\..+)?$"),
+]
+
+_IGNORE_SPDX_PATTERNS = [
+    # SPDX files from
+    # https://spdx.github.io/spdx-spec/conformance/#44-standard-data-format-requirements
+    re.compile(r".*\.spdx$"),
+    re.compile(r".*\.spdx.(rdf|json|xml|ya?ml)$"),
+]
+
+# Combine SPDX patterns into file patterns to ease default ignore usage
+_IGNORE_FILE_PATTERNS.extend(_IGNORE_SPDX_PATTERNS)
+
 
 def is_path_ignored(
     path: Path,
diff --git a/src/reuse/download.py b/src/reuse/download.py
index 06ef1c944..677a0d094 100644
--- a/src/reuse/download.py
+++ b/src/reuse/download.py
@@ -15,7 +15,8 @@
 from urllib.error import URLError
 from urllib.parse import urljoin
 
-from ._util import _LICENSEREF_PATTERN, find_licenses_directory
+from ._util import find_licenses_directory
+from .extract import _LICENSEREF_PATTERN
 from .project import Project
 from .types import StrPath
 from .vcs import VCSStrategyNone
diff --git a/src/reuse/extract.py b/src/reuse/extract.py
new file mode 100644
index 000000000..34bcd0faf
--- /dev/null
+++ b/src/reuse/extract.py
@@ -0,0 +1,282 @@
+# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
+# SPDX-FileCopyrightText: 2020 Tuomas Siipola <tuomas@zpl.fi>
+# SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <carmenbianca@fsfe.org>
+# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
+# SPDX-FileCopyrightText: 2022 Nico Rikken <nico.rikken@fsfe.org>
+# SPDX-FileCopyrightText: 2022 Pietro Albini <pietro.albini@ferrous-systems.com>
+# SPDX-FileCopyrightText: 2023 DB Systel GmbH
+# SPDX-FileCopyrightText: 2023 Johannes Zarl-Zierl <johannes@zarl-zierl.at>
+# SPDX-FileCopyrightText: 2024 Rivos Inc.
+# SPDX-FileCopyrightText: 2024 Skyler Grey <sky@a.starrysky.fyi>
+# SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Utilities related to the extraction of REUSE information out of files."""
+
+import logging
+import os
+import re
+from itertools import chain
+from pathlib import Path
+from typing import BinaryIO, Iterator, Optional
+
+from boolean.boolean import ParseError
+from license_expression import ExpressionError
+
+from . import _LICENSING, ReuseInfo, SourceType
+from ._util import relative_from_root
+from .comment import _all_style_classes
+from .i18n import _
+from .types import StrPath
+
+REUSE_IGNORE_START = "REUSE-IgnoreStart"
+REUSE_IGNORE_END = "REUSE-IgnoreEnd"
+
+# REUSE-IgnoreStart
+
+SPDX_SNIPPET_INDICATOR = b"SPDX-SnippetBegin"
+
+_LOGGER = logging.getLogger(__name__)
+
+_END_PATTERN = r"{}$".format(
+    "".join(
+        {
+            r"(?:{})*".format(item)  # pylint: disable=consider-using-f-string
+            for item in chain(
+                (
+                    re.escape(style.MULTI_LINE.end)
+                    for style in _all_style_classes()
+                    if style.MULTI_LINE.end
+                ),
+                # These are special endings which do not belong to specific
+                # comment styles, but which we want to nonetheless strip away
+                # while parsing.
+                (
+                    ending
+                    for ending in [
+                        # ex: <tag value="Copyright Jane Doe">
+                        r'"\s*/*>',
+                        r"'\s*/*>",
+                        # ex: [SPDX-License-Identifier: GPL-3.0-or-later] ::
+                        r"\]\s*::",
+                    ]
+                ),
+            )
+        }
+    )
+)
+_LICENSE_IDENTIFIER_PATTERN = re.compile(
+    r"^(.*?)SPDX-License-Identifier:[ \t]+(.*?)" + _END_PATTERN, re.MULTILINE
+)
+_CONTRIBUTOR_PATTERN = re.compile(
+    r"^(.*?)SPDX-FileContributor:[ \t]+(.*?)" + _END_PATTERN, re.MULTILINE
+)
+# The keys match the relevant attributes of ReuseInfo.
+_SPDX_TAGS: dict[str, re.Pattern] = {
+    "spdx_expressions": _LICENSE_IDENTIFIER_PATTERN,
+    "contributor_lines": _CONTRIBUTOR_PATTERN,
+}
+
+_COPYRIGHT_PATTERNS = [
+    re.compile(
+        r"(?P<copyright>(?P<prefix>SPDX-(File|Snippet)CopyrightText:"
+        r"(\s(\([Cc]\)|©|Copyright(\s(©|\([Cc]\)))?))?)\s+"
+        r"((?P<year>\d{4} ?- ?\d{4}|\d{4}),?\s+)?"
+        r"(?P<statement>.*?))" + _END_PATTERN
+    ),
+    re.compile(
+        r"(?P<copyright>(?P<prefix>Copyright(\s(\([Cc]\)|©))?)\s+"
+        r"((?P<year>\d{4} ?- ?\d{4}|\d{4}),?\s+)?"
+        r"(?P<statement>.*?))" + _END_PATTERN
+    ),
+    re.compile(
+        r"(?P<copyright>(?P<prefix>©)\s+"
+        r"((?P<year>\d{4} ?- ?\d{4}|\d{4}),?\s+)?"
+        r"(?P<statement>.*?))" + _END_PATTERN
+    ),
+]
+
+_LICENSEREF_PATTERN = re.compile("LicenseRef-[a-zA-Z0-9-.]+$")
+
+# Amount of bytes that we assume will be big enough to contain the entire
+# comment header (including SPDX tags), so that we don't need to read the
+# entire file.
+_HEADER_BYTES = 4096
+
+
+def decoded_text_from_binary(
+    binary_file: BinaryIO, size: Optional[int] = None
+) -> str:
+    """Given a binary file object, return its contents decoded as UTF-8, with
+    CRLF line endings converted to LF. Do not throw any errors if the data is
+    not valid UTF-8: Just replace the undecodable bytes.
+
+    If *size* is specified, only read so many bytes.
+    """
+    if size is None:
+        size = -1
+    rawdata = binary_file.read(size)
+    result = rawdata.decode("utf-8", errors="replace")
+    return result.replace("\r\n", "\n")
+
+
+def _contains_snippet(binary_file: BinaryIO) -> bool:
+    """Check if a file seems to contain an SPDX snippet."""
+    # Assumes that if SPDX_SNIPPET_INDICATOR (SPDX-SnippetBegin) is found in a
+    # file, the file contains a snippet
+    content = binary_file.read()
+    if SPDX_SNIPPET_INDICATOR in content:
+        return True
+    return False
+
+
+def extract_reuse_info(text: str) -> ReuseInfo:
+    """Extract REUSE information from comments in a string.
+
+    Raises:
+        ExpressionError: if an SPDX expression could not be parsed.
+        ParseError: if an SPDX expression could not be parsed.
+    """
+    text = filter_ignore_block(text)
+    spdx_tags: dict[str, set[str]] = {}
+    for tag, pattern in _SPDX_TAGS.items():
+        spdx_tags[tag] = set(find_spdx_tag(text, pattern))
+    # License expressions and copyright matches are special cases.
+    expressions = set()
+    copyright_matches = set()
+    for expression in spdx_tags.pop("spdx_expressions"):
+        try:
+            expressions.add(_LICENSING.parse(expression))
+        except (ExpressionError, ParseError):
+            _LOGGER.error(
+                _("Could not parse '{expression}'").format(
+                    expression=expression
+                )
+            )
+            raise
+    for line in text.splitlines():
+        for pattern in _COPYRIGHT_PATTERNS:
+            match = pattern.search(line)
+            if match is not None:
+                copyright_matches.add(match.groupdict()["copyright"].strip())
+                break
+
+    return ReuseInfo(
+        spdx_expressions=expressions,
+        copyright_lines=copyright_matches,
+        **spdx_tags,  # type: ignore
+    )
+
+
+def reuse_info_of_file(
+    path: StrPath, original_path: StrPath, root: StrPath
+) -> ReuseInfo:
+    """Open *path* and return its :class:`ReuseInfo`.
+
+    Normally only the first :const:`_HEADER_BYTES` bytes are read. But if a
+    snippet was detected, the entire file is read. An empty result is returned
+    when no copyright or licensing is found or an expression cannot be parsed;
+    otherwise *original_path* and *path* are recorded via relative_from_root().
+    """
+    path = Path(path)
+    with path.open("rb") as fp:
+        try:
+            read_limit: Optional[int] = _HEADER_BYTES
+            # Completely read the file once to search for possible snippets
+            if _contains_snippet(fp):
+                _LOGGER.debug("'%s' seems to contain an SPDX Snippet", path)
+                read_limit = None
+            # Reset read position
+            fp.seek(0)
+            # Scan the file for REUSE info, possibly limiting the read length
+            file_result = extract_reuse_info(
+                decoded_text_from_binary(fp, size=read_limit)
+            )
+            if file_result.contains_copyright_or_licensing():
+                source_type = SourceType.FILE_HEADER
+                if path.suffix == ".license":
+                    source_type = SourceType.DOT_LICENSE
+                return file_result.copy(
+                    path=relative_from_root(original_path, root).as_posix(),
+                    source_path=relative_from_root(path, root).as_posix(),
+                    source_type=source_type,
+                )
+
+        except (ExpressionError, ParseError):
+            _LOGGER.error(
+                _(
+                    "'{path}' holds an SPDX expression that cannot be"
+                    " parsed, skipping the file"
+                ).format(path=path)
+            )
+    return ReuseInfo()
+
+
+def find_spdx_tag(text: str, pattern: re.Pattern) -> Iterator[str]:
+    """Yield the value of every match of *pattern* in *text*.
+
+    Each value is stripped of surrounding whitespace and of a trailing
+    mirrored comment prefix.
+    """
+    for raw_prefix, raw_value in pattern.findall(text):
+        # Some comment headers have ASCII art to "frame" the comment, like this:
+        #
+        # /***********************\
+        # |*  This is a comment  *|
+        # \***********************/
+        #
+        # The closing edge of such a frame is the comment prefix spelled
+        # backwards, so that mirrored prefix is cut off the end of the value.
+        # See #343 for a real world example of a project doing this (LLVM).
+        mirrored = raw_prefix.strip()[::-1]
+        cleaned = raw_value.strip()
+        if mirrored and cleaned.endswith(mirrored):
+            cleaned = cleaned[: len(cleaned) - len(mirrored)]
+        yield cleaned.strip()
+
+
+def filter_ignore_block(text: str) -> str:
+    """Filter out blocks beginning with REUSE_IGNORE_START and ending with
+    REUSE_IGNORE_END to remove lines that should not be treated as copyright and
+    licensing information. An unclosed block runs to the end of *text*.
+    """
+    ignore_start = None
+    ignore_end = None
+    if REUSE_IGNORE_START in text:
+        ignore_start = text.index(REUSE_IGNORE_START)
+    if REUSE_IGNORE_END in text:
+        ignore_end = text.index(REUSE_IGNORE_END) + len(REUSE_IGNORE_END)
+    if ignore_start is None:  # 0 is a valid offset, so do not test truthiness
+        return text
+    if ignore_end is None:
+        return text[:ignore_start]
+    if ignore_end > ignore_start:
+        return text[:ignore_start] + filter_ignore_block(text[ignore_end:])
+    rest = text[ignore_start + len(REUSE_IGNORE_START) :]
+    if REUSE_IGNORE_END in rest:
+        ignore_end = rest.index(REUSE_IGNORE_END) + len(REUSE_IGNORE_END)
+        return text[:ignore_start] + filter_ignore_block(rest[ignore_end:])
+    return text[:ignore_start]
+
+
+def contains_reuse_info(text: str) -> bool:
+    """Whether *text* holds REUSE info (False on an unparsable expression)."""
+    try:
+        return bool(extract_reuse_info(text))
+    except (ExpressionError, ParseError):
+        return False
+
+
+def detect_line_endings(text: str) -> str:
+    r"""Return one of '\r\n', '\r' or '\n', whichever is found in *text* first
+    when checked in that order. Fall back to os.linesep if *text* has no line
+    endings at all.
+    """
+    # '\r\n' is tested before its parts so CRLF text is not reported as '\r'.
+    candidates = ("\r\n", "\r", "\n")
+    found = (ending for ending in candidates if ending in text)
+    return next(found, os.linesep)
+
+
+# REUSE-IgnoreEnd
diff --git a/src/reuse/global_licensing.py b/src/reuse/global_licensing.py
index bacc8f92a..c8ad9cf1e 100644
--- a/src/reuse/global_licensing.py
+++ b/src/reuse/global_licensing.py
@@ -33,8 +33,7 @@
 from debian.copyright import Error as DebianError
 from license_expression import ExpressionError
 
-from . import ReuseInfo, SourceType
-from ._util import _LICENSING
+from . import _LICENSING, ReuseInfo, SourceType
 from .covered_files import iter_files
 from .exceptions import (
     GlobalLicensingParseError,
diff --git a/src/reuse/header.py b/src/reuse/header.py
index 6000d9a84..0538155a3 100644
--- a/src/reuse/header.py
+++ b/src/reuse/header.py
@@ -23,17 +23,14 @@
 from license_expression import ExpressionError
 
 from . import ReuseInfo
-from ._util import (
-    contains_reuse_info,
-    extract_reuse_info,
-    merge_copyright_lines,
-)
 from .comment import CommentStyle, EmptyCommentStyle, PythonCommentStyle
+from .copyright import merge_copyright_lines
 from .exceptions import (
     CommentCreateError,
     CommentParseError,
     MissingReuseInfoError,
 )
+from .extract import contains_reuse_info, extract_reuse_info
 from .i18n import _
 
 _LOGGER = logging.getLogger(__name__)
diff --git a/src/reuse/project.py b/src/reuse/project.py
index cf6ca24d5..4b7425f5b 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -23,17 +23,13 @@
 
 from . import ReuseInfo
 from ._licenses import EXCEPTION_MAP, LICENSE_MAP
-from ._util import (
-    _LICENSEREF_PATTERN,
-    _determine_license_path,
-    relative_from_root,
-    reuse_info_of_file,
-)
+from ._util import _determine_license_path, relative_from_root
 from .covered_files import iter_files
 from .exceptions import (
     GlobalLicensingConflictError,
     SpdxIdentifierNotFoundError,
 )
+from .extract import _LICENSEREF_PATTERN, reuse_info_of_file
 from .global_licensing import (
     GlobalLicensing,
     NestedReuseTOML,
diff --git a/src/reuse/report.py b/src/reuse/report.py
index dd2a0355a..4277a9cec 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -31,8 +31,9 @@
 )
 from uuid import uuid4
 
-from . import __REUSE_version__, __version__
-from ._util import _LICENSEREF_PATTERN, _LICENSING, _checksum
+from . import _LICENSING, __REUSE_version__, __version__
+from ._util import _checksum
+from .extract import _LICENSEREF_PATTERN
 from .global_licensing import ReuseDep5
 from .i18n import _
 from .project import Project, ReuseInfo
diff --git a/src/reuse/vcs.py b/src/reuse/vcs.py
index 135c91393..8c1cb5c8e 100644
--- a/src/reuse/vcs.py
+++ b/src/reuse/vcs.py
@@ -12,19 +12,13 @@
 
 import logging
 import os
+import shutil
 from abc import ABC, abstractmethod
 from inspect import isclass
 from pathlib import Path
 from typing import TYPE_CHECKING, Generator, Optional, Type
 
-from ._util import (
-    GIT_EXE,
-    HG_EXE,
-    JUJUTSU_EXE,
-    PIJUL_EXE,
-    execute_command,
-    relative_from_root,
-)
+from ._util import execute_command, relative_from_root
 from .types import StrPath
 
 if TYPE_CHECKING:
@@ -32,6 +26,11 @@
 
 _LOGGER = logging.getLogger(__name__)
 
+GIT_EXE = shutil.which("git")
+HG_EXE = shutil.which("hg")
+JUJUTSU_EXE = shutil.which("jj")
+PIJUL_EXE = shutil.which("pijul")
+
 
 class VCSStrategy(ABC):
     """Strategy pattern for version control systems."""
diff --git a/tests/conftest.py b/tests/conftest.py
index 075a5fe4d..929533075 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -39,14 +39,9 @@
 except ImportError:
     sys.path.append(os.path.join(Path(__file__).parent.parent, "src"))
 finally:
-    from reuse._util import (
-        GIT_EXE,
-        HG_EXE,
-        JUJUTSU_EXE,
-        PIJUL_EXE,
-        setup_logging,
-    )
+    from reuse._util import setup_logging
     from reuse.global_licensing import ReuseDep5
+    from reuse.vcs import GIT_EXE, HG_EXE, JUJUTSU_EXE, PIJUL_EXE
 
 CWD = Path.cwd()
 
@@ -117,7 +112,6 @@ def optional_git_exe(
     """Run the test with or without git."""
     exe = GIT_EXE if request.param else ""
     monkeypatch.setattr("reuse.vcs.GIT_EXE", exe)
-    monkeypatch.setattr("reuse._util.GIT_EXE", exe)
     yield exe
 
 
@@ -136,7 +130,6 @@ def optional_hg_exe(
     """Run the test with or without mercurial."""
     exe = HG_EXE if request.param else ""
     monkeypatch.setattr("reuse.vcs.HG_EXE", exe)
-    monkeypatch.setattr("reuse._util.HG_EXE", exe)
     yield exe
 
 
@@ -155,7 +148,6 @@ def optional_jujutsu_exe(
     """Run the test with or without Jujutsu."""
     exe = JUJUTSU_EXE if request.param else ""
     monkeypatch.setattr("reuse.vcs.JUJUTSU_EXE", exe)
-    monkeypatch.setattr("reuse._util.JUJUTSU_EXE", exe)
     yield exe
 
 
@@ -174,7 +166,6 @@ def optional_pijul_exe(
     """Run the test with or without Pijul."""
     exe = PIJUL_EXE if request.param else ""
     monkeypatch.setattr("reuse.vcs.PIJUL_EXE", exe)
-    monkeypatch.setattr("reuse._util.PIJUL_EXE", exe)
     yield exe
 
 
diff --git a/tests/test_cli_annotate.py b/tests/test_cli_annotate.py
index 4aecfd85e..0326af280 100644
--- a/tests/test_cli_annotate.py
+++ b/tests/test_cli_annotate.py
@@ -17,8 +17,8 @@
 import pytest
 from click.testing import CliRunner
 
-from reuse._util import _COPYRIGHT_PREFIXES
 from reuse.cli.main import main
+from reuse.copyright import _COPYRIGHT_PREFIXES
 
 # pylint: disable=too-many-public-methods,too-many-lines,unused-argument
 
diff --git a/tests/test_copyright.py b/tests/test_copyright.py
new file mode 100644
index 000000000..662b5ecb5
--- /dev/null
+++ b/tests/test_copyright.py
@@ -0,0 +1,135 @@
+# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
+# SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <carmenbianca@fsfe.org>
+# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
+# SPDX-FileCopyrightText: 2022 Nico Rikken <nico.rikken@fsfe.org>
+# SPDX-FileCopyrightText: 2022 Pietro Albini <pietro.albini@ferrous-systems.com>
+# SPDX-FileCopyrightText: 2024 Rivos Inc.
+# SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Tests for reuse.copyright"""
+
+import pytest
+
+from reuse.copyright import make_copyright_line
+
+# REUSE-IgnoreStart
+
+
+def test_make_copyright_line_simple():
+    """A bare statement gets the SPDX-FileCopyrightText tag prepended."""
+    assert make_copyright_line("hello") == "SPDX-FileCopyrightText: hello"
+
+
+def test_make_copyright_line_year():
+    """Given a simple statement and a year (passed here as a string), the year
+    is placed between the SPDX-FileCopyrightText tag and the statement.
+    """
+    generated = make_copyright_line("hello", year="2019")
+    assert generated == "SPDX-FileCopyrightText: 2019 hello"
+
+
+def test_make_copyright_line_prefix_spdx():
+    """The 'spdx' prefix yields a plain SPDX-FileCopyrightText line."""
+    generated = make_copyright_line("hello", copyright_prefix="spdx")
+    assert generated == "SPDX-FileCopyrightText: hello"
+
+
+def test_make_copyright_line_prefix_spdx_year():
+    """With the 'spdx' prefix, the year directly follows the SPDX tag."""
+    generated = make_copyright_line("hello", year=2019, copyright_prefix="spdx")
+    assert generated == "SPDX-FileCopyrightText: 2019 hello"
+
+
+def test_make_copyright_line_prefix_spdx_c_year():
+    """The 'spdx-c' prefix adds '(C)' between the SPDX tag and the year."""
+    generated = make_copyright_line(
+        "hello", year=2019, copyright_prefix="spdx-c"
+    )
+    assert generated == "SPDX-FileCopyrightText: (C) 2019 hello"
+
+
+def test_make_copyright_line_prefix_spdx_symbol_year():
+    """The 'spdx-symbol' prefix adds '©' between the SPDX tag and the year."""
+    generated = make_copyright_line(
+        "hello", year=2019, copyright_prefix="spdx-symbol"
+    )
+    assert generated == "SPDX-FileCopyrightText: © 2019 hello"
+
+
+def test_make_copyright_line_prefix_string_year():
+    """The 'string' prefix yields a 'Copyright' line without the SPDX tag."""
+    generated = make_copyright_line(
+        "hello", year=2019, copyright_prefix="string"
+    )
+    assert generated == "Copyright 2019 hello"
+
+
+def test_make_copyright_line_prefix_string_c_year():
+    """The 'string-c' prefix yields 'Copyright (C)' without the SPDX tag."""
+    generated = make_copyright_line(
+        "hello", year=2019, copyright_prefix="string-c"
+    )
+    assert generated == "Copyright (C) 2019 hello"
+
+
+def test_make_copyright_line_prefix_spdx_string_c_year():
+    """The 'spdx-string-c' prefix puts 'Copyright (C)' after the SPDX tag."""
+    generated = make_copyright_line(
+        "hello", year=2019, copyright_prefix="spdx-string-c"
+    )
+    assert generated == "SPDX-FileCopyrightText: Copyright (C) 2019 hello"
+
+
+def test_make_copyright_line_prefix_spdx_string_year():
+    """The 'spdx-string' prefix puts 'Copyright' after the SPDX tag."""
+    generated = make_copyright_line(
+        "hello", year=2019, copyright_prefix="spdx-string"
+    )
+    assert generated == "SPDX-FileCopyrightText: Copyright 2019 hello"
+
+
+def test_make_copyright_line_prefix_spdx_string_symbol_year():
+    """The 'spdx-string-symbol' prefix puts 'Copyright ©' after the tag."""
+    generated = make_copyright_line(
+        "hello", year=2019, copyright_prefix="spdx-string-symbol"
+    )
+    assert generated == "SPDX-FileCopyrightText: Copyright © 2019 hello"
+
+
+def test_make_copyright_line_prefix_string_symbol_year():
+    """The 'string-symbol' prefix yields 'Copyright ©' with no SPDX tag."""
+    generated = make_copyright_line(
+        "hello", year=2019, copyright_prefix="string-symbol"
+    )
+    assert generated == "Copyright © 2019 hello"
+
+
+def test_make_copyright_line_prefix_symbol_year():
+    """The 'symbol' prefix yields a bare '©' in front of the year."""
+    generated = make_copyright_line(
+        "hello", year=2019, copyright_prefix="symbol"
+    )
+    assert generated == "© 2019 hello"
+
+
+def test_make_copyright_line_existing_spdx_copyright():
+    """A line that already has the SPDX tag is returned unchanged."""
+    existing = "SPDX-FileCopyrightText: hello"
+    assert make_copyright_line(existing) == existing
+
+
+def test_make_copyright_line_existing_other_copyright():
+    """A line that already starts with '©' is returned unchanged."""
+    existing = "© hello"
+    assert make_copyright_line(existing) == existing
+
+
+def test_make_copyright_line_multine_error():
+    """A statement containing a newline raises RuntimeError."""
+    with pytest.raises(RuntimeError):
+        make_copyright_line("hello\nworld")
+
+
+# REUSE-IgnoreEnd
diff --git a/tests/test_util.py b/tests/test_extract.py
similarity index 62%
rename from tests/test_util.py
rename to tests/test_extract.py
index e26445fe8..46bcad68e 100644
--- a/tests/test_util.py
+++ b/tests/test_extract.py
@@ -8,7 +8,7 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Tests for reuse._util"""
+"""Tests for reuse.extract"""
 
 import os
 from inspect import cleandoc
@@ -17,8 +17,13 @@
 import pytest
 from boolean.boolean import ParseError
 
-from reuse import _util
-from reuse._util import _LICENSING
+from reuse import _LICENSING, ReuseInfo
+from reuse.extract import (
+    decoded_text_from_binary,
+    detect_line_endings,
+    extract_reuse_info,
+    filter_ignore_block,
+)
 
 # REUSE-IgnoreStart
 
@@ -27,15 +32,13 @@ def test_extract_expression():
     """Parse various expressions."""
     expressions = ["GPL-3.0+", "GPL-3.0 AND CC0-1.0", "nonsense"]
     for expression in expressions:
-        result = _util.extract_reuse_info(
-            f"SPDX-License-Identifier: {expression}"
-        )
+        result = extract_reuse_info(f"SPDX-License-Identifier: {expression}")
         assert result.spdx_expressions == {_LICENSING.parse(expression)}
 
 
 def test_extract_expression_from_ascii_art_frame():
     """Parse an expression from an ASCII art frame"""
-    result = _util.extract_reuse_info(
+    result = extract_reuse_info(
         cleandoc(
             """
              /**********************************\\
@@ -51,20 +54,20 @@ def test_extract_erroneous_expression():
     """Parse an incorrect expression."""
     expression = "SPDX-License-Identifier: GPL-3.0-or-later AND (MIT OR)"
     with pytest.raises(ParseError):
-        _util.extract_reuse_info(expression)
+        extract_reuse_info(expression)
 
 
 def test_extract_no_info():
     """Given a string without REUSE information, return an empty ReuseInfo
     object.
     """
-    result = _util.extract_reuse_info("")
-    assert result == _util.ReuseInfo()
+    result = extract_reuse_info("")
+    assert result == ReuseInfo()
 
 
 def test_extract_tab():
     """A tag followed by a tab is also valid."""
-    result = _util.extract_reuse_info("SPDX-License-Identifier:\tMIT")
+    result = extract_reuse_info("SPDX-License-Identifier:\tMIT")
     assert result.spdx_expressions == {_LICENSING.parse("MIT")}
 
 
@@ -72,14 +75,14 @@ def test_extract_many_whitespace():
     """When a tag is followed by a lot of whitespace, the whitespace should be
     filtered out.
     """
-    result = _util.extract_reuse_info("SPDX-License-Identifier:    MIT")
+    result = extract_reuse_info("SPDX-License-Identifier:    MIT")
     assert result.spdx_expressions == {_LICENSING.parse("MIT")}
 
 
 def test_extract_bibtex_comment():
     """A special case for BibTex comments."""
     expression = "@Comment{SPDX-License-Identifier: GPL-3.0-or-later}"
-    result = _util.extract_reuse_info(expression)
+    result = extract_reuse_info(expression)
     assert str(list(result.spdx_expressions)[0]) == "GPL-3.0-or-later"
 
 
@@ -88,23 +91,21 @@ def test_extract_copyright():
     information.
     """
     copyright_line = "SPDX-FileCopyrightText: 2019 Jane Doe"
-    result = _util.extract_reuse_info(copyright_line)
+    result = extract_reuse_info(copyright_line)
     assert result.copyright_lines == {copyright_line}
 
 
 def test_extract_copyright_duplicate():
     """When a copyright line is duplicated, only yield one."""
     copyright_line = "SPDX-FileCopyrightText: 2019 Jane Doe"
-    result = _util.extract_reuse_info(
-        "\n".join((copyright_line, copyright_line))
-    )
+    result = extract_reuse_info("\n".join((copyright_line, copyright_line)))
     assert result.copyright_lines == {copyright_line}
 
 
 def test_extract_copyright_tab():
     """A tag followed by a tab is also valid."""
     copyright_line = "SPDX-FileCopyrightText:\t2019 Jane Doe"
-    result = _util.extract_reuse_info(copyright_line)
+    result = extract_reuse_info(copyright_line)
     assert result.copyright_lines == {copyright_line}
 
 
@@ -113,7 +114,7 @@ def test_extract_copyright_many_whitespace():
     whitespace is not filtered out.
     """
     copyright_line = "SPDX-FileCopyrightText:    2019 Jane Doe"
-    result = _util.extract_reuse_info(copyright_line)
+    result = extract_reuse_info(copyright_line)
     assert result.copyright_lines == {copyright_line}
 
 
@@ -133,7 +134,7 @@ def test_extract_copyright_variations():
         """
     )
 
-    result = _util.extract_reuse_info(text)
+    result = extract_reuse_info(text)
     lines = text.splitlines()
     for line in lines:
         assert line in result.copyright_lines
@@ -155,7 +156,7 @@ def test_extract_with_ignore_block():
         SPDX-FileCopyrightText: 2019 Eve
         """
     )
-    result = _util.extract_reuse_info(text)
+    result = extract_reuse_info(text)
     assert len(result.copyright_lines) == 2
     assert len(result.spdx_expressions) == 1
 
@@ -165,7 +166,7 @@ def test_extract_sameline_multiline():
     do not include the comment end pattern as part of the copyright.
     """
     text = "<!-- SPDX-FileCopyrightText: Jane Doe -->"
-    result = _util.extract_reuse_info(text)
+    result = extract_reuse_info(text)
     assert len(result.copyright_lines) == 1
     assert result.copyright_lines == {"SPDX-FileCopyrightText: Jane Doe"}
 
@@ -185,7 +186,7 @@ def test_extract_special_endings():
         [Copyright 2019 Ajnulo] ::
         """
     )
-    result = _util.extract_reuse_info(text)
+    result = extract_reuse_info(text)
     for item in result.copyright_lines:
         assert ">" not in item
         assert "] ::" not in item
@@ -198,7 +199,7 @@ def test_extract_contributors():
         # SPDX-FileContributor: Jane Doe
         """
     )
-    result = _util.extract_reuse_info(text)
+    result = extract_reuse_info(text)
     assert result.contributor_lines == {"Jane Doe"}
 
 
@@ -217,7 +218,7 @@ def test_filter_ignore_block_with_comment_style():
     )
     expected = "Relevant text\n# \nOther relevant text"
 
-    result = _util.filter_ignore_block(text)
+    result = filter_ignore_block(text)
     assert result == expected
 
 
@@ -242,7 +243,7 @@ def test_filter_ignore_block_non_comment_style():
         """
     )
 
-    result = _util.filter_ignore_block(text)
+    result = filter_ignore_block(text)
     assert result == expected
 
 
@@ -267,7 +268,7 @@ def test_filter_ignore_block_with_ignored_information_on_same_line():
         """
     )
 
-    result = _util.filter_ignore_block(text)
+    result = filter_ignore_block(text)
     assert result == expected
 
 
@@ -284,7 +285,7 @@ def test_filter_ignore_block_with_relevant_information_on_same_line():
     )
     expected = "Relevant textOther relevant text"
 
-    result = _util.filter_ignore_block(text)
+    result = filter_ignore_block(text)
     assert result == expected
 
 
@@ -305,7 +306,7 @@ def test_filter_ignore_block_with_beginning_and_end_on_same_line_correct_order()
         """
     )
 
-    result = _util.filter_ignore_block(text)
+    result = filter_ignore_block(text)
     assert result == expected
 
 
@@ -316,7 +317,7 @@ def test_filter_ignore_block_with_beginning_and_end_on_same_line_wrong_order():
     text = "Relevant textREUSE-IgnoreEndOther relevant textREUSE-IgnoreStartIgnored text"  # pylint: disable=line-too-long
     expected = "Relevant textREUSE-IgnoreEndOther relevant text"
 
-    result = _util.filter_ignore_block(text)
+    result = filter_ignore_block(text)
     assert result == expected
 
 
@@ -334,7 +335,7 @@ def test_filter_ignore_block_without_end():
     )
     expected = "Relevant text\n"
 
-    result = _util.filter_ignore_block(text)
+    result = filter_ignore_block(text)
     assert result == expected
 
 
@@ -365,166 +366,49 @@ def test_filter_ignore_block_with_multiple_ignore_blocks():
         """
     )
 
-    result = _util.filter_ignore_block(text)
+    result = filter_ignore_block(text)
     assert result == expected
 
 
-def test_make_copyright_line_simple():
-    """Given a simple statement, make it a copyright line."""
-    assert _util.make_copyright_line("hello") == "SPDX-FileCopyrightText: hello"
-
-
-def test_make_copyright_line_year():
-    """Given a simple statement and a year, make it a copyright line."""
-    assert (
-        _util.make_copyright_line("hello", year="2019")
-        == "SPDX-FileCopyrightText: 2019 hello"
-    )
-
-
-def test_make_copyright_line_prefix_spdx():
-    """Given a simple statement and prefix, make it a copyright line."""
-    statement = _util.make_copyright_line("hello", copyright_prefix="spdx")
-    assert statement == "SPDX-FileCopyrightText: hello"
-
-
-def test_make_copyright_line_prefix_spdx_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="spdx"
-    )
-    assert statement == "SPDX-FileCopyrightText: 2019 hello"
-
-
-def test_make_copyright_line_prefix_spdx_c_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="spdx-c"
-    )
-    assert statement == "SPDX-FileCopyrightText: (C) 2019 hello"
-
-
-def test_make_copyright_line_prefix_spdx_symbol_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="spdx-symbol"
-    )
-    assert statement == "SPDX-FileCopyrightText: © 2019 hello"
-
-
-def test_make_copyright_line_prefix_string_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="string"
-    )
-    assert statement == "Copyright 2019 hello"
-
-
-def test_make_copyright_line_prefix_string_c_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="string-c"
-    )
-    assert statement == "Copyright (C) 2019 hello"
-
-
-def test_make_copyright_line_prefix_spdx_string_c_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="spdx-string-c"
-    )
-    assert statement == "SPDX-FileCopyrightText: Copyright (C) 2019 hello"
-
-
-def test_make_copyright_line_prefix_spdx_string_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="spdx-string"
-    )
-    assert statement == "SPDX-FileCopyrightText: Copyright 2019 hello"
-
-
-def test_make_copyright_line_prefix_spdx_string_symbol_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="spdx-string-symbol"
-    )
-    assert statement == "SPDX-FileCopyrightText: Copyright © 2019 hello"
-
-
-def test_make_copyright_line_prefix_string_symbol_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="string-symbol"
-    )
-    assert statement == "Copyright © 2019 hello"
-
-
-def test_make_copyright_line_prefix_symbol_year():
-    """Given a simple statement, prefix and a year, make it a copyright line."""
-    statement = _util.make_copyright_line(
-        "hello", year=2019, copyright_prefix="symbol"
-    )
-    assert statement == "© 2019 hello"
-
-
-def test_make_copyright_line_existing_spdx_copyright():
-    """Given a copyright line, do nothing."""
-    value = "SPDX-FileCopyrightText: hello"
-    assert _util.make_copyright_line(value) == value
-
-
-def test_make_copyright_line_existing_other_copyright():
-    """Given a non-SPDX copyright line, do nothing."""
-    value = "© hello"
-    assert _util.make_copyright_line(value) == value
-
-
-def test_make_copyright_line_multine_error():
-    """Given a multiline argument, expect an error."""
-    with pytest.raises(RuntimeError):
-        _util.make_copyright_line("hello\nworld")
-
-
 def test_decoded_text_from_binary_simple():
     """A unicode string encoded as bytes object decodes back correctly."""
     text = "Hello, world ☺"
     encoded = text.encode("utf-8")
-    assert _util.decoded_text_from_binary(BytesIO(encoded)) == text
+    assert decoded_text_from_binary(BytesIO(encoded)) == text
 
 
 def test_decoded_text_from_binary_size():
     """Only a given amount of bytes is decoded."""
     text = "Hello, world ☺"
     encoded = text.encode("utf-8")
-    assert _util.decoded_text_from_binary(BytesIO(encoded), size=5) == "Hello"
+    assert decoded_text_from_binary(BytesIO(encoded), size=5) == "Hello"
 
 
 def test_decoded_text_from_binary_crlf():
     """Given CRLF line endings, convert to LF."""
     text = "Hello\r\nworld"
     encoded = text.encode("utf-8")
-    assert _util.decoded_text_from_binary(BytesIO(encoded)) == "Hello\nworld"
+    assert decoded_text_from_binary(BytesIO(encoded)) == "Hello\nworld"
 
 
 def test_detect_line_endings_windows():
     """Given a CRLF string, detect the line endings."""
-    assert _util.detect_line_endings("hello\r\nworld") == "\r\n"
+    assert detect_line_endings("hello\r\nworld") == "\r\n"
 
 
 def test_detect_line_endings_mac():
     """Given a CR string, detect the line endings."""
-    assert _util.detect_line_endings("hello\rworld") == "\r"
+    assert detect_line_endings("hello\rworld") == "\r"
 
 
 def test_detect_line_endings_linux():
     """Given a LF string, detect the line endings."""
-    assert _util.detect_line_endings("hello\nworld") == "\n"
+    assert detect_line_endings("hello\nworld") == "\n"
 
 
 def test_detect_line_endings_no_newlines():
     """Given a file without line endings, default to os.linesep."""
-    assert _util.detect_line_endings("hello world") == os.linesep
+    assert detect_line_endings("hello world") == os.linesep
 
 
-# REUSE-IgnoreEnd
+# REUSE-IgnoreEnd
diff --git a/tests/test_global_licensing.py b/tests/test_global_licensing.py
index 62a69557a..1b7385381 100644
--- a/tests/test_global_licensing.py
+++ b/tests/test_global_licensing.py
@@ -13,8 +13,7 @@
 from debian.copyright import Copyright
 from license_expression import LicenseSymbol
 
-from reuse import ReuseInfo, SourceType
-from reuse._util import _LICENSING
+from reuse import _LICENSING, ReuseInfo, SourceType
 from reuse.exceptions import (
     GlobalLicensingParseError,
     GlobalLicensingParseTypeError,
diff --git a/tests/test_project.py b/tests/test_project.py
index 448a2bf79..3606816d3 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -19,8 +19,7 @@
 from conftest import RESOURCES_DIRECTORY
 from license_expression import LicenseSymbol
 
-from reuse import ReuseInfo, SourceType
-from reuse._util import _LICENSING
+from reuse import _LICENSING, ReuseInfo, SourceType
 from reuse.covered_files import iter_files
 from reuse.exceptions import (
     GlobalLicensingConflictError,