From 07e849f03d23a2ef425918dde3952c3c74898e10 Mon Sep 17 00:00:00 2001
From: Jairo Llopis <yajo.sk8@gmail.com>
Date: Mon, 30 Dec 2024 10:19:01 +0000
Subject: [PATCH] feat(external_data): load data from other YAML files

When composing templates, you often need to load answers from other templates that are usually combined with yours, or any other kind of external data.

@moduon MT-8282
---
 copier/errors.py          |  4 ++
 copier/main.py            | 91 +++++++++++++++++++++++++++----------
 copier/template.py        | 10 ++++-
 copier/types.py           |  2 +
 copier/user_data.py       | 42 +++++++++++------
 docs/configuring.md       | 69 ++++++++++++++++++++++++++++
 docs/creating.md          | 11 +++++
 tests/test_answersfile.py | 95 ++++++++++++++++++++++++++++++++++++++-
 tests/test_config.py      |  2 +-
 9 files changed, 285 insertions(+), 41 deletions(-)

diff --git a/copier/errors.py b/copier/errors.py
index 05d4180ee..5d9cb5f23 100644
--- a/copier/errors.py
+++ b/copier/errors.py
@@ -131,3 +131,7 @@ class DirtyLocalWarning(UserWarning, CopierWarning):
 
 class ShallowCloneWarning(UserWarning, CopierWarning):
     """The template repository is a shallow clone."""
+
+
+class MissingFileWarning(UserWarning, CopierWarning):
+    """I still couldn't find what I'm looking for."""
diff --git a/copier/main.py b/copier/main.py
index 351e42fe7..dd2540836 100644
--- a/copier/main.py
+++ b/copier/main.py
@@ -60,16 +60,31 @@
 from .types import (
     MISSING,
     AnyByStrDict,
+    AnyByStrMutableMapping,
     JSONSerializable,
     RelativePath,
     StrOrPath,
 )
-from .user_data import DEFAULT_DATA, AnswersMap, Question
+from .user_data import AnswersMap, Question, load_answersfile_data
 from .vcs import get_git
 
 _T = TypeVar("_T")
 
 
+# HACK https://github.com/copier-org/copier/pull/1880#discussion_r1887491497
+class _LazyDict:
+    """A dict where values are functions that get evaluated only once when requested."""
+
+    def __init__(self, **kwargs: Callable[[], Any]):
+        self.pending = kwargs
+        self.done = {}
+
+    def __getitem__(self, key: str) -> Any:
+        if key not in self.done:
+            self.done[key] = self.pending[key]()
+        return self.done[key]
+
+
 @dataclass(config=ConfigDict(extra="forbid"))
 class Worker:
     """Copier process state manager.
@@ -263,7 +278,27 @@ def _check_unsafe(self, mode: Literal["copy", "update"]) -> None:
         if features:
             raise UnsafeTemplateError(sorted(features))
 
+    def _external_data(self) -> _LazyDict:
+        """Load external data lazily.
+
+        Result keys are used for rendering, and values are the parsed contents
+        of the YAML files specified in [external_data][].
+
+        Files are parsed lazily, only on first access. This helps avoid
+        circular dependencies when the file name also comes from a variable.
+        """
+        return _LazyDict(
+            **{
+                name: lambda path=path: load_answersfile_data(
+                    self.dst_path, self._render_string(path)
+                )
+                for name, path in self.template.external_data.items()
+            }
+        )
+
     def _print_message(self, message: str) -> None:
+        # On first use, at least we need the system render context
+        self.answers.system = self._system_render_context()
         if message and not self.quiet:
             print(self._render_string(message), file=sys.stderr)
 
@@ -330,12 +365,18 @@ def _execute_tasks(self, tasks: Sequence[Task]) -> None:
             with local.cwd(working_directory), local.env(**extra_env):
                 subprocess.run(task_cmd, shell=use_shell, check=True, env=local.env)
 
-    def _render_context(self) -> Mapping[str, Any]:
-        """Produce render context for Jinja."""
+    def _system_render_context(self) -> AnyByStrMutableMapping:
+        """System reserved render context.
+
+        Most keys start with `_` because they're reserved.
+
+        Resolution of computed values is deferred until used for the 1st time.
+        """
         # Backwards compatibility
         # FIXME Remove it?
         conf = asdict(self)
         conf.pop("_cleanup_hooks")
+        conf.pop("answers")
         conf.update(
             {
                 "answers_file": self.answers_relpath,
@@ -345,12 +386,10 @@ def _render_context(self) -> Mapping[str, Any]:
                 "os": OS,
             }
         )
-
         return dict(
-            DEFAULT_DATA,
-            **self.answers.combined,
             _copier_answers=self._answers_to_remember(),
             _copier_conf=conf,
+            _external_data=self._external_data(),
             _folder_name=self.subproject.local_abspath.name,
             _copier_python=sys.executable,
         )
@@ -455,41 +494,42 @@ def _render_allowed(
 
     def _ask(self) -> None:  # noqa: C901
         """Ask the questions of the questionnaire and record their answers."""
-        result = AnswersMap(
+        self.answers = AnswersMap(
             user_defaults=self.user_defaults,
             init=self.data,
             last=self.subproject.last_answers,
             metadata=self.template.metadata,
+            system=self._system_render_context(),
         )
 
         for var_name, details in self.template.questions_data.items():
             question = Question(
-                answers=result,
+                answers=self.answers,
                 jinja_env=self.jinja_env,
                 var_name=var_name,
                 **details,
             )
             # Delete last answer if it cannot be parsed or validated, so a new
             # valid answer can be provided.
-            if var_name in result.last:
+            if var_name in self.answers.last:
                 try:
-                    answer = question.parse_answer(result.last[var_name])
+                    answer = question.parse_answer(self.answers.last[var_name])
                 except Exception:
-                    del result.last[var_name]
+                    del self.answers.last[var_name]
                 else:
                     if question.validate_answer(answer):
-                        del result.last[var_name]
+                        del self.answers.last[var_name]
             # Skip a question when the skip condition is met.
             if not question.get_when():
                 # Omit its answer from the answers file.
-                result.hide(var_name)
+                self.answers.hide(var_name)
                 # Skip immediately to the next question when it has no default
                 # value.
                 if question.default is MISSING:
                     continue
-            if var_name in result.init:
+            if var_name in self.answers.init:
                 # Try to parse the answer value.
-                answer = question.parse_answer(result.init[var_name])
+                answer = question.parse_answer(self.answers.init[var_name])
                 # Try to validate the answer value if the question has a
                 # validator.
                 if err_msg := question.validate_answer(answer):
@@ -498,10 +538,10 @@ def _ask(self) -> None:  # noqa: C901
                     )
                 # At this point, the answer value is valid. Do not ask the
                 # question again, but set answer as the user's answer instead.
-                result.user[var_name] = answer
+                self.answers.user[var_name] = answer
                 continue
             # Skip a question when the user already answered it.
-            if self.skip_answered and var_name in result.last:
+            if self.skip_answered and var_name in self.answers.last:
                 continue
 
             # Display TUI and ask user interactively only without --defaults
@@ -516,10 +556,12 @@ def _ask(self) -> None:  # noqa: C901
                         answers={question.var_name: question.get_default()},
                     )[question.var_name]
             except KeyboardInterrupt as err:
-                raise CopierAnswersInterrupt(result, question, self.template) from err
-            result.user[var_name] = new_answer
-
-        self.answers = result
+                raise CopierAnswersInterrupt(
+                    self.answers, question, self.template
+                ) from err
+            self.answers.user[var_name] = new_answer
+        # Update system render context, which may depend on answers
+        self.answers.system = self._system_render_context()
 
     @property
     def answers_relpath(self) -> Path:
@@ -644,7 +686,7 @@ def _render_file(self, src_relpath: Path, dst_relpath: Path) -> None:
                 # suffix is empty, fallback to copy
                 new_content = src_abspath.read_bytes()
             else:
-                new_content = tpl.render(**self._render_context()).encode()
+                new_content = tpl.render(**self.answers.combined).encode()
         else:
             new_content = src_abspath.read_bytes()
         dst_abspath = self.subproject.local_abspath / dst_relpath
@@ -766,7 +808,7 @@ def _render_string(
                 Additional variables to use for rendering the template.
         """
         tpl = self.jinja_env.from_string(string)
-        return tpl.render(**self._render_context(), **(extra_context or {}))
+        return tpl.render(**self.answers.combined, **(extra_context or {}))
 
     def _render_value(
         self, value: _T, extra_context: AnyByStrDict | None = None
@@ -984,7 +1026,7 @@ def _apply_update(self) -> None:  # noqa: C901
                 )
             # Clear last answers cache to load possible answers migration, if skip_answered flag is not set
             if self.skip_answered is False:
-                self.answers = AnswersMap()
+                self.answers = AnswersMap(system=self._system_render_context())
                 with suppress(AttributeError):
                     del self.subproject.last_answers
             # Do a normal update in final destination
@@ -1000,6 +1042,7 @@ def _apply_update(self) -> None:  # noqa: C901
             ) as current_worker:
                 current_worker.run_copy()
                 self.answers = current_worker.answers
+                self.answers.system = self._system_render_context()
             # Render with the same answers in an empty dir to avoid pollution
             with replace(
                 self,
diff --git a/copier/template.py b/copier/template.py
index 6e39fea9b..d58dbd978 100644
--- a/copier/template.py
+++ b/copier/template.py
@@ -10,7 +10,7 @@
 from functools import cached_property
 from pathlib import Path, PurePosixPath
 from shutil import rmtree
-from typing import Any, Literal, Mapping, Sequence
+from typing import Any, Dict, Literal, Mapping, Sequence
 from warnings import warn
 
 import dunamai
@@ -329,6 +329,14 @@ def exclude(self) -> tuple[str, ...]:
             )
         )
 
+    @cached_property
+    def external_data(self) -> Dict[str, str]:
+        """Get external data files specified in the template.
+
+        See [external_data][].
+        """
+        return self.config_data.get("external_data", {})
+
     @cached_property
     def jinja_extensions(self) -> tuple[str, ...]:
         """Get Jinja2 extensions specified in the template, or `()`.
diff --git a/copier/types.py b/copier/types.py
index be9a5de91..740cf7dd4 100644
--- a/copier/types.py
+++ b/copier/types.py
@@ -7,6 +7,7 @@
     Dict,
     Literal,
     Mapping,
+    MutableMapping,
     NewType,
     Optional,
     Sequence,
@@ -19,6 +20,7 @@
 # simple types
 StrOrPath = Union[str, Path]
 AnyByStrDict = Dict[str, Any]
+AnyByStrMutableMapping = MutableMapping[str, Any]
 
 # sequences
 IntSeq = Sequence[int]
diff --git a/copier/user_data.py b/copier/user_data.py
index 6672128e0..80d5f1b84 100644
--- a/copier/user_data.py
+++ b/copier/user_data.py
@@ -23,9 +23,9 @@
 from pygments.lexers.data import JsonLexer, YamlLexer
 from questionary.prompts.common import Choice
 
-from .errors import InvalidTypeError, UserMessageError
+from .errors import InvalidTypeError, MissingFileWarning, UserMessageError
 from .tools import cast_to_bool, cast_to_str, force_str_end
-from .types import MISSING, AnyByStrDict, MissingType, OptStrOrPath, StrOrPath
+from .types import MISSING, AnyByStrDict, AnyByStrMutableMapping, MissingType, StrOrPath
 
 
 # TODO Remove these two functions as well as DEFAULT_DATA in a future release
@@ -83,17 +83,21 @@ class AnswersMap:
             Default data from the user e.g. previously completed and restored data.
 
             See [copier.main.Worker][].
+
+        system:
+            Automatic context generated by the [Worker][copier.main.Worker].
     """
 
     # Private
     hidden: set[str] = field(default_factory=set, init=False)
 
     # Public
-    user: AnyByStrDict = field(default_factory=dict)
-    init: AnyByStrDict = field(default_factory=dict)
-    metadata: AnyByStrDict = field(default_factory=dict)
-    last: AnyByStrDict = field(default_factory=dict)
-    user_defaults: AnyByStrDict = field(default_factory=dict)
+    user: AnyByStrMutableMapping = field(default_factory=dict)
+    init: AnyByStrMutableMapping = field(default_factory=dict)
+    metadata: AnyByStrMutableMapping = field(default_factory=dict)
+    last: AnyByStrMutableMapping = field(default_factory=dict)
+    user_defaults: AnyByStrMutableMapping = field(default_factory=dict)
+    system: AnyByStrMutableMapping = field(default_factory=dict)
 
     @property
     def combined(self) -> Mapping[str, Any]:
@@ -105,6 +109,7 @@ def combined(self) -> Mapping[str, Any]:
                 self.metadata,
                 self.last,
                 self.user_defaults,
+                self.system,
                 DEFAULT_DATA,
             )
         )
@@ -125,6 +130,15 @@ class Question:
     All attributes are init kwargs.
 
     Attributes:
+        var_name:
+            Question name in the answers dict.
+
+        answers:
+            A map containing the answers provided by the user.
+
+        jinja_env:
+            The Jinja environment used for rendering answers.
+
         choices:
             Selections available for the user if the question requires them.
             Can be templated.
@@ -155,13 +169,10 @@ class Question:
             If the question type is str, it will hide user input on the screen
             by displaying asterisks: `****`.
 
-        type_name:
+        type:
             The type of question. Affects the rendering, validation and filtering.
             Can be templated.
 
-        var_name:
-            Question name in the answers dict.
-
         validator:
             Jinja template with which to validate the user input. This template
             will be rendered with the combined answers as variables; it should
@@ -487,13 +498,16 @@ def parse_yaml_string(string: str) -> Any:
 
 def load_answersfile_data(
     dst_path: StrOrPath,
-    answers_file: OptStrOrPath = None,
+    answers_file: StrOrPath = ".copier-answers.yml",
 ) -> AnyByStrDict:
     """Load answers data from a `$dst_path/$answers_file` file if it exists."""
     try:
-        with open(Path(dst_path) / (answers_file or ".copier-answers.yml")) as fd:
+        with open(Path(dst_path) / answers_file) as fd:
             return yaml.safe_load(fd)
-    except FileNotFoundError:
+    except (FileNotFoundError, IsADirectoryError):
+        warnings.warn(
+            f"File not found; returning empty dict: {answers_file}", MissingFileWarning
+        )
         return {}
 
 
diff --git a/docs/configuring.md b/docs/configuring.md
index 636f04ea3..a9342ddcd 100644
--- a/docs/configuring.md
+++ b/docs/configuring.md
@@ -841,6 +841,75 @@ contains your data.
 
     Command line arguments passed via `--data` always take precedence over the data file.
 
+### `external_data`
+
+-   Format: `dict[str, str]`
+-   CLI flags: N/A
+-   Default value: `{}`
+
+This allows using preexisting data inside the rendering context. The format is a dict of
+strings, where:
+
+-   The dict key will be the namespace of the data under [`_external_data`][].
+-   The dict value is the relative path (from the subproject destination) where the YAML
+    data file should be found.
+
+!!! example "Template composition"
+
+    If your template is
+    [a complement of another template][applying-multiple-templates-to-the-same-subproject],
+    you can access the other template's answers with a pattern similar to this:
+
+    ```yaml title="copier.yml"
+    # Child template defaults to a different answers file, to avoid conflicts
+    _answers_file: .copier-answers.child-tpl.yml
+
+    # Child template loads parent answers
+    _external_data:
+        # A dynamic path. Make sure you answer that question
+        # before the first access to the data (with `_external_data.parent_tpl`)
+        parent_tpl: "{{ parent_tpl_answers_file }}"
+
+    # Ask user where they stored parent answers
+    parent_tpl_answers_file:
+        help: Where did you store answers of the parent template?
+        default: .copier-answers.yml
+
+    # Use a parent answer as the default value for a child question
+    target_version:
+        help: What version are you deploying?
+        # We already answered the `parent_tpl_answers_file` question, so we can
+        # now correctly access the external data from `_external_data.parent_tpl`
+        default: "{{ _external_data.parent_tpl.target_version }}"
+    ```
+
+!!! example "Loading secrets"
+
+    If your template has [secret questions][secret_questions], you can load the secrets
+    and use them, e.g., as default answers with a pattern similar to this:
+
+    ```yaml
+    # Template loads secrets from Git-ignored file
+    _external_data:
+        # A static path. If missing, it will return an empty dict
+        secrets: .secrets.yaml
+
+    # Use a secret answer as the default value for a secret question
+    password:
+        help: What is the password?
+        secret: true
+        # If `.secrets.yaml` exists, it has been loaded at this point and we can
+        # now correctly access the external data from `_external_data.secrets`
+        default: "{{ _external_data.secrets.password }}"
+    ```
+
+    A template might even render `.secrets.yaml` with the answers to secret questions
+    similar to this:
+
+    ```yaml title=".secrets.yaml.jinja"
+    password: "{{ password }}"
+    ```
+
 ### `envops`
 
 -   Format: `dict`
diff --git a/docs/creating.md b/docs/creating.md
index d0415f377..a64e701e3 100644
--- a/docs/creating.md
+++ b/docs/creating.md
@@ -121,6 +121,17 @@ The current commit hash from the template.
 
 The absolute path of the Python interpreter running Copier.
 
+### `_external_data`
+
+A dict of the data contained in [external_data][].
+
+When rendering the template, that data will be exposed in the special `_external_data`
+variable:
+
+-   Keys will be the same as in [external_data][].
+-   Values will be the files' contents parsed as YAML. JSON is also compatible.
+-   Parsing is done lazily on first use.
+
 ### `_folder_name`
 
 The name of the project root directory.
diff --git a/tests/test_answersfile.py b/tests/test_answersfile.py
index e09cf090a..c3c405698 100644
--- a/tests/test_answersfile.py
+++ b/tests/test_answersfile.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 from pathlib import Path
 from textwrap import dedent
 
@@ -8,7 +9,7 @@
 import copier
 from copier.user_data import load_answersfile_data
 
-from .helpers import BRACKET_ENVOPS_JSON, SUFFIX_TMPL, build_file_tree
+from .helpers import BRACKET_ENVOPS_JSON, SUFFIX_TMPL, build_file_tree, git_save
 
 
 @pytest.fixture(scope="module")
@@ -129,3 +130,95 @@ def test_answersfile(
     assert log["round"] == "2nd"
     assert "password_1" not in log
     assert "password_2" not in log
+
+
+def test_external_data(tmp_path_factory: pytest.TempPathFactory) -> None:
+    parent1, parent2, child, dst = map(
+        tmp_path_factory.mktemp, ("parent1", "parent2", "child", "dst")
+    )
+    build_file_tree(
+        {
+            (parent1 / "copier.yaml"): "{name: P1, child: C1}",
+            (parent1 / "parent1.txt.jinja"): "{{ name }}",
+            (
+                parent1 / "{{ _copier_conf.answers_file }}.jinja"
+            ): "{{ _copier_answers|to_nice_yaml -}}",
+            (parent2 / "copier.yaml"): "name: P2",
+            (parent2 / "parent2.txt.jinja"): "{{ name }}",
+            (
+                parent2 / "{{ _copier_conf.answers_file }}.jinja"
+            ): "{{ _copier_answers|to_nice_yaml -}}",
+            (child / "copier.yml"): (
+                """\
+                _external_data:
+                    parent1: .copier-answers.yml
+                    parent2: "{{ parent2_answers }}"
+                parent2_answers: .parent2-answers.yml
+                name: "{{ _external_data.parent2.child | d(_external_data.parent1.child) }}"
+                """
+            ),
+            (child / "combined.json.jinja"): """\
+                {
+                    "parent1": {{ _external_data.parent1.name | tojson }},
+                    "parent2": {{ _external_data.parent2.name | tojson }},
+                    "child": {{ name | tojson }}
+                }
+            """,
+            (
+                child / "{{ _copier_conf.answers_file }}.jinja"
+            ): "{{ _copier_answers|to_nice_yaml -}}",
+        }
+    )
+    git_save(parent1, tag="v1.0+parent1")
+    git_save(parent2, tag="v1.0+parent2")
+    git_save(child, tag="v1.0+child")
+    # Apply parent 1. At this point we don't know we'll want more than 1
+    # template in the same subproject, so we leave the default answers file.
+    copier.run_copy(str(parent1), dst, defaults=True, overwrite=True)
+    git_save(dst)
+    assert (dst / "parent1.txt").read_text() == "P1"
+    expected_parent1_answers = {
+        "_src_path": str(parent1),
+        "_commit": "v1.0+parent1",
+        "name": "P1",
+        "child": "C1",
+    }
+    assert load_answersfile_data(dst, ".copier-answers.yml") == expected_parent1_answers
+    # Apply parent 2. It uses a different answers file.
+    copier.run_copy(
+        str(parent2),
+        dst,
+        defaults=True,
+        overwrite=True,
+        answers_file=".parent2-answers.yml",
+    )
+    git_save(dst)
+    assert (dst / "parent2.txt").read_text() == "P2"
+    expected_parent2_answers = {
+        "_commit": "v1.0+parent2",
+        "_src_path": str(parent2),
+        "name": "P2",
+    }
+    assert (
+        load_answersfile_data(dst, ".parent2-answers.yml") == expected_parent2_answers
+    )
+    # Apply child. It can access answers from both parents.
+    copier.run_copy(
+        str(child),
+        dst,
+        defaults=True,
+        overwrite=True,
+        answers_file=".child-answers.yml",
+    )
+    git_save(dst)
+    assert load_answersfile_data(dst, ".child-answers.yml") == {
+        "_commit": "v1.0+child",
+        "_src_path": str(child),
+        "name": "C1",
+        "parent2_answers": ".parent2-answers.yml",
+    }
+    assert json.loads((dst / "combined.json").read_text()) == {
+        "parent1": "P1",
+        "parent2": "P2",
+        "child": "C1",
+    }
diff --git a/tests/test_config.py b/tests/test_config.py
index 1c13dd146..54363c989 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -292,7 +292,7 @@ def is_subdict(small: dict[Any, Any], big: dict[Any, Any]) -> bool:
 def test_worker_good_data(tmp_path: Path) -> None:
     # This test is probably useless, as it tests the what and not the how
     conf = copier.Worker("./tests/demo_data", tmp_path)
-    assert conf._render_context()["_folder_name"] == tmp_path.name
+    assert conf._system_render_context()["_folder_name"] == tmp_path.name
     assert conf.all_exclusions == ("exclude1", "exclude2")
     assert conf.template.skip_if_exists == ["skip_if_exists1", "skip_if_exists2"]
     assert conf.template.tasks == [