Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete support for time step modification #58

Merged
merged 10 commits into from
Sep 4, 2024
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
exclude: '^(?!cstar_ocean/).*'
exclude: '^(?!cstar/).*'
ci:
autoupdate_schedule: monthly

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
files: '^cstar_ocean/'
files: '^cstar/'
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
Expand Down
218 changes: 149 additions & 69 deletions cstar/base/additional_code.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import shutil
import tempfile
from typing import Optional, List
from pathlib import Path
from cstar.base.datasource import DataSource
from cstar.base.base_model import BaseModel
from cstar.base.utils import _clone_and_checkout

Expand All @@ -10,59 +11,73 @@ class AdditionalCode:
"""
Additional code contributing to a unique instance of a base model, e.g. namelists, source modifications, etc.

Additional code is assumed to be kept in a git-controlled repository (`source_repo`), and obtaining the code
is handled by git commands.
Additional code is assumed to be kept in a single directory or repository (described by the `source` attribute)
with this structure:

<additional_code_dir>
├── namelists
| └ <base_model_name>
| ├ <namelist_file_1>
| | ...
| └ <namelist_file_N>
└── source_mods
└ <base_model_name>
├ <source_code_file_1>
| ...
└ <source_code_file_N>

Attributes:
-----------
base_model: BaseModel
The base model with which this additional code is associated
source_repo: str
URL pointing to a git-controlled repository containing the additional code
checkout_target: str
A tag, git hash, or other target to check out the source repo at the correct point in its history
source_mods: str or list of strs
Path(s) from the top level of `source_repo` to any code that is needed to compile a unique instance of the base model
source: DataSource
Describes the location and type of source data (e.g. repository, directory)
checkout_target: Optional, str
Used if source.source_type is 'repository'. A tag, git hash, or other target to check out.
source_mods: Optional, str or list of strs
Path(s) relative to the top level of `source.location` to any code that is needed to compile a unique instance of the base model
namelists: str or list of strs
Path(s) from the top level of `source_repo` to any code that is needed at runtime for the base model
Path(s) relative to the top level of `source.location` to any code that is needed at runtime for the base model
exists_locally: bool, default None
True if the additional code has been fetched to the local machine, set when `check_exists_locally()` method is called

Set to True if source.location_type is 'path', or if AdditionalCode.get() has been called.
Is also set by the `check_exists_locally()` method.
local_path: str, default None
The path to where the additional code has been fetched locally, set when the `get()` method is called
The local path to the additional code. Set when `get()` method is called, or if source.location_type is 'path'.

Methods:
--------
get(local_path):
Clone the `source_repo` repository to a temporary directory, checkout `checkout_target`,
and move files associated with this AdditionalCode instance to `local_path`.
check_exists_locally(local_path):
Verify whether the files associated with this AdditionalCode instance can be found at `local_path`
get(local_dir):
Fetch the directory containing this additional code and copy it to `local_dir`.
If source.source_type is 'repository', and source.location_type is 'url',
clone repository to a temporary directory, checkout `checkout_target`,
and move files associated with this AdditionalCode instance to `local_dir`.
check_exists_locally(local_dir):
Verify whether the files associated with this AdditionalCode instance can be found at `local_dir`
"""

def __init__(
self,
base_model: BaseModel,
source_repo: str,
checkout_target: str,
location: str,
checkout_target: Optional[str] = None,
source_mods: Optional[List[str]] = None,
namelists: Optional[List[str]] = None,
):
"""
Initialize an AdditionalCode object from a repository URL and a list of code files
Initialize an AdditionalCode object from a DataSource and a list of code files

Parameters:
-----------
base_model: BaseModel
The base model with which this additional code is associated
source_repo: str
URL pointing to a git-controlled repository containing the additional code
checkout_target: str
A tag, git hash, or other target to check out the source repo at the correct point in its history
source_mods: str or list of strs
Path(s) from the top level of `source_repo` to any code that is needed to compile a unique instance of the base model
namelists: str or list of strs
Path(s) from the top level of `source_repo` to any code that is needed at runtime for the base model
location: str
url or path pointing to the additional code directory or repository, used to set `source` attribute
checkout_target: Optional, str
Used if source.source_type is 'repository'. A tag, git hash, or other target to check out.
source_mods: Optional, str or list of strs
Path(s) relative to the top level of `source.location` to any code that is needed to compile a unique instance of the base model
namelists: Optional, str or list of strs
Path(s) relative to the top level of `source.location` to any code that is needed at runtime for the base model

Returns:
--------
Expand All @@ -73,81 +88,143 @@ def __init__(

# TODO: Type check here
self.base_model: BaseModel = base_model
self.source_repo: str = source_repo
self.checkout_target: str = checkout_target
self.source: DataSource = DataSource(location)
self.checkout_target: Optional[str] = checkout_target
self.source_mods: Optional[List[str]] = source_mods
self.namelists: Optional[List[str]] = namelists
self.exists_locally: Optional[bool] = None
self.local_path: Optional[str] = None

# If there are namelists, make a parallel attribute to keep track of the ones we are editing
# AdditionalCode.get() determines which namelists are editable templates and updates this list
if self.namelists:
self.modified_namelists: list = []

if self.source.location_type == "path":
self.exists_locally = True
self.local_path = self.source.location

def __str__(self):
    """
    Build a human-readable, multi-line summary of this AdditionalCode instance.

    The summary lists the base model name and the source location, followed by
    the local status (only when it is not None) and the source-modification and
    namelist files. Namelists whose name ends in "_TEMPLATE" are annotated with
    the name of the editable copy that C-Star uses.

    Returns:
    --------
    str
        The formatted summary
    """
    template_suffix = "_TEMPLATE"
    lines = [
        "AdditionalCode",
        "---------------------",
        f"Base model: {self.base_model.name}",
        f"Location: {self.source.location}",
    ]

    # Local status is only reported once it has a value
    if self.exists_locally is not None:
        lines.append(f" Exists locally: {self.exists_locally}")
    if self.local_path is not None:
        lines.append(f" Local path: {self.local_path}")

    if self.source_mods is not None:
        lines.append(
            "Source code modification files (paths relative to above location):"
        )
        lines.extend(f" {filename}" for filename in self.source_mods)

    if self.namelists is not None:
        lines.append("Namelist files (paths relative to above location):")
        for filename in self.namelists:
            entry = f" {filename}"
            if filename.endswith(template_suffix):
                editable = filename[: -len(template_suffix)]
                entry += (
                    f" ({editable} will be used by C-Star based on this template)"
                )
            lines.append(entry)

    return "\n".join(lines)

def __repr__(self):
    """Return the same summary text that `__str__` produces."""
    return str(self)

def get(self, local_dir: str):
    """
    Copy the required AdditionalCode files to `local_dir`

    If AdditionalCode.source describes a remote repository, it is first cloned into a
    temporary directory (removed again before returning) and `checkout_target` is checked out.
    Files listed in `source_mods` and `namelists` are then copied to
    `local_dir/source_mods/<base_model.name>/` and `local_dir/namelists/<base_model.name>/`.
    For every namelist whose name ends in "_TEMPLATE", an editable copy without that suffix
    is also created next to it and recorded (once) in `modified_namelists`.

    Parameters:
    -----------
    local_dir: str
        The local path (typically `Case.caseroot`) where the additional code will be curated

    Raises:
    -------
    ValueError
        If the source is a remote repository but `checkout_target` is None, or if the
        combination of `source.location_type` and `source.source_type` is not supported
    FileNotFoundError
        If a listed file does not exist in the source directory
    """
    template_suffix = "_TEMPLATE"
    location_type = self.source.location_type
    source_type = self.source.source_type

    tmp_dir: Optional[str] = None  # only set when a remote repository must be cloned
    try:
        # CASE 1: Additional code is in a remote repository
        if location_type == "url" and source_type == "repository":
            if self.checkout_target is None:
                raise ValueError(
                    "AdditionalCode.source points to a repository but AdditionalCode.checkout_target is None"
                )
            tmp_dir = tempfile.mkdtemp()
            _clone_and_checkout(
                source_repo=self.source.location,
                local_path=tmp_dir,
                checkout_target=self.checkout_target,
            )
            source_dir = Path(tmp_dir)
        # CASE 2: Additional code is in a local directory/repository
        elif location_type == "path" and source_type in ("directory", "repository"):
            source_dir = Path(self.source.location)
        else:
            # Fragments are joined by implicit concatenation, so each one carries
            # its own trailing space to keep the message readable.
            raise ValueError(
                "Invalid source for AdditionalCode. "
                "AdditionalCode.source.location_type and "
                "AdditionalCode.source.source_type should be "
                "'url' and 'repository', or 'path' and 'repository', or "
                "'path' and 'directory', not "
                f"'{location_type}' and '{source_type}'"
            )

        # Now go through the files and copy them to local_dir
        for file_type in ("source_mods", "namelists"):
            file_list = getattr(self, file_type)
            if file_list is None:
                continue

            tgt_dir = Path(local_dir) / file_type / self.base_model.name
            tgt_dir.mkdir(parents=True, exist_ok=True)

            for f in file_list:
                src_file_path = source_dir / f
                tgt_file_path = tgt_dir / Path(f).name

                # Fail before announcing a copy that cannot happen
                if not src_file_path.exists():
                    raise FileNotFoundError(f"Error: {src_file_path} does not exist.")
                print(f"copying {src_file_path} to {tgt_file_path}")
                shutil.copy(src_file_path, tgt_file_path)

                # Special case for template namelists: keep the template and
                # create an editable copy without the suffix alongside it
                if file_type == "namelists" and str(src_file_path).endswith(
                    template_suffix
                ):
                    editable_path = str(tgt_file_path)[: -len(template_suffix)]
                    print(
                        f"copying {tgt_file_path} to editable namelist {editable_path}"
                    )
                    shutil.copy(tgt_file_path, editable_path)

                    if not hasattr(self, "modified_namelists"):
                        self.modified_namelists = []
                    # get() may be called repeatedly; record each editable namelist once
                    editable_name = f[: -len(template_suffix)]
                    if editable_name not in self.modified_namelists:
                        self.modified_namelists.append(editable_name)

        self.local_path = local_dir
        self.exists_locally = True
    finally:
        # Remove the temporary clone whether or not the copy succeeded
        if tmp_dir:
            shutil.rmtree(tmp_dir)

def check_exists_locally(self, local_path: str) -> bool:
def check_exists_locally(self, local_dir: str) -> bool:
"""
Checks whether this AdditionalCode has already been fetched to the local machine

Expand All @@ -156,7 +233,7 @@ def check_exists_locally(self, local_path: str) -> bool:

Parameters:
-----------
local_path (str):
local_dir (str):
The local path to check for the existence of this additional code

Returns:
Expand All @@ -171,14 +248,17 @@ def check_exists_locally(self, local_path: str) -> bool:
file_list = getattr(self, file_type)
if file_list is None:
continue
tgt_dir = local_path + "/" + file_type + "/" + self.base_model.name

# tgt_dir = local_dir + "/" + file_type + "/" + self.base_model.name
tgt_dir = Path(local_dir) / file_type / self.base_model.name
for f in file_list:
tgt_file_path = tgt_dir + "/" + os.path.basename(f)
if not os.path.exists(tgt_file_path):
# tgt_file_path = tgt_dir + "/" + os.path.basename(f)
tgt_file_path = tgt_dir / Path(f).name
if not tgt_file_path.exists():
self.exists_locally = False
return False

if not self.exists_locally:
self.local_path = local_path
self.local_path = local_dir
self.exists_locally = True
return True
7 changes: 4 additions & 3 deletions cstar/base/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,15 @@ def __str__(self):
if isinstance(self.input_datasets, InputDataset)
else 0
)
base_str += f"\n{NAC} AdditionalCode repositories (query using ROMSComponent.additional_code)"
base_str += f"\n{NAC} AdditionalCode repositories (query using Component.additional_code)"
base_str += (
f"\n{NID} InputDataset objects (query using ROMSComponent.input_datasets"
f"\n{NID} InputDataset objects (query using Component.input_datasets"
)

# Discretisation
disc_str = ""
if hasattr(self, "time_step") and self.time_step is not None:
disc_str += "\ntime_step: " + str(self.time_step)
disc_str += "\ntime_step: " + str(self.time_step) + "s"
if hasattr(self, "n_procs_x") and self.n_procs_x is not None:
disc_str += (
"\nn_procs_x: "
Expand Down
Loading
Loading