From 1669a7cab4c977ee9b5c224c6132eb4dcfb61b89 Mon Sep 17 00:00:00 2001 From: Mike Walmsley Date: Thu, 11 Apr 2024 15:45:17 -0400 Subject: [PATCH 1/5] add a few new models --- docs/pretrained_models.rst | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/docs/pretrained_models.rst b/docs/pretrained_models.rst index 84a1e34b..c75e95c0 100755 --- a/docs/pretrained_models.rst +++ b/docs/pretrained_models.rst @@ -32,19 +32,29 @@ Zoobot includes weights for the following pretrained models: - Test loss - Finetune - HF |:hugging:| + * - ConvNeXT-Pico + - 9.1M + - 19.33 + - Yes + - `Link `__ * - ConvNeXT-Nano - 15.6M - 19.23 - Yes - `Link `__ + * - ConvNeXT-Tiny + - 44.6M + - 19.08 + - Yes + - `Link `__ * - ConvNeXT-Small - 58.5M - - 19.14 + - 19.06 - Yes - `Link `__ * - ConvNeXT-Base - 88.6M - - **19.04** + - **19.05** - Yes - `Link `__ * - ConvNeXT-Large @@ -52,6 +62,11 @@ Zoobot includes weights for the following pretrained models: - 19.09 - Yes - `Link `__ + * - MaxViT-Tiny + - 29.1M + - 19.22 + - Yes + - `Link `__ * - MaxViT-Small - 64.9M - 19.20 @@ -61,7 +76,7 @@ Zoobot includes weights for the following pretrained models: - 124.5 - 19.09 - Yes - - TODO + - `Link `__ * - Max-ViT-Large - 211.8M - 19.18 @@ -71,12 +86,12 @@ Zoobot includes weights for the following pretrained models: - 5.33M - 19.48 - Yes - - `Link `__ + - WIP * - EfficientNetV2-S - 48.3M - 19.33 - Yes - - `Link `__ + - WIP * - ResNet18 - 11.7M - 19.83 @@ -87,6 +102,11 @@ Zoobot includes weights for the following pretrained models: - 19.43 - Yes - `Link `__ + * - ResNet101 + - 44.5M + - 19.37 + - Yes + - `Link `__ .. 
note:: From 3bf38063a91d14f36b0ffdad7b8983b8a94d8ce0 Mon Sep 17 00:00:00 2001 From: Mike Walmsley Date: Tue, 16 Apr 2024 11:40:21 +0200 Subject: [PATCH 2/5] update examples for v2 --- .../finetuning/finetune_counts_full_tree.py | 19 +++++---- .../representations/get_representations.py | 42 ++++++++++++------- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/zoobot/pytorch/examples/finetuning/finetune_counts_full_tree.py b/zoobot/pytorch/examples/finetuning/finetune_counts_full_tree.py index bca84a90..6ed2a231 100644 --- a/zoobot/pytorch/examples/finetuning/finetune_counts_full_tree.py +++ b/zoobot/pytorch/examples/finetuning/finetune_counts_full_tree.py @@ -10,6 +10,7 @@ from zoobot.pytorch.training import finetune from zoobot.pytorch.predictions import predict_on_catalog from zoobot.shared.schemas import gz_candels_ortho_schema +from zoobot.shared.load_predictions import prediction_hdf5_to_summary_parquet """ Example for finetuning Zoobot on counts of volunteer responses throughout a complex decision tree (here, GZ CANDELS). @@ -67,12 +68,12 @@ resize_after_crop=resize_after_crop ) - checkpoint_loc = os.path.join( - # TODO replace with path to downloaded checkpoints. See Zoobot README for download links. 
- repo_dir, 'gz-decals-classifiers/results/benchmarks/pytorch/evo/uploaded/effnetb0_greyscale_224px.ckpt') # decals hparams - - model = finetune.FinetuneableZoobotTree(checkpoint_loc=checkpoint_loc, schema=schema) - + model = finetune.FinetuneableZoobotTree( + name='hf_hub:mwalmsley/zoobot-encoder-convnext_nano', + schema=schema + ) + + # TODO set this to wherever you'd like to save your results save_dir = os.path.join( repo_dir, f'gz-decals-classifiers/results/finetune_{np.random.randint(1e8)}') @@ -86,12 +87,16 @@ # now save predictions on test set to evaluate performance datamodule_kwargs = {'batch_size': batch_size, 'resize_after_crop': resize_after_crop} trainer_kwargs = {'devices': 1, 'accelerator': accelerator} + + hdf5_loc = os.path.join(save_dir, 'test_predictions.hdf5') predict_on_catalog.predict( test_catalog, model, n_samples=1, label_cols=schema.label_cols, - save_loc=os.path.join(save_dir, 'test_predictions.csv'), + save_loc=hdf5_loc, datamodule_kwargs=datamodule_kwargs, trainer_kwargs=trainer_kwargs ) + + prediction_hdf5_to_summary_parquet(hdf5_loc=hdf5_loc, save_loc=hdf5_loc.replace('.hdf5', 'summary.parquet'), schema=schema) \ No newline at end of file diff --git a/zoobot/pytorch/examples/representations/get_representations.py b/zoobot/pytorch/examples/representations/get_representations.py index dc154485..d83974dc 100644 --- a/zoobot/pytorch/examples/representations/get_representations.py +++ b/zoobot/pytorch/examples/representations/get_representations.py @@ -1,32 +1,45 @@ import logging import os +import timm + from galaxy_datasets import demo_rings from zoobot.pytorch.training import finetune, representations from zoobot.pytorch.estimators import define_model from zoobot.pytorch.predictions import predict_on_catalog +from zoobot.pytorch.training import finetune from zoobot.shared import load_predictions, schemas -def main(catalog, checkpoint_loc, save_dir): +def main(catalog, save_dir, name="hf_hub:mwalmsley/zoobot-encoder-convnext_nano"): 
assert all([os.path.isfile(x) for x in catalog['file_loc']]) if not os.path.exists(save_dir): os.mkdir(save_dir) - # can load from either ZoobotTree checkpoint (if trained from scratch) - encoder = define_model.ZoobotTree.load_from_checkpoint(checkpoint_loc).encoder - # or FinetuneableZoobotTree (if finetuned) - # currently, FinetuneableZoobotTree checkpoints should be loaded as ZoobotTree with the args below - # this is a bit awkward and I'm working on a clearer method - but it does work. - # encoder = define_model.ZoobotTree.load_from_checkpoint(checkpoint_loc, output_dim=TODO, question_index_groups=[]).encoder + # load the encoder + + # OPTION 1 + # Load a pretrained model from HuggingFace, with no finetuning, only as published + model = representations.ZoobotEncoder.load_from_name(name) + # or equivalently (the above is just a wrapper for these two lines below) + # encoder = timm.create_model(model_name=name, pretrained=True) + # model = representations.ZoobotEncoder(encoder=encoder) - # convert to simple pytorch lightning model - model = representations.ZoobotEncoder(encoder=encoder, pyramid=False) + """ + # OPTION 2 - label_cols = [f'feat_{n}' for n in range(1280)] + # Load a model that has been finetuned on your own data + # (...do your usual finetuning..., or load a finetuned model with finetune.FinetuneableZoobotClassifier(checkpoint_loc=....ckpt)) + encoder = finetuned_model.encoder + # and then convert to simple pytorch lightning model. You can use any pytorch model here. 
+ model = representations.ZoobotEncoder(encoder=encoder) + """ + + encoder_dim = define_model.get_encoder_dim(model.encoder) + label_cols = [f'feat_{n}' for n in range(encoder_dim)] save_loc = os.path.join(save_dir, 'representations.hdf5') accelerator = 'cpu' # or 'gpu' if available @@ -52,20 +65,17 @@ def main(catalog, checkpoint_loc, save_dir): logging.basicConfig(level=logging.INFO) - # load the gz evo model for representations - checkpoint_loc = '/home/walml/repos/gz-decals-classifiers/results/benchmarks/pytorch/evo/evo_py_gr_11941/checkpoints/epoch=73-step=42698.ckpt' - # use this demo dataset # TODO change this to wherever you'd like, it will auto-download - data_dir = '/home/walml/repos/galaxy-datasets/roots/demo_rings' + data_dir = '/Users/user/repos/galaxy-datasets/roots/demo_rings' catalog, _ = demo_rings(root=data_dir, download=True, train=True) print(catalog.head()) # zoobot expects id_str and file_loc columns, so add these if needed # save the representations here # TODO change this to wherever you'd like - save_dir = os.path.join('/home/walml/repos/zoobot/results/pytorch/representations/example') + save_dir = os.path.join('/Users/user/repos/zoobot/results/pytorch/representations/example') - representations_loc = main(catalog, checkpoint_loc, save_dir) + representations_loc = main(catalog, save_dir) rep_df = load_predictions.single_forward_pass_hdf5s_to_df(representations_loc) print(rep_df) From 8c71a878c1c3831011dd6bac3cdffde3431c6e85 Mon Sep 17 00:00:00 2001 From: Mike Walmsley Date: Mon, 29 Apr 2024 22:05:40 -0400 Subject: [PATCH 3/5] add euclid :) --- zoobot/shared/schemas.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zoobot/shared/schemas.py b/zoobot/shared/schemas.py index 3f85dbbe..b0123fc3 100755 --- a/zoobot/shared/schemas.py +++ b/zoobot/shared/schemas.py @@ -299,3 +299,6 @@ def answers(self): gz_ukidss_schema = Schema(label_metadata.ukidss_ortho_pairs, label_metadata.ukidss_ortho_dependencies) gz_jwst_schema = 
Schema(label_metadata.jwst_ortho_pairs, label_metadata.jwst_ortho_dependencies) + +euclid_ortho_schema = Schema(label_metadata.euclid_ortho_pairs , label_metadata.euclid_ortho_dependencies) +euclid_schema = Schema(label_metadata.euclid_pairs , label_metadata.euclid_dependencies) From 2db053c1c9ed76dbc3658e3e88ed34d277ae9cbb Mon Sep 17 00:00:00 2001 From: Mike Walmsley Date: Mon, 13 May 2024 17:39:02 -0400 Subject: [PATCH 4/5] support greyscale models --- docs/pretrained_models.rst | 3 +++ zoobot/pytorch/training/finetune.py | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/pretrained_models.rst b/docs/pretrained_models.rst index c75e95c0..b590e168 100755 --- a/docs/pretrained_models.rst +++ b/docs/pretrained_models.rst @@ -113,6 +113,9 @@ Zoobot includes weights for the following pretrained models: Missing a model you need? Reach out! There's a good chance we can train any model supported by `timm `_. +.. note:: + + New in Zoobot v2.0.1: greyscale (single channel) versions are available `here <https://huggingface.co/collections/mwalmsley/zoobot-encoders-greyscale-66427c51133285ca01b490c6>`_. Which model should I use? 
=========================== diff --git a/zoobot/pytorch/training/finetune.py b/zoobot/pytorch/training/finetune.py index 67ef1997..145e08df 100644 --- a/zoobot/pytorch/training/finetune.py +++ b/zoobot/pytorch/training/finetune.py @@ -124,7 +124,14 @@ def __init__( if name is not None: assert encoder is None, 'Cannot pass both name and encoder to use' - self.encoder = timm.create_model(name, num_classes=0, pretrained=True) + if 'greyscale' in name: + # I'm not sure why timm is happy to convert color model stem to greyscale + # but doesn't correctly load greyscale model without this hack + logging.info('Loading greyscale model (auto-detected from name)') + timm_kwargs = {'in_chans': 1} + else: + timm_kwargs = {} + self.encoder = timm.create_model(name, num_classes=0, pretrained=True, **timm_kwargs) self.encoder_dim = self.encoder.num_features elif zoobot_checkpoint_loc is not None: From 8463e98c43ed1bee8324b93d70108db7b162516e Mon Sep 17 00:00:00 2001 From: Mike Walmsley Date: Tue, 14 May 2024 15:57:06 -0400 Subject: [PATCH 5/5] version bump --- README.md | 1 + setup.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fd9cb351..9e7d9b36 100755 --- a/README.md +++ b/README.md @@ -138,6 +138,7 @@ CUDA 12.1 for PyTorch 2.1.0: ### Recent release features (v2.0.0) +- **New in 2.0.1** Add greyscale encoders. Use `hf_hub:mwalmsley/zoobot-encoder-greyscale-convnext_nano` or [similar](https://huggingface.co/collections/mwalmsley/zoobot-encoders-greyscale-66427c51133285ca01b490c6). - New pretrained architectures: ConvNeXT, EfficientNetV2, MaxViT, and more. Each in several sizes. - Reworked finetuning procedure. All these architectures are finetuneable through a common method. - Reworked finetuning options. Batch norm finetuning removed. Cosine schedule option added. 
diff --git a/setup.py b/setup.py index dde24b50..0faa3772 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="zoobot", - version="2.0.0", + version="2.0.1", author="Mike Walmsley", author_email="walmsleymk1@gmail.com", description="Galaxy morphology classifiers", @@ -117,6 +117,6 @@ 'webdataset', # for reading webdataset files 'huggingface_hub', # login may be required 'setuptools', # no longer pinned - 'galaxy-datasets>=0.0.17' # for dataset loading in both TF and Torch (see github/mwalmsley/galaxy-datasets) + 'galaxy-datasets>=0.0.18' # for dataset loading in both TF and Torch (see github/mwalmsley/galaxy-datasets) ] )