From 7ea7654c1f4e6d506f4f2c0dd83e5b3540a24ab8 Mon Sep 17 00:00:00 2001 From: "Ching Yi, Chan" Date: Fri, 8 Oct 2021 10:29:30 +0800 Subject: [PATCH 1/3] Add more actions to datasets Signed-off-by: Ching Yi, Chan --- docs/CLI/datasets.md | 352 +++++++++++++++++- docs/notebook/datasets.ipynb | 159 +++++++- docs/notebook/groups.ipynb | 2 +- primehub/datasets.py | 339 ++++++++++++++++- primehub/extras/templates/examples/config.md | 2 +- .../extras/templates/examples/datasets.md | 288 +++++++++++++- .../extras/templates/examples/deployments.md | 8 +- primehub/extras/templates/examples/files.md | 2 +- primehub/extras/templates/examples/images.md | 2 +- .../templates/examples/instancetypes.md | 2 +- primehub/extras/templates/examples/jobs.md | 2 +- .../extras/templates/examples/notebooks.md | 2 +- .../extras/templates/examples/schedules.md | 2 +- tests/test_datasets.py | 88 +++++ 14 files changed, 1209 insertions(+), 41 deletions(-) create mode 100644 tests/test_datasets.py diff --git a/docs/CLI/datasets.md b/docs/CLI/datasets.md index 86ee0e1..be5bd40 100644 --- a/docs/CLI/datasets.md +++ b/docs/CLI/datasets.md @@ -8,8 +8,12 @@ Usage: Get a dataset or list datasets Available Commands: + create Create a dataset + delete Delete a dataset by id get Get a dataset by name list List datasets + update Update the dataset + upload_secret Regenerate the secret of the upload server Options: -h, --help Show the help @@ -24,6 +28,36 @@ Global Options: ``` +### create + +Create a dataset + + +``` +primehub datasets create +``` + + +* *(optional)* file + + + + +### delete + +Delete a dataset by id + + +``` +primehub datasets delete +``` + +* id: The dataset id + + + + + ### get Get a dataset by name @@ -51,10 +85,42 @@ primehub datasets list + +### update + +Update the dataset + + +``` +primehub datasets update +``` + +* name + + + + + +### upload_secret + +Regenerate the secret of the upload server + + +``` +primehub datasets upload_secret +``` + +* id: The dataset id or name + + + + ## Examples +### Query datasets + The `datasets` command is a group specific resource. It only works after the `group` assigned. 
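+The same queries are also available from the Python SDK. Here is a minimal sketch (it assumes a configured client with a group assigned; the dataset name is taken from the examples below):
+
+```python
+from primehub import PrimeHub, PrimeHubConfig
+
+ph = PrimeHub(PrimeHubConfig())
+
+# list all datasets in the current group
+for dataset in ph.datasets.list():
+    print(dataset['id'], dataset['name'], dataset['type'])
+
+# fetch a single dataset by name
+dataset = ph.datasets.get('pv-dataset')
+```
+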
 Using `list` to find all datasets in your group:
 
@@ -64,22 +130,286 @@ $ primehub datasets list
 ```
 
 ```
-id      name    displayName    description    type
-------  ------  -------------  -------------  ------
-kaggle  kaggle  kaggle                        pv
+id           name         displayName                 description                      type
+-----------  -----------  --------------------------  -------------------------------  ------
+pv-dataset   pv-dataset   the dataset created by SDK  It is a PV dataset               pv
+env-dataset  env-dataset  env-dataset                 make changes to the description  env
 ```
 
 If you already know the name of a dataset, use the `get` to get a single entry:
 
 ```
-$ primehub datasets get kaggle
+$ primehub datasets get pv-dataset
 ```
 
 ```
-primehub datasets get kaggle
-id:            kaggle
-name:          kaggle
-displayName:   kaggle
-description:
-type:          pv
-```
\ No newline at end of file
+id:                 pv-dataset
+name:               pv-dataset
+displayName:        the dataset created by SDK
+description:        It is a PV dataset
+type:               pv
+pvProvisioning:     auto
+volumeSize:         1
+enableUploadServer: True
+uploadServerLink:   http://primehub-python-sdk.primehub.io/dataset/hub/pv-dataset/browse
+global:             False
+groups:             [{'id': 'a962305b-c884-4413-9358-ef56373b287c', 'name': 'foobarbar', 'displayName': '', 'writable': False}, {'id': 'a7a283b5-c0e2-4b79-a78c-39c630324762', 'name': 'phusers', 'displayName': 'primehub users', 'writable': False}]
+```
+
+### Admin actions for datasets
+
+These actions can only be used by administrators:
+
+* create
+* update
+* delete
+
+`create` and `update` require a dataset configuration; please see the examples below.
+
+### Fields for creating or updating
+
+| field | required | type | description |
+| --- | --- | --- | --- |
+| name | required | string | it should be a valid Kubernetes resource name |
+| displayName | optional | string | display name for this dataset |
+| description | optional | string | |
+| global | optional | boolean | when a dataset is global, every group can see it |
+| type | required | string | one of ['pv', 'nfs', 'hostPath', 'git', 'env'] |
+| url | conditional | string | **MUST** be used with the `git` type |
+| pvProvisioning | conditional | string | one of ['auto', 'manual'], **MUST** be used with the `pv` type. This field is only used in the `CREATE` action |
+| nfsServer | conditional | string | **MUST** be used with the `nfs` type |
+| nfsPath | conditional | string | **MUST** be used with the `nfs` type |
+| hostPath | conditional | string | **MUST** be used with the `hostPath` type |
+| variables | optional | dict | **MAY** be used with the `env` type. It is a set of key-value pairs; all values must be strings. For example: `{"key1":"value1","key2":"value2"}`. |
+| groups | optional | list of connected groups (dict) | please see the `connect` examples |
+| secret | optional | dict | **MAY** be used with the `git` type; binds a `secret` to the `git` dataset |
+| volumeSize | conditional | integer | **MUST** be used with the `pv` type. The unit is `GB`. |
+| enableUploadServer | optional | boolean | it only works with the writable types ['pv', 'nfs', 'hostPath'] |
+
+> There is a simple rule for the fields in `UPDATE`: the required fields must not be included in the payload.
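+
+A minimal sketch of this rule through the SDK `update` method (hypothetical field values; it assumes an existing dataset named `env-dataset`):
+
+```python
+from primehub import PrimeHub, PrimeHubConfig
+
+ph = PrimeHub(PrimeHubConfig())
+
+# the update payload omits the required creation fields (name, type)
+# and carries only the fields to change
+config = {
+    "description": "make changes to the description",
+    "variables": {"key1": "value1", "key2": "value2"},
+}
+ph.datasets.update('env-dataset', config)
+```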
+ +For example, there is a configuration for creating env dataset: + +```bash +primehub datasets create <\n", + "\n", + "Get a dataset or list datasets\n", + "\n", + "Available Commands:\n", + " create Create a dataset\n", + " delete Delete a dataset by id\n", + " get Get a dataset by name\n", + " list List datasets\n", + " update Update the dataset\n", + " upload_secret Regenerate the secret of the upload server\n", + "```\n", + "\n", + "---\n", + "\n", + "All mutating actions require the `Admin` role:\n", + "\n", + "* create\n", + "* delete\n", + "* update\n", + "* upload_secret (`regenerate_upload_server_secret` for method name)\n", + "\n", + "## Dataset configuration\n", + "\n", + "You need a configuration `create` and `update` to operate. Here is an example to create a `pv-dataset`:\n", + "\n", + "```json\n", + "{\n", + " \"name\": \"pv-dataset\",\n", + " \"displayName\": \"the dataset created by SDK\",\n", + " \"description\": \"It is a PV dataset\",\n", + " \"type\": \"pv\",\n", + " \"global\": false,\n", + " \"groups\": {\n", + " \"connect\": [\n", + " {\n", + " \"id\": \"a7a283b5-c0e2-4b79-a78c-39c630324762\",\n", + " \"writable\": true\n", + " },\n", + " {\n", + " \"id\": \"a962305b-c884-4413-9358-ef56373b287c\",\n", + " \"writable\": false\n", + " }\n", + " ]\n", + " },\n", + " \"pvProvisioning\": \"auto\",\n", + " \"volumeSize\": 1\n", + "}\n", + "```\n", + "\n", + "In our system, there are 5 types for datasets: `['pv', 'nfs', 'hostPath', 'git', 'env']`. Please check the fields reference to give a proper configuration to create your own dataset.\n", + "\n", + "\n", + "\n", + "## Fields for creating or updating\n", + "\n", + "| field | required | type | description |\n", + "| --- | --- | --- | --- |\n", + "| name | required | string | it should be a valid resource name for kubernetes |\n", + "| displayName | optional | string | display name for this dataset |\n", + "| description | optional | string | |\n", + "| global | optional | boolean | when a dataset is global, it could be seen for each group |\n", + "| type | required | string | one of ['pv', 'nfs', 'hostPath', 'git', 'env'] |\n", + "| url | conditional | string | **MUST** use with `git` type |\n", + "| pvProvisioning | conditional | string | onf of ['auto', 'manual'], **MUST** use with `pv` type. This field only uses in `CREATE` action |\n", + "| nfsServer | conditional | string | **MUST** use with `nfs` type |\n", + "| nfsPath | conditional | string | **MUST** use with `nfs` type |\n", + "| hostPath | conditional | string | **MUST** use with `hostPath` type |\n", + "| variables | optional | dict | **MAY** use with `env` type. It is key value pairs. All values have to a string value. For example: `{\"key1\":\"value1\",\"key2\":\"value2\"}`. |\n", + "| groups | optional | list of connected groups (dict) | please see the `connect` examples |\n", + "| secret | optional | dict | **MAY** use with `git` type | bind a `secret` to the `git` dataset |\n", + "| volumeSize | conditional | integer | **MUST** use with `pv` type. The unit is `GB`.|\n", + "| enableUploadServer | optional | boolean | it only works with one of ['pv', 'nfs', 'hostPath'] writable types |\n", + "\n", + "> There is a simple rule to use fields for `UPDATE`. 
All required fields should not be in the payload.\n", + "\n", + "For example, there is a configuration for creating env dataset:\n", + "\n", + "```bash\n", + "primehub datasets create < list: + """ + Create a dataset + + :rtype: dict + :returns: the dataset + """ + + return self.create(primehub_load_config(filename=kwargs.get('file', None))) + + def create(self, config) -> list: + """ + Create a dataset + + :rtype: dict + :returns: the dataset + """ + + query = """ + mutation CreateDatasetMutation($payload: DatasetCreateInput!) { + createDataset(data: $payload) { + id + } + } + """ + + if not config: + invalid_config('Dataset configuration file is required.') + + if config.get('enableUploadServer', False): + query = """ + mutation CreateDatasetMutation($payload: DatasetCreateInput!) { + createDataset(data: $payload) { + id + uploadServerSecret { + username + password + } + } + } + """ + + variables = {'payload': validate_creation(validate(config))} + result = self.request(variables, query) + if 'data' in result and 'createDataset' in result['data']: + return waring_if_needed(result['data']['createDataset'], self.primehub.stderr) + return result + + @cmd(name='update', description='Update the dataset') + def _update_cmd(self, name: str, **kwargs) -> list: + """ + Update the dataset + + :type name: str + :rtype: dict + :returns: the dataset + """ + return self._update_cmd(name, primehub_load_config(filename=kwargs.get('file', None))) + + def update(self, name: str, config: dict) -> list: + """ + Update the dataset + + :type name: str + :type config: dict + :rtype: dict + + :returns: the dataset + """ + + query = """ + mutation UpdateDatasetMutation($payload: DatasetUpdateInput!, $where: DatasetWhereUniqueInput!) { + updateDataset(data: $payload, where: $where) { + id + } + } + """ + + if not config: + invalid_config('Dataset configuration file is required.') + + if config.get('enableUploadServer', False): + query = """ + mutation UpdateDatasetMutation($payload: DatasetUpdateInput!, $where: DatasetWhereUniqueInput!) { + updateDataset(data: $payload, where: $where) { + id + uploadServerSecret { + username + password + } + } + } + """ + + update_mode = True + variables = {'payload': validate(config, update_mode), 'where': {'id': name}} + result = self.request(variables, query) + if 'data' in result and 'updateDataset' in result['data']: + return waring_if_needed(result['data']['updateDataset'], self.primehub.stderr) + return result + @cmd(name='list', description='List datasets') def list(self) -> list: """ @@ -47,7 +257,130 @@ def get(self, name) -> Optional[dict]: :rtype: Optional[dict] :returns: a dataset """ - return self.do_get(Datasets.query, Datasets.resource_name, name) + + query = """ + query DatasetQuery($where: DatasetWhereUniqueInput!) 
{ + dataset(where: $where) { + id + name + displayName + description + type + pvProvisioning + volumeSize + variables + nfsServer + nfsPath + hostPath + url + secret { + id + } + enableUploadServer + uploadServerLink + global + groups { + id + name + displayName + writable + } + } + } + """ + + def output(dataset: dict): + dataset_output = dict() + keep_fields = ['id', 'name', 'displayName', 'description', 'global', 'type', 'groups'] + + if dataset.get('type') == 'env': + keep_fields.append('variables') + + if dataset.get('type') == 'git': + keep_fields.append('url') + keep_fields.append('secret') + + if dataset.get('type') == 'pv': + keep_fields.append('pvProvisioning') + keep_fields.append('volumeSize') + + if dataset.get('type') == 'nfs': + keep_fields.append('nfsServer') + keep_fields.append('nfsPath') + + if dataset.get('type') == 'hostPath': + keep_fields.append('hostPath') + + if dataset.get('type') in ['pv', 'nfs', 'hostPath']: + keep_fields.append('enableUploadServer') + keep_fields.append('uploadServerLink') + + for k, v in dataset.items(): + if k in keep_fields: + dataset_output[k] = v + + return dataset_output + + result = self.request({'where': {'id': name}}, query) + if 'data' in result and 'dataset' in result['data']: + return output(result['data']['dataset']) + + return result + + @cmd(name='delete', description='Delete a dataset by id', return_required=True) + def delete(self, id): + """ + Delete a dataset by id + + :type id: str + :param id: The dataset id + + :rtype dict + :return the result of the deleted dataset + """ + + query = """ + mutation DeleteDatasetMutation($where: DatasetWhereUniqueInput!) { + deleteDataset(where: $where) { + id + } + } + """ + + result = self.request({'where': {'id': id}}, query) + if 'data' in result and 'deleteDataset' in result['data']: + return result['data']['deleteDataset'] + return result + + @cmd(name='upload_secret', description='Regenerate the secret of the upload server', + return_required=True) + def regenerate_upload_server_secret(self, id): + """ + Regenerate the secret of the upload server + + :type id: str + :param id: The dataset id or name + + :rtype dict + :return the result of the deleted dataset + """ + + query = """ + mutation RegenerateUploadServerSecretMutation($where: DatasetWhereUniqueInput!) 
{ + regenerateUploadServerSecret(where: $where) { + id + uploadServerSecret { + username + password + } + } + } + """ + + result = self.request({'where': {'id': id}}, query) + if 'data' in result and 'regenerateUploadServerSecret' in result['data']: + return waring_if_needed(result['data']['regenerateUploadServerSecret'], self.primehub.stderr) + return result def help_description(self): return "Get a dataset or list datasets" diff --git a/primehub/extras/templates/examples/config.md b/primehub/extras/templates/examples/config.md index f13cfc2..2bb5bb5 100644 --- a/primehub/extras/templates/examples/config.md +++ b/primehub/extras/templates/examples/config.md @@ -68,4 +68,4 @@ When all actions have done, the configuration got updated: ``` Found old configuration, backup it to /home/phadmin/.primehub/config-20211001161504.json PrimeHub SDK Config has been updated: /home/phadmin/.primehub/config.json -``` \ No newline at end of file +``` diff --git a/primehub/extras/templates/examples/datasets.md b/primehub/extras/templates/examples/datasets.md index 0d5d3fa..8f6262a 100644 --- a/primehub/extras/templates/examples/datasets.md +++ b/primehub/extras/templates/examples/datasets.md @@ -1,3 +1,5 @@ +### Query datasets + The `datasets` command is a group specific resource. It only works after the `group` assigned. Using `list` to find all datasets in your group: @@ -7,22 +9,286 @@ $ primehub datasets list ``` ``` -id name displayName description type ------- ------ ------------- ------------- ------ -kaggle kaggle kaggle pv +id name displayName description type +----------- ----------- -------------------------- ------------------------------- ------ +pv-dataset pv-dataset the dataset created by SDK It is a PV dataset pv +env-dataset env-dataset env-dataset make changes to the description env ``` If you already know the name of a dataset, use the `get` to get a single entry: ``` -$ primehub datasets get kaggle +$ primehub datasets get dataset ``` ``` -primehub datasets get kaggle -id: kaggle -name: kaggle -displayName: kaggle -description: -type: pv -``` \ No newline at end of file +id: pv-dataset +name: pv-dataset +displayName: the dataset created by SDK +description: It is a PV dataset +type: pv +pvProvisioning: auto +volumeSize: 1 +enableUploadServer: True +uploadServerLink: http://primehub-python-sdk.primehub.io/dataset/hub/pv-dataset/browse +global: False +groups: [{'id': 'a962305b-c884-4413-9358-ef56373b287c', 'name': 'foobarbar', 'displayName': '', 'writable': False}, {'id': 'a7a283b5-c0e2-4b79-a78c-39c630324762', 'name': 'phusers', 'displayName': 'primehub users', 'writable': False}] +``` + +### Admin actions for datasets + +These actions only can be used by administrators: + +* create +* update +* delete + +For `create` and `update` require a dataset configuration, please see above examples. + +### Fields for creating or updating + +| field | required | type | description | +| --- | --- | --- | --- | +| name | required | string | it should be a valid resource name for kubernetes | +| displayName | optional | string | display name for this dataset | +| description | optional | string | | +| global | optional | boolean | when a dataset is global, it could be seen for each group | +| type | required | string | one of ['pv', 'nfs', 'hostPath', 'git', 'env'] | +| url | conditional | string | **MUST** use with `git` type | +| pvProvisioning | conditional | string | onf of ['auto', 'manual'], **MUST** use with `pv` type. 
This field only uses in `CREATE` action | +| nfsServer | conditional | string | **MUST** use with `nfs` type | +| nfsPath | conditional | string | **MUST** use with `nfs` type | +| hostPath | conditional | string | **MUST** use with `hostPath` type | +| variables | optional | dict | **MAY** use with `env` type. It is key value pairs. All values have to a string value. For example: `{"key1":"value1","key2":"value2"}`. | +| groups | optional | list of connected groups (dict) | please see the `connect` examples | +| secret | optional | dict | **MAY** use with `git` type | bind a `secret` to the `git` dataset | +| volumeSize | conditional | integer | **MUST** use with `pv` type. The unit is `GB`.| +| enableUploadServer | optional | boolean | it only works with one of ['pv', 'nfs', 'hostPath'] writable types | + +> There is a simple rule to use fields for `UPDATE`. All required fields should not be in the payload. + +For example, there is a configuration for creating env dataset: + +```bash +primehub datasets create < None: + super(TestDatasets, self).setUp() + + def check_required(self, input: dict, message: str): + with self.assertRaises(PrimeHubException) as context: + validate(input) + + self.assertTrue(isinstance(context.exception, PrimeHubException)) + self.assertEqual(message, context.exception.args[0]) + + def test_validator(self): + # check required fields + self.check_required({}, 'name is required') + self.check_required({'name': 'dataset-name'}, 'type is required') + + # check formats + self.check_required({'name': '-name', 'type': 'pv'}, + "[name] should be lower case alphanumeric characters, '-' or '.', " + "and must start and end with an alphanumeric character.") + + self.check_required({'name': 'name', 'type': 'whatever'}, + "[type] should be one of ['pv', 'nfs', 'hostPath', 'git', 'env']") + + # check writable groups + self.check_required({'name': 'name', 'type': 'git', 'enableUploadServer': False}, + "[enableUploadServer] only can use with should be one of ['pv', 'nfs', 'hostPath'] types") + + # check groups connect/disconnect + self.check_required({'name': 'name', 'type': 'pv', 'groups': {'connect': [{'name': 'my-group'}]}}, + "group connect should be a pair {id, writable}") + + self.check_required( + {'name': 'name', 'type': 'pv', 'groups': {'disconnect': [{'id': 'my-id', 'writable': True}]}}, + "disconnect connect should be an entry {id}") + + def check_creation_required(self, input: dict, message: str): + with self.assertRaises(PrimeHubException) as context: + validate_creation(input) + + self.assertTrue(isinstance(context.exception, PrimeHubException)) + self.assertEqual(message, context.exception.args[0]) + + def test_pv_create_validator(self): + # check required fields + self.check_creation_required({'name': 'name', 'type': 'pv'}, + "pvProvisioning is required for pv type " + "and its value should be one of ['auto', 'manual']") + + self.check_creation_required({'name': 'name', 'type': 'pv', 'pvProvisioning': 'no-such-way'}, + "pvProvisioning is required for pv type " + "and its value should be one of ['auto', 'manual']") + + valid_input = {'name': 'name', 'type': 'pv', 'pvProvisioning': 'auto'} + self.assertEqual(valid_input, validate_creation(valid_input)) + + def test_nfs_create_validator(self): + # check required fields + self.check_creation_required({'name': 'name', 'type': 'nfs'}, + "nfsServer and nfsPath are required for nfs type") + + self.check_creation_required({'name': 'name', 'type': 'nfs', 'nfsServer': '127.0.0.1'}, + "nfsServer and nfsPath are required for 
nfs type") + + valid_input = {'name': 'name', 'type': 'nfs', 'nfsServer': '127.0.0.1', 'nfsPath': '/data'} + self.assertEqual(valid_input, validate_creation(valid_input)) + + def test_hostPath_create_validator(self): + # check required fields + self.check_creation_required({'name': 'name', 'type': 'hostPath'}, + "hostPath is required for hostPath type") + + valid_input = {'name': 'name', 'type': 'hostPath', 'hostPath': '/data'} + self.assertEqual(valid_input, validate_creation(valid_input)) + + def test_git_create_validator(self): + # check required fields + self.check_creation_required({'name': 'name', 'type': 'git'}, + "url is required for git type") + + valid_input = {'name': 'name', 'type': 'git', 'url': 'https://github.com/InfuseAI/primehub-python-sdk'} + self.assertEqual(valid_input, validate_creation(valid_input)) From e97d7490e06a012f4a106943a2c1e04653a60b73 Mon Sep 17 00:00:00 2001 From: "Ching Yi, Chan" Date: Thu, 14 Oct 2021 12:16:14 +0800 Subject: [PATCH 2/3] Add admin group Signed-off-by: Ching Yi, Chan --- docs/CLI/datasets.md | 64 ------ primehub/__init__.py | 36 ++- primehub/admin_datasets.py | 399 +++++++++++++++++++++++++++++++++ primehub/cli.py | 25 ++- primehub/datasets.py | 341 +--------------------------- tests/test_datasets.py | 2 +- tests/test_sdk_to_admin_cli.py | 35 +++ 7 files changed, 491 insertions(+), 411 deletions(-) create mode 100644 primehub/admin_datasets.py create mode 100644 tests/test_sdk_to_admin_cli.py diff --git a/docs/CLI/datasets.md b/docs/CLI/datasets.md index be5bd40..f730a17 100644 --- a/docs/CLI/datasets.md +++ b/docs/CLI/datasets.md @@ -8,12 +8,8 @@ Usage: Get a dataset or list datasets Available Commands: - create Create a dataset - delete Delete a dataset by id get Get a dataset by name list List datasets - update Update the dataset - upload_secret Regenerate the secret of the upload server Options: -h, --help Show the help @@ -28,36 +24,6 @@ Global Options: ``` -### create - -Create a dataset - - -``` -primehub datasets create -``` - - -* *(optional)* file - - - - -### delete - -Delete a dataset by id - - -``` -primehub datasets delete -``` - -* id: The dataset id - - - - - ### get Get a dataset by name @@ -85,36 +51,6 @@ primehub datasets list - -### update - -Update the dataset - - -``` -primehub datasets update -``` - -* name - - - - - -### upload_secret - -Regenerate the secret of the upload server - - -``` -primehub datasets upload_secret -``` - -* id: The dataset id or name - - - - ## Examples diff --git a/primehub/__init__.py b/primehub/__init__.py index a52f0ff..c2137f1 100644 --- a/primehub/__init__.py +++ b/primehub/__init__.py @@ -192,7 +192,9 @@ class PrimeHub(object): def __init__(self, config: PrimeHubConfig): self.primehub_config = config self.json_output = True + self.usage_role = 'user' self.commands: Dict[str, Module] = dict() + self.admin_commands: Dict[str, Module] = dict() self._stderr = sys.stderr self._stdout = sys.stdout @@ -212,6 +214,9 @@ def __init__(self, config: PrimeHubConfig): self.register_command('apptemplates', 'AppTemplate') self.register_command('apps', 'Apps') + # register admin commands + self.register_admin_command('admin_datasets', 'AdminDatasets', 'datasets') + # initial self._ensure_config_details(config) @@ -236,19 +241,42 @@ def request_logs(self, endpint: str, follow: bool, tail: int): def request_file(self, endpint: str, dest: str): return Client(self.primehub_config).request_file(endpint, dest) - def register_command(self, module_name: str, command_class: Union[str, Callable], 
command_name=None): - if not command_name: - command_name = module_name - + def _find_command_class(self, command_class, module_name): # create command instance if isinstance(command_class, str): clazz = importlib.import_module('primehub.' + module_name).__getattribute__(command_class) else: clazz = command_class + return clazz + + def register_command(self, module_name: str, command_class: Union[str, Callable], command_name=None): + if not command_name: + command_name = module_name + + clazz = self._find_command_class(command_class, module_name) # register to the commands table self.commands[command_name] = clazz(self) + def register_admin_command(self, module_name: str, command_class: Union[str, Callable], command_name=None): + if not command_name: + command_name = module_name + + clazz = self._find_command_class(command_class, module_name) + + # register to the commands table + self.admin_commands[command_name] = clazz(self) + + def switch_admin_role(self): + self.usage_role = 'admin' + self.commands = self.admin_commands + + @property + def admin(self): + admin_primehub = PrimeHub(self.primehub_config) + admin_primehub.commands = self.admin_commands + return admin_primehub + def __getattr__(self, item): if item in self.commands: return self.commands[item] diff --git a/primehub/admin_datasets.py b/primehub/admin_datasets.py new file mode 100644 index 0000000..6ddea8c --- /dev/null +++ b/primehub/admin_datasets.py @@ -0,0 +1,399 @@ +import json +import re +from typing import Optional, Union, Any, Dict + +from primehub import Helpful, cmd, Module, primehub_load_config +from primehub.utils import PrimeHubException +from primehub.utils.optionals import file_flag + + +def waring_if_needed(data: dict, stderr): + if data and 'uploadServerSecret' in data: + if data.get('uploadServerSecret') is None: + print('WARNING: you got a nil uploadServerSecret, ' + 'because there is another one has been generated.\n', + file=stderr) + return data + + +class AdminDatasets(Helpful, Module): + + @cmd(name='create', description='Create a dataset', optionals=[('file', file_flag)]) + def _create_cmd(self, **kwargs) -> list: + """ + Create a dataset + + :rtype: dict + :returns: the dataset + """ + + return self.create(primehub_load_config(filename=kwargs.get('file', None))) + + def create(self, config) -> list: + """ + Create a dataset + + :rtype: dict + :returns: the dataset + """ + + query = """ + mutation CreateDatasetMutation($payload: DatasetCreateInput!) { + createDataset(data: $payload) { + id + } + } + """ + + if not config: + invalid_config('Dataset configuration file is required.') + + if config.get('enableUploadServer', False): + query = """ + mutation CreateDatasetMutation($payload: DatasetCreateInput!) 
{ + createDataset(data: $payload) { + id + uploadServerSecret { + username + password + } + } + } + """ + + variables = {'payload': validate_creation(validate(config))} + result = self.request(variables, query) + if 'data' in result and 'createDataset' in result['data']: + return waring_if_needed(result['data']['createDataset'], self.primehub.stderr) + return result + + @cmd(name='update', description='Update the dataset') + def _update_cmd(self, name: str, **kwargs) -> list: + """ + Update the dataset + + :type name: str + :rtype: dict + :returns: the dataset + """ + return self._update_cmd(name, primehub_load_config(filename=kwargs.get('file', None))) + + def update(self, name: str, config: dict) -> list: + """ + Update the dataset + + :type name: str + :type config: dict + :rtype: dict + + :returns: the dataset + """ + + query = """ + mutation UpdateDatasetMutation($payload: DatasetUpdateInput!, $where: DatasetWhereUniqueInput!) { + updateDataset(data: $payload, where: $where) { + id + } + } + """ + + if not config: + invalid_config('Dataset configuration file is required.') + + if config.get('enableUploadServer', False): + query = """ + mutation UpdateDatasetMutation($payload: DatasetUpdateInput!, $where: DatasetWhereUniqueInput!) { + updateDataset(data: $payload, where: $where) { + id + uploadServerSecret { + username + password + } + } + } + """ + + update_mode = True + variables = {'payload': validate(config, update_mode), 'where': {'id': name}} + result = self.request(variables, query) + if 'data' in result and 'updateDataset' in result['data']: + return waring_if_needed(result['data']['updateDataset'], self.primehub.stderr) + return result + + @cmd(name='upload_secret', description='Regenerate the secret of the upload server', + return_required=True) + def regenerate_upload_server_secret(self, id): + """ + Regenerate the secret of the upload server + + :type id: str + :param id: The dataset id or name + + :rtype dict + :return the result of the deleted dataset + """ + + query = """ + mutation RegenerateUploadServerSecretMutation($where: DatasetWhereUniqueInput!) 
{ + regenerateUploadServerSecret(where: $where) { + id + uploadServerSecret { + username + password + } + } + } + """ + + result = self.request({'where': {'id': id}}, query) + if 'data' in result and 'regenerateUploadServerSecret' in result['data']: + return waring_if_needed(result['data']['regenerateUploadServerSecret'], self.primehub.stderr) + return result + + @cmd(name='list', description='Delete a dataset by id', return_required=True, optionals=[('page', int)]) + def list(self, **kwargs): + query = """ + query GetDatasets($page: Int, $orderBy: DatasetOrderByInput, $where: DatasetWhereInput) { + datasetsConnection(page: $page, orderBy: $orderBy, where: $where) { + edges { + cursor + node { + id + name + displayName + description + type + uploadServerLink + } + } + pageInfo { + currentPage + totalPage + } + } + } + """ + variables = {'page': 1} + page = kwargs.get('page', 0) + if page: + variables['page'] = page + results = self.request(variables, query) + for e in results['data']['datasetsConnection']['edges']: + yield e['node'] + return + + page = 1 + while True: + variables['page'] = page + results = self.request(variables, query) + if results['data']['datasetsConnection']['edges']: + for e in results['data']['datasetsConnection']['edges']: + yield e['node'] + page = page + 1 + else: + break + + @cmd(name='delete', description='Delete a dataset by id', return_required=True) + def delete(self, id): + """ + Delete a dataset by id + + :type id: str + :param id: The dataset id + + :rtype dict + :return the result of the deleted dataset + """ + + query = """ + mutation DeleteDatasetMutation($where: DatasetWhereUniqueInput!) { + deleteDataset(where: $where) { + id + } + } + """ + + result = self.request({'where': {'id': id}}, query) + if 'data' in result and 'deleteDataset' in result['data']: + return result['data']['deleteDataset'] + return result + + @cmd(name='get', description='Get a dataset by name', return_required=True) + def get(self, name) -> Optional[dict]: + """ + Get a dataset from the current group + + :type name: str + :param name: the name of a dataset + + :rtype: Optional[dict] + :returns: a dataset + """ + + query = """ + query DatasetQuery($where: DatasetWhereUniqueInput!) 
{ + dataset(where: $where) { + id + name + displayName + description + type + pvProvisioning + volumeSize + variables + nfsServer + nfsPath + hostPath + url + secret { + id + } + enableUploadServer + uploadServerLink + global + groups { + id + name + displayName + writable + } + } + } + """ + + result = self.request({'where': {'id': name}}, query) + if 'data' in result and 'dataset' in result['data']: + return dataset_output(result['data']['dataset']) + + return result + + def help_description(self): + return "Manage datasets" + + +def invalid_config(message: str): + example = """ + {"name":"my-dataset-name","displayName":"the dataset created by SDK", + "description":"desc","type":"pv","global":false,"groups": + {"connect":[{"id":"a7a283b5-c0e2-4b79-a78c-39c630324762","writable":true}]},"pvProvisioning":"auto","volumeSize":1} + """.strip() + raise PrimeHubException(message + "\n\nExample:\n" + json.dumps(json.loads(example), indent=2)) + + +def validate(payload: dict, for_update=False): + # check required fields + if not for_update: + if 'name' not in payload: + raise PrimeHubException('name is required') + if 'type' not in payload: + raise PrimeHubException('type is required') + + matched: Union[str, Any, None] = re.match( + r'^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*', + payload.get('name')) + + # check formats + if not matched: + raise PrimeHubException("[name] should be lower case alphanumeric characters, '-' or '.', " + "and must start and end with an alphanumeric character.") + + # check type values + valid_types = ['pv', 'nfs', 'hostPath', 'git', 'env'] + if payload.get('type') not in valid_types and not for_update: + raise PrimeHubException(f'[type] should be one of {valid_types}') + + # writable means could connect to groups for writable and enabling upload server + writable_types = ['pv', 'nfs', 'hostPath'] + if 'enableUploadServer' in payload and payload.get('type') not in writable_types: + raise PrimeHubException(f'[enableUploadServer] only can use with should be one of {writable_types} types') + + # check groups format + if 'groups' in payload: + groups: Optional[Dict[Any, Any]] = payload.get('groups') + if groups: + for g in groups.get('connect', []): + if not isinstance(g, dict): + raise PrimeHubException('group connect should be a pair {id, writable}') + if 'id' in g and 'writable' in g: + continue + raise PrimeHubException('group connect should be a pair {id, writable}') + + for g in groups.get('disconnect', []): + if not isinstance(g, dict): + raise PrimeHubException('disconnect connect should be an entry {id}') + if 'id' in g and len(g) == 1: + continue + raise PrimeHubException('disconnect connect should be an entry {id}') + + return payload + + +def validate_creation(payload: dict): + # validate type specific fields + provision_types = ['auto', 'manual'] + if 'pv' == payload.get('type'): + provision = payload.get('pvProvisioning', '') + if provision not in provision_types: + raise PrimeHubException( + f'pvProvisioning is required for pv type and its value should be one of {provision_types}') + + if 'nfs' == payload.get('type'): + if not payload.get('nfsServer') or not payload.get('nfsPath'): + raise PrimeHubException( + 'nfsServer and nfsPath are required for nfs type') + + if 'hostPath' == payload.get('type'): + if 'hostPath' not in payload: + raise PrimeHubException( + 'hostPath is required for hostPath type') + + if 'git' == payload.get('type'): + if 'url' not in payload: + raise PrimeHubException( + 'url is required for git type') + if 
'secret' in payload: + secret_connect = payload.get('secret', {}).get('connect', {}) + if isinstance(secret_connect, dict): + if 'id' not in secret_connect: + raise PrimeHubException('secret connect should have an entry') + else: + raise PrimeHubException('secret connect should have an entry') + + if 'secret' in payload and payload.get('type') != 'git': + raise PrimeHubException( + 'secret only is used with git type') + + return payload + + +def dataset_output(dataset: dict): + output = dict() + keep_fields = ['id', 'name', 'displayName', 'description', 'global', 'type', 'groups'] + + if dataset.get('type') == 'env': + keep_fields.append('variables') + + if dataset.get('type') == 'git': + keep_fields.append('url') + keep_fields.append('secret') + + if dataset.get('type') == 'pv': + keep_fields.append('pvProvisioning') + keep_fields.append('volumeSize') + + if dataset.get('type') == 'nfs': + keep_fields.append('nfsServer') + keep_fields.append('nfsPath') + + if dataset.get('type') == 'hostPath': + keep_fields.append('hostPath') + + if dataset.get('type') in ['pv', 'nfs', 'hostPath']: + keep_fields.append('enableUploadServer') + keep_fields.append('uploadServerLink') + + for k, v in dataset.items(): + if k in keep_fields: + output[k] = v + + return output diff --git a/primehub/cli.py b/primehub/cli.py index 78a4683..ff047de 100644 --- a/primehub/cli.py +++ b/primehub/cli.py @@ -46,6 +46,8 @@ def create_commands(parser, sdk): # Create the group parser p = parsers[command_group] = create_command_parser(description=target.help_description()) p.usage = """primehub {} """.format(command_group) + if sdk.usage_role != 'user': + p.usage = """primehub {} {} """.format(sdk.usage_role, command_group) for action in find_actions(target): name, description = action['name'], action['description'] @@ -190,12 +192,14 @@ def run_action_noargs(sdk, selected_component, sub_parsers, target, args): def main(sdk=None): - main_parser = create_command_parser() - main_parser.usage = """primehub """ - if not sdk: sdk = create_sdk() + main_parser = create_command_parser() + main_parser.usage = """primehub """ + if sdk.usage_role != 'user': + main_parser.usage = f"""primehub {sdk.usage_role} """ + # enable @ask_for_permission for command-line enable_ask_for_permission_feature() @@ -204,10 +208,20 @@ def main(sdk=None): hide_help = False helper = None exit_normally = False + follow_inner_exit_code = False try: logger.debug('start to parse {}'.format(sys.argv)) args, remaining_args = main_parser.parse_known_args() command_name = args.command + + # switch commands for different roles + if command_name == 'admin': + sys.argv.remove('admin') + sdk.switch_admin_role() + follow_inner_exit_code = True + main(sdk) + return + logger.debug("args: {}".format(args)) logger.debug("remaining_args: {}".format(remaining_args)) reconfigure_primehub_config_if_needed(args, sdk) @@ -257,7 +271,10 @@ def main(sdk=None): exit_normally = False print(e, file=sdk.stderr) sys.exit(1) - except SystemExit: + except SystemExit as e: + if follow_inner_exit_code: + sys.exit(e.args[0]) + if not hide_help: if helper: helper.print_help(file=sdk.stderr) diff --git a/primehub/datasets.py b/primehub/datasets.py index 75bc181..d413629 100644 --- a/primehub/datasets.py +++ b/primehub/datasets.py @@ -1,115 +1,7 @@ -import json -import re -from typing import Optional, Union, Any, Dict +from typing import Optional -from primehub import Helpful, cmd, Module, primehub_load_config +from primehub import Helpful, cmd, Module from primehub.resource_operations import 
GroupResourceOperation -from primehub.utils import PrimeHubException -from primehub.utils.optionals import file_flag - - -def invalid_config(message: str): - example = """ - {"name":"my-dataset-name","displayName":"the dataset created by SDK", - "description":"desc","type":"pv","global":false,"groups": - {"connect":[{"id":"a7a283b5-c0e2-4b79-a78c-39c630324762","writable":true}]},"pvProvisioning":"auto","volumeSize":1} - """.strip() - raise PrimeHubException(message + "\n\nExample:\n" + json.dumps(json.loads(example), indent=2)) - - -def validate(payload: dict, for_update=False): - # check required fields - if not for_update: - if 'name' not in payload: - raise PrimeHubException('name is required') - if 'type' not in payload: - raise PrimeHubException('type is required') - - matched: Union[str, Any, None] = re.match( - r'^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*', - payload.get('name')) - - # check formats - if not matched: - raise PrimeHubException("[name] should be lower case alphanumeric characters, '-' or '.', " - "and must start and end with an alphanumeric character.") - - # check type values - valid_types = ['pv', 'nfs', 'hostPath', 'git', 'env'] - if payload.get('type') not in valid_types and not for_update: - raise PrimeHubException(f'[type] should be one of {valid_types}') - - # writable means could connect to groups for writable and enabling upload server - writable_types = ['pv', 'nfs', 'hostPath'] - if 'enableUploadServer' in payload and payload.get('type') not in writable_types: - raise PrimeHubException(f'[enableUploadServer] only can use with should be one of {writable_types} types') - - # check groups format - if 'groups' in payload: - groups: Optional[Dict[Any, Any]] = payload.get('groups') - if groups: - for g in groups.get('connect', []): - if not isinstance(g, dict): - raise PrimeHubException('group connect should be a pair {id, writable}') - if 'id' in g and 'writable' in g: - continue - raise PrimeHubException('group connect should be a pair {id, writable}') - - for g in groups.get('disconnect', []): - if not isinstance(g, dict): - raise PrimeHubException('disconnect connect should be an entry {id}') - if 'id' in g and len(g) == 1: - continue - raise PrimeHubException('disconnect connect should be an entry {id}') - - return payload - - -def validate_creation(payload: dict): - # validate type specific fields - provision_types = ['auto', 'manual'] - if 'pv' == payload.get('type'): - provision = payload.get('pvProvisioning', '') - if provision not in provision_types: - raise PrimeHubException( - f'pvProvisioning is required for pv type and its value should be one of {provision_types}') - - if 'nfs' == payload.get('type'): - if not payload.get('nfsServer') or not payload.get('nfsPath'): - raise PrimeHubException( - 'nfsServer and nfsPath are required for nfs type') - - if 'hostPath' == payload.get('type'): - if 'hostPath' not in payload: - raise PrimeHubException( - 'hostPath is required for hostPath type') - - if 'git' == payload.get('type'): - if 'url' not in payload: - raise PrimeHubException( - 'url is required for git type') - if 'secret' in payload: - secret_connect = payload.get('secret', {}).get('connect', {}) - if isinstance(secret_connect, dict): - if 'id' not in secret_connect: - raise PrimeHubException('secret connect should have an entry') - else: - raise PrimeHubException('secret connect should have an entry') - - if 'secret' in payload and payload.get('type') != 'git': - raise PrimeHubException( - 'secret only is used with git type') 
- - return payload - - -def waring_if_needed(data: dict, stderr): - if data and 'uploadServerSecret' in data: - if data.get('uploadServerSecret') is None: - print('WARNING: you got a nil uploadServerSecret, ' - 'because there is another one has been generated.\n', - file=stderr) - return data class Datasets(Helpful, Module, GroupResourceOperation): @@ -134,108 +26,6 @@ class Datasets(Helpful, Module, GroupResourceOperation): } """ - @cmd(name='create', description='Create a dataset', optionals=[('file', file_flag)]) - def _create_cmd(self, **kwargs) -> list: - """ - Create a dataset - - :rtype: dict - :returns: the dataset - """ - - return self.create(primehub_load_config(filename=kwargs.get('file', None))) - - def create(self, config) -> list: - """ - Create a dataset - - :rtype: dict - :returns: the dataset - """ - - query = """ - mutation CreateDatasetMutation($payload: DatasetCreateInput!) { - createDataset(data: $payload) { - id - } - } - """ - - if not config: - invalid_config('Dataset configuration file is required.') - - if config.get('enableUploadServer', False): - query = """ - mutation CreateDatasetMutation($payload: DatasetCreateInput!) { - createDataset(data: $payload) { - id - uploadServerSecret { - username - password - } - } - } - """ - - variables = {'payload': validate_creation(validate(config))} - result = self.request(variables, query) - if 'data' in result and 'createDataset' in result['data']: - return waring_if_needed(result['data']['createDataset'], self.primehub.stderr) - return result - - @cmd(name='update', description='Update the dataset') - def _update_cmd(self, name: str, **kwargs) -> list: - """ - Update the dataset - - :type name: str - :rtype: dict - :returns: the dataset - """ - return self._update_cmd(name, primehub_load_config(filename=kwargs.get('file', None))) - - def update(self, name: str, config: dict) -> list: - """ - Update the dataset - - :type name: str - :type config: dict - :rtype: dict - - :returns: the dataset - """ - - query = """ - mutation UpdateDatasetMutation($payload: DatasetUpdateInput!, $where: DatasetWhereUniqueInput!) { - updateDataset(data: $payload, where: $where) { - id - } - } - """ - - if not config: - invalid_config('Dataset configuration file is required.') - - if config.get('enableUploadServer', False): - query = """ - mutation UpdateDatasetMutation($payload: DatasetUpdateInput!, $where: DatasetWhereUniqueInput!) { - updateDataset(data: $payload, where: $where) { - id - uploadServerSecret { - username - password - } - } - } - """ - - update_mode = True - variables = {'payload': validate(config, update_mode), 'where': {'id': name}} - result = self.request(variables, query) - if 'data' in result and 'updateDataset' in result['data']: - return waring_if_needed(result['data']['updateDataset'], self.primehub.stderr) - return result - @cmd(name='list', description='List datasets') def list(self) -> list: """ @@ -250,137 +40,12 @@ def list(self) -> list: def get(self, name) -> Optional[dict]: """ Get a dataset from the current group - :type name: str :param name: the name of a dataset - :rtype: Optional[dict] :returns: a dataset """ - - query = """ - query DatasetQuery($where: DatasetWhereUniqueInput!) 
{ - dataset(where: $where) { - id - name - displayName - description - type - pvProvisioning - volumeSize - variables - nfsServer - nfsPath - hostPath - url - secret { - id - } - enableUploadServer - uploadServerLink - global - groups { - id - name - displayName - writable - } - } - } - """ - - def output(dataset: dict): - dataset_output = dict() - keep_fields = ['id', 'name', 'displayName', 'description', 'global', 'type', 'groups'] - - if dataset.get('type') == 'env': - keep_fields.append('variables') - - if dataset.get('type') == 'git': - keep_fields.append('url') - keep_fields.append('secret') - - if dataset.get('type') == 'pv': - keep_fields.append('pvProvisioning') - keep_fields.append('volumeSize') - - if dataset.get('type') == 'nfs': - keep_fields.append('nfsServer') - keep_fields.append('nfsPath') - - if dataset.get('type') == 'hostPath': - keep_fields.append('hostPath') - - if dataset.get('type') in ['pv', 'nfs', 'hostPath']: - keep_fields.append('enableUploadServer') - keep_fields.append('uploadServerLink') - - for k, v in dataset.items(): - if k in keep_fields: - dataset_output[k] = v - - return dataset_output - - result = self.request({'where': {'id': name}}, query) - if 'data' in result and 'dataset' in result['data']: - return output(result['data']['dataset']) - - return result - - @cmd(name='delete', description='Delete a dataset by id', return_required=True) - def delete(self, id): - """ - Delete a dataset by id - - :type id: str - :param id: The dataset id - - :rtype dict - :return the result of the deleted dataset - """ - - query = """ - mutation DeleteDatasetMutation($where: DatasetWhereUniqueInput!) { - deleteDataset(where: $where) { - id - } - } - """ - - result = self.request({'where': {'id': id}}, query) - if 'data' in result and 'deleteDataset' in result['data']: - return result['data']['deleteDataset'] - return result - - @cmd(name='upload_secret', description='Regenerate the secret of the upload server', - return_required=True) - def regenerate_upload_server_secret(self, id): - """ - Regenerate the secret of the upload server - - :type id: str - :param id: The dataset id or name - - :rtype dict - :return the result of the deleted dataset - """ - - query = """ - mutation RegenerateUploadServerSecretMutation($where: DatasetWhereUniqueInput!) 
{ - regenerateUploadServerSecret(where: $where) { - id - uploadServerSecret { - username - password - } - } - } - """ - - result = self.request({'where': {'id': id}}, query) - if 'data' in result and 'regenerateUploadServerSecret' in result['data']: - return waring_if_needed(result['data']['regenerateUploadServerSecret'], self.primehub.stderr) - return result + return self.do_get(Datasets.query, Datasets.resource_name, name) def help_description(self): return "Get a dataset or list datasets" diff --git a/tests/test_datasets.py b/tests/test_datasets.py index e06fd7e..e8bbe87 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,4 +1,4 @@ -from primehub.datasets import validate, validate_creation +from primehub.admin_datasets import validate, validate_creation from primehub.utils import PrimeHubException from tests import BaseTestCase diff --git a/tests/test_sdk_to_admin_cli.py b/tests/test_sdk_to_admin_cli.py new file mode 100644 index 0000000..3eb5a47 --- /dev/null +++ b/tests/test_sdk_to_admin_cli.py @@ -0,0 +1,35 @@ +import json + +from primehub import Helpful, Module, cmd +from tests import BaseTestCase + + +class FakeCommand(Helpful, Module): + + @cmd(name='list', description='action_no_args') + def action_no_args(self): + return [1, 2, 3] + + def help_description(self): + return "helpful" + + +class TestAdminCommandGroupToCommandLine(BaseTestCase): + + def setUp(self) -> None: + super(TestAdminCommandGroupToCommandLine, self).setUp() + + # clean commands, add the FakeCommand + self.sdk.register_admin_command('test_datasets', FakeCommand) + + def fake(self) -> FakeCommand: + return FakeCommand(self.sdk) + + def test_primehub_admin(self): + output = self.cli_stdout(['app.py', 'admin', 'test_datasets', 'list']) + self.assertEqual([1, 2, 3], json.loads(output)) + + def test_primehub_admin_help(self): + output = self.cli_stderr(['app.py', 'admin', 'test_datasets', '-h']) + usage = [x.strip() for x in output.split('\n') if '' in x][0] + self.assertEqual('primehub admin test_datasets ', usage) From 3a6f14e4cc01a4f0fa710b83d6a081a1e2b204ae Mon Sep 17 00:00:00 2001 From: "Ching Yi, Chan" Date: Thu, 14 Oct 2021 15:36:20 +0800 Subject: [PATCH 3/3] Update docs Signed-off-by: Ching Yi, Chan --- docs/CLI/admin/datasets.md | 446 ++++++++++++++++++ docs/CLI/apps.md | 2 +- docs/CLI/datasets.md | 288 +---------- docs/CLI/deployments.md | 2 +- docs/notebook/admin/datasets.ipynb | 228 +++++++++ docs/notebook/datasets.ipynb | 159 +------ primehub/admin_datasets.py | 4 +- primehub/extras/doc_generator.py | 35 +- primehub/extras/templates/cli.tpl.md | 6 +- .../templates/examples/admin/datasets.md | 323 +++++++++++++ primehub/extras/templates/examples/apps.md | 4 +- .../extras/templates/examples/datasets.md | 288 +---------- 12 files changed, 1058 insertions(+), 727 deletions(-) create mode 100644 docs/CLI/admin/datasets.md create mode 100644 docs/notebook/admin/datasets.ipynb create mode 100644 primehub/extras/templates/examples/admin/datasets.md diff --git a/docs/CLI/admin/datasets.md b/docs/CLI/admin/datasets.md new file mode 100644 index 0000000..4088336 --- /dev/null +++ b/docs/CLI/admin/datasets.md @@ -0,0 +1,446 @@ + +# Primehub Datasets + +``` +Usage: + primehub admin datasets + +Manage datasets + +Available Commands: + create Create a dataset + delete Delete a dataset by id + get Get a dataset by name + list Delete a dataset by id + regen-upload-secret Regenerate the secret of the upload server + update Update the dataset + +Options: + -h, --help Show the help + +Global Options: + 
--config CONFIG      Change the path of the config file (Default: ~/.primehub/config.json)
+  --endpoint ENDPOINT  Override the GraphQL API endpoint
+  --token TOKEN        Override the API Token
+  --group GROUP        Override the current group
+  --json               Output the json format (output human-friendly format by default)
+
+```
+
+
+### create
+
+Create a dataset
+
+
+```
+primehub admin datasets create
+```
+
+
+* *(optional)* file
+
+
+
+
+### delete
+
+Delete a dataset by id
+
+
+```
+primehub admin datasets delete
+```
+
+* id: The dataset id
+
+
+
+
+
+### get
+
+Get a dataset by name
+
+
+```
+primehub admin datasets get
+```
+
+* name: the name of a dataset
+
+
+
+
+
+### list
+
+List datasets
+
+
+```
+primehub admin datasets list
+```
+
+
+* *(optional)* page
+
+
+
+
+### regen-upload-secret
+
+Regenerate the secret of the upload server
+
+
+```
+primehub admin datasets regen-upload-secret
+```
+
+* id: The dataset id or name
+
+
+
+
+
+### update
+
+Update the dataset
+
+
+```
+primehub admin datasets update
+```
+
+* name
+
+
+
+
+
+
+## Examples
+
+### Query datasets
+
+The `datasets` command is a group-specific resource. It only works after a `group` is assigned.
+
+Using `list` to find all datasets in your group:
+
+```
+$ primehub admin datasets list
+```
+
+```
+id           name         displayName                 description                      type
+-----------  -----------  --------------------------  -------------------------------  ------
+pv-dataset   pv-dataset   the dataset created by SDK  It is a PV dataset               pv
+env-dataset  env-dataset  env-dataset                 make changes to the description  env
+```
+
+If you already know the name of a dataset, use the `get` to get a single entry:
+
+```
+$ primehub admin datasets get pv-dataset
+```
+
+```
+id:                 pv-dataset
+name:               pv-dataset
+displayName:        the dataset created by SDK
+description:        It is a PV dataset
+type:               pv
+pvProvisioning:     auto
+volumeSize:         1
+enableUploadServer: True
+uploadServerLink:   http://primehub-python-sdk.primehub.io/dataset/hub/pv-dataset/browse
+global:             False
+groups:             [{'id': 'a962305b-c884-4413-9358-ef56373b287c', 'name': 'foobarbar', 'displayName': '', 'writable': False}, {'id': 'a7a283b5-c0e2-4b79-a78c-39c630324762', 'name': 'phusers', 'displayName': 'primehub users', 'writable': False}]
+```
+
+### Admin actions for datasets
+
+These actions can only be used by administrators:
+
+* create
+* update
+* delete
+
+`create` and `update` require a dataset configuration; please see the examples below.
+
+### Fields for creating or updating
+
+| field | required | type | description |
+| --- | --- | --- | --- |
+| name | required | string | it should be a valid Kubernetes resource name |
+| displayName | optional | string | display name for this dataset |
+| description | optional | string | |
+| global | optional | boolean | when a dataset is global, every group can see it |
+| type | required | string | one of ['pv', 'nfs', 'hostPath', 'git', 'env'] |
+| url | conditional | string | **MUST** be used with the `git` type |
+| pvProvisioning | conditional | string | one of ['auto', 'manual'], **MUST** be used with the `pv` type. This field is only used in the `CREATE` action |
+| nfsServer | conditional | string | **MUST** be used with the `nfs` type |
+| nfsPath | conditional | string | **MUST** be used with the `nfs` type |
+| hostPath | conditional | string | **MUST** be used with the `hostPath` type |
+| variables | optional | dict | **MAY** be used with the `env` type. It is a set of key-value pairs; all values must be strings. For example: `{"key1":"value1","key2":"value2"}`. 
| +| groups | optional | list of connected groups (dict) | please see the `connect` examples | +| secret | optional | dict | **MAY** use with `git` type, it binds a `secret` to the `git` dataset | +| volumeSize | conditional | integer | **MUST** use with `pv` type. The unit is `GB`.| +| enableUploadServer | optional | boolean | it only works with one of ['pv', 'nfs', 'hostPath'] writable types | + +> There is a simple rule to use fields for `UPDATE`. All required fields should not be in the payload. + +For example, there is a configuration for creating env dataset: + +```bash +primehub datasets create < There is not options for global `write`, you have to set the `write` to each group by the `groups.connect` field + +#### groups.connect + +Here is our example in the `connect`: + +```json +{ + "connect": [ + { + "id": "a7a283b5-c0e2-4b79-a78c-39c630324762", + "writable": true + } + ] +} +``` + +The `groups.connect` can be used with: + +* primehub admin datasets create +* primehub admin datasets update + +However, the `writable` only has meanings to `writable` datasets (one of `['pv', 'nfs', 'hostPath']` writable types). +When a writable dataset got a group with `writable: false` setting, it will make the group read-only to the dataset. + + +#### groups.disconnect + +The `groups.disconnect` can be used with: + +* primehub admin datasets update + + +```json +{ + "connect": [ + { + "id": "a7a283b5-c0e2-4b79-a78c-39c630324762", + "writable": true + } + ], + "disconnect": [ + { + "id": "a7a283b5-c0e2-4b79-a78c-39c630324762" + } + ] +} +``` + +`groups.disconnect` will remove the association between the dataset and group. + +The result depends on `global` value: +* `true` -> the removed group could read the dataset +* `false` -> the remove group would not see the dataset anymore \ No newline at end of file diff --git a/docs/CLI/apps.md b/docs/CLI/apps.md index bb189c9..e8ea802 100644 --- a/docs/CLI/apps.md +++ b/docs/CLI/apps.md @@ -214,4 +214,4 @@ stop: False status: Ready message: Deployment is ready pods: [{'logEndpoint': 'http://primehub-python-sdk.primehub.io/api/logs/pods/app-code-server-26fcc-765bf579c5-srcft'}] -``` +``` \ No newline at end of file diff --git a/docs/CLI/datasets.md b/docs/CLI/datasets.md index f730a17..86ee0e1 100644 --- a/docs/CLI/datasets.md +++ b/docs/CLI/datasets.md @@ -55,8 +55,6 @@ primehub datasets list ## Examples -### Query datasets - The `datasets` command is a group specific resource. It only works after the `group` assigned. 
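+The same queries are also available from the Python SDK. A minimal sketch (it assumes a configured client with a group assigned; `kaggle` is the dataset from the examples below):
+
+```python
+from primehub import PrimeHub, PrimeHubConfig
+
+ph = PrimeHub(PrimeHubConfig())
+
+print(ph.datasets.list())         # all datasets in the current group
+print(ph.datasets.get('kaggle'))  # a single dataset by name
+```
+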
Using `list` to find all datasets in your group: @@ -66,286 +64,22 @@ $ primehub datasets list ``` ``` -id name displayName description type ------------ ----------- -------------------------- ------------------------------- ------ -pv-dataset pv-dataset the dataset created by SDK It is a PV dataset pv -env-dataset env-dataset env-dataset make changes to the description env +id name displayName description type +------ ------ ------------- ------------- ------ +kaggle kaggle kaggle pv ``` If you already know the name of a dataset, use the `get` to get a single entry: ``` -$ primehub datasets get dataset +$ primehub datasets get kaggle ``` ``` -id: pv-dataset -name: pv-dataset -displayName: the dataset created by SDK -description: It is a PV dataset -type: pv -pvProvisioning: auto -volumeSize: 1 -enableUploadServer: True -uploadServerLink: http://primehub-python-sdk.primehub.io/dataset/hub/pv-dataset/browse -global: False -groups: [{'id': 'a962305b-c884-4413-9358-ef56373b287c', 'name': 'foobarbar', 'displayName': '', 'writable': False}, {'id': 'a7a283b5-c0e2-4b79-a78c-39c630324762', 'name': 'phusers', 'displayName': 'primehub users', 'writable': False}] -``` - -### Admin actions for datasets - -These actions only can be used by administrators: - -* create -* update -* delete - -For `create` and `update` require a dataset configuration, please see above examples. - -### Fields for creating or updating - -| field | required | type | description | -| --- | --- | --- | --- | -| name | required | string | it should be a valid resource name for kubernetes | -| displayName | optional | string | display name for this dataset | -| description | optional | string | | -| global | optional | boolean | when a dataset is global, it could be seen for each group | -| type | required | string | one of ['pv', 'nfs', 'hostPath', 'git', 'env'] | -| url | conditional | string | **MUST** use with `git` type | -| pvProvisioning | conditional | string | onf of ['auto', 'manual'], **MUST** use with `pv` type. This field only uses in `CREATE` action | -| nfsServer | conditional | string | **MUST** use with `nfs` type | -| nfsPath | conditional | string | **MUST** use with `nfs` type | -| hostPath | conditional | string | **MUST** use with `hostPath` type | -| variables | optional | dict | **MAY** use with `env` type. It is key value pairs. All values have to a string value. For example: `{"key1":"value1","key2":"value2"}`. | -| groups | optional | list of connected groups (dict) | please see the `connect` examples | -| secret | optional | dict | **MAY** use with `git` type | bind a `secret` to the `git` dataset | -| volumeSize | conditional | integer | **MUST** use with `pv` type. The unit is `GB`.| -| enableUploadServer | optional | boolean | it only works with one of ['pv', 'nfs', 'hostPath'] writable types | - -> There is a simple rule to use fields for `UPDATE`. All required fields should not be in the payload. - -For example, there is a configuration for creating env dataset: - -```bash -primehub datasets create <\n", + "\n", + "Manage datasets\n", + "\n", + "Available Commands:\n", + " create Create a dataset\n", + " delete Delete a dataset by id\n", + " get Get a dataset by name\n", + " list Delete a dataset by id\n", + " update Update the dataset\n", + " upload_secret Regenerate the secret of the upload server\n", + "```\n", + "\n", + "---\n", + "\n", + "\n", + "## Dataset configuration\n", + "\n", + "You need a configuration `create` and `update` to operate. 
Here is an example to create a `pv-dataset`:\n", + "\n", + "```json\n", + "{\n", + " \"name\": \"pv-dataset\",\n", + " \"displayName\": \"the dataset created by SDK\",\n", + " \"description\": \"It is a PV dataset\",\n", + " \"type\": \"pv\",\n", + " \"global\": true,\n", + " \"pvProvisioning\": \"auto\",\n", + " \"volumeSize\": 1\n", + "}\n", + "```\n", + "\n", + "In our system, there are 5 types for datasets: `['pv', 'nfs', 'hostPath', 'git', 'env']`. Please check the fields reference to give a proper configuration to create your own dataset.\n", + "\n", + "\n", + "\n", + "## Fields for creating or updating\n", + "\n", + "| field | required | type | description |\n", + "| --- | --- | --- | --- |\n", + "| name | required | string | it should be a valid resource name for kubernetes |\n", + "| displayName | optional | string | display name for this dataset |\n", + "| description | optional | string | |\n", + "| global | optional | boolean | when a dataset is global, it could be seen for each group |\n", + "| type | required | string | one of ['pv', 'nfs', 'hostPath', 'git', 'env'] |\n", + "| url | conditional | string | **MUST** use with `git` type |\n", + "| pvProvisioning | conditional | string | onf of ['auto', 'manual'], **MUST** use with `pv` type. This field only uses in `CREATE` action |\n", + "| nfsServer | conditional | string | **MUST** use with `nfs` type |\n", + "| nfsPath | conditional | string | **MUST** use with `nfs` type |\n", + "| hostPath | conditional | string | **MUST** use with `hostPath` type |\n", + "| variables | optional | dict | **MAY** use with `env` type. It is key value pairs. All values have to a string value. For example: `{\"key1\":\"value1\",\"key2\":\"value2\"}`. |\n", + "| groups | optional | list of connected groups (dict) | please see the `connect` examples |\n", + "| secret | optional | dict | **MAY** use with `git` type. it binds a `secret` to the `git` dataset |\n", + "| volumeSize | conditional | integer | **MUST** use with `pv` type. The unit is `GB`.|\n", + "| enableUploadServer | optional | boolean | it only works with one of ['pv', 'nfs', 'hostPath'] writable types |\n", + "\n", + "> There is a simple rule to use fields for `UPDATE`. All required fields should not be in the payload.\n", + "\n", + "For example, there is a configuration for creating env dataset:\n", + "\n", + "```bash\n", + "primehub admin datasets create <\n", - "\n", - "Get a dataset or list datasets\n", - "\n", - "Available Commands:\n", - " create Create a dataset\n", - " delete Delete a dataset by id\n", - " get Get a dataset by name\n", - " list List datasets\n", - " update Update the dataset\n", - " upload_secret Regenerate the secret of the upload server\n", - "```\n", - "\n", - "---\n", - "\n", - "All mutating actions require the `Admin` role:\n", - "\n", - "* create\n", - "* delete\n", - "* update\n", - "* upload_secret (`regenerate_upload_server_secret` for method name)\n", - "\n", - "## Dataset configuration\n", - "\n", - "You need a configuration `create` and `update` to operate. 
Here is an example to create a `pv-dataset`:\n", - "\n", - "```json\n", - "{\n", - " \"name\": \"pv-dataset\",\n", - " \"displayName\": \"the dataset created by SDK\",\n", - " \"description\": \"It is a PV dataset\",\n", - " \"type\": \"pv\",\n", - " \"global\": false,\n", - " \"groups\": {\n", - " \"connect\": [\n", - " {\n", - " \"id\": \"a7a283b5-c0e2-4b79-a78c-39c630324762\",\n", - " \"writable\": true\n", - " },\n", - " {\n", - " \"id\": \"a962305b-c884-4413-9358-ef56373b287c\",\n", - " \"writable\": false\n", - " }\n", - " ]\n", - " },\n", - " \"pvProvisioning\": \"auto\",\n", - " \"volumeSize\": 1\n", - "}\n", - "```\n", - "\n", - "In our system, there are 5 types for datasets: `['pv', 'nfs', 'hostPath', 'git', 'env']`. Please check the fields reference to give a proper configuration to create your own dataset.\n", - "\n", - "\n", - "\n", - "## Fields for creating or updating\n", - "\n", - "| field | required | type | description |\n", - "| --- | --- | --- | --- |\n", - "| name | required | string | it should be a valid resource name for kubernetes |\n", - "| displayName | optional | string | display name for this dataset |\n", - "| description | optional | string | |\n", - "| global | optional | boolean | when a dataset is global, it could be seen for each group |\n", - "| type | required | string | one of ['pv', 'nfs', 'hostPath', 'git', 'env'] |\n", - "| url | conditional | string | **MUST** use with `git` type |\n", - "| pvProvisioning | conditional | string | onf of ['auto', 'manual'], **MUST** use with `pv` type. This field only uses in `CREATE` action |\n", - "| nfsServer | conditional | string | **MUST** use with `nfs` type |\n", - "| nfsPath | conditional | string | **MUST** use with `nfs` type |\n", - "| hostPath | conditional | string | **MUST** use with `hostPath` type |\n", - "| variables | optional | dict | **MAY** use with `env` type. It is key value pairs. All values have to a string value. For example: `{\"key1\":\"value1\",\"key2\":\"value2\"}`. |\n", - "| groups | optional | list of connected groups (dict) | please see the `connect` examples |\n", - "| secret | optional | dict | **MAY** use with `git` type | bind a `secret` to the `git` dataset |\n", - "| volumeSize | conditional | integer | **MUST** use with `pv` type. The unit is `GB`.|\n", - "| enableUploadServer | optional | boolean | it only works with one of ['pv', 'nfs', 'hostPath'] writable types |\n", - "\n", - "> There is a simple rule to use fields for `UPDATE`. 
All required fields should not be in the payload.\n", - "\n", - "For example, there is a configuration for creating env dataset:\n", - "\n", - "```bash\n", - "primehub datasets create < list: :rtype: dict :returns: the dataset """ - return self._update_cmd(name, primehub_load_config(filename=kwargs.get('file', None))) + return self.update(name, primehub_load_config(filename=kwargs.get('file', None))) def update(self, name: str, config: dict) -> list: """ @@ -120,7 +120,7 @@ def update(self, name: str, config: dict) -> list: return waring_if_needed(result['data']['updateDataset'], self.primehub.stderr) return result - @cmd(name='upload_secret', description='Regenerate the secret of the upload server', + @cmd(name='regen-upload-secret', description='Regenerate the secret of the upload server', return_required=True) def regenerate_upload_server_secret(self, id): """ diff --git a/primehub/extras/doc_generator.py b/primehub/extras/doc_generator.py index eb0765a..89c46b3 100644 --- a/primehub/extras/doc_generator.py +++ b/primehub/extras/doc_generator.py @@ -15,9 +15,12 @@ ) -def get_example(command): +def get_example(command, role=''): try: - return env.get_template('examples/{}.md'.format(command)).render() + if role: + return env.get_template('examples/{}/{}.md'.format(role, command)).render() + else: + return env.get_template('examples/{}.md'.format(command)).render() except BaseException: pass return "TBD: please write example for [{}]".format(command) @@ -30,20 +33,31 @@ def get_doc_path(): return p -def create_cli_doc_path(name): - doc_path = os.path.join(get_doc_path(), 'CLI', name + ".md") +def create_cli_doc_path(name, role=''): + if role: + doc_path = os.path.join(get_doc_path(), 'CLI', role, name + ".md") + else: + doc_path = os.path.join(get_doc_path(), 'CLI', name + ".md") os.makedirs(os.path.dirname(doc_path), exist_ok=True) return doc_path def generate_command_document(*args, **kwargs): + if kwargs['role']: + kwargs['role_title'] = f'<{kwargs["role"].upper()}> ' + kwargs['role'] = f'{kwargs["role"]} ' + return env.get_template('cli.tpl.md').render(*args, **kwargs) -def generate_help_for_command(sdk: PrimeHub, name): +def generate_help_for_command(sdk: PrimeHub, name, role=''): sdk.stderr = io.StringIO() sdk.stdout = io.StringIO() - sys.argv = ['primehub', name, '-h'] + + if role: + sys.argv = ['primehub', role, name, '-h'] + else: + sys.argv = ['primehub', name, '-h'] try: cli_main(sdk=sdk) except SystemExit: @@ -51,11 +65,11 @@ def generate_help_for_command(sdk: PrimeHub, name): command_help = sdk.stderr.getvalue() actions = find_actions(sdk.commands[name]) attach_template_information_to_action(actions, name, sdk) - document = generate_command_document(command=name, command_help=command_help, - actions=actions, examples=get_example(name)) + document = generate_command_document(command=name, command_help=command_help, role=role, + actions=actions, examples=get_example(name, role)) print("Generate doc", name) - p = create_cli_doc_path(name) + p = create_cli_doc_path(name, role) with open(p, "w") as fh: fh.write(document) @@ -105,6 +119,9 @@ def main(): continue generate_help_for_command(sdk, k) + for k, v in sdk.admin_commands.items(): + generate_help_for_command(sdk, k, 'admin') + if __name__ == '__main__': main() diff --git a/primehub/extras/templates/cli.tpl.md b/primehub/extras/templates/cli.tpl.md index 2624ecd..1912d5c 100644 --- a/primehub/extras/templates/cli.tpl.md +++ b/primehub/extras/templates/cli.tpl.md @@ -1,5 +1,5 @@ -# Primehub {{command.capitalize()}} +# 
{{role_title}}Primehub {{command.capitalize()}}
 
 ```
 {{command_help}}
@@ -12,11 +12,11 @@
 
 {% if item['required_arguments'] %}
 ```
-primehub {{command}} {{item['name']}} {{item['required_arguments_string']}}
+primehub {{role}}{{command}} {{item['name']}} {{item['required_arguments_string']}}
 ```
 {% else %}
 ```
-primehub {{command}} {{item['name']}}
+primehub {{role}}{{command}} {{item['name']}}
 ```
 {% endif %}
diff --git a/primehub/extras/templates/examples/admin/datasets.md b/primehub/extras/templates/examples/admin/datasets.md
new file mode 100644
index 0000000..64e2c0d
--- /dev/null
+++ b/primehub/extras/templates/examples/admin/datasets.md
@@ -0,0 +1,323 @@
+### Query datasets
+
+The `datasets` command is a group-specific resource. It only works after a `group` is assigned.
+
+Using `list` to find all datasets in your group:
+
+```
+$ primehub admin datasets list
+```
+
+```
+id           name         displayName                 description                      type
+-----------  -----------  --------------------------  -------------------------------  ------
+pv-dataset   pv-dataset   the dataset created by SDK  It is a PV dataset               pv
+env-dataset  env-dataset  env-dataset                 make changes to the description  env
+```
+
+If you already know the name of a dataset, use `get` to fetch a single entry:
+
+```
+$ primehub admin datasets get pv-dataset
+```
+
+```
+id: pv-dataset
+name: pv-dataset
+displayName: the dataset created by SDK
+description: It is a PV dataset
+type: pv
+pvProvisioning: auto
+volumeSize: 1
+enableUploadServer: True
+uploadServerLink: http://primehub-python-sdk.primehub.io/dataset/hub/pv-dataset/browse
+global: False
+groups: [{'id': 'a962305b-c884-4413-9358-ef56373b287c', 'name': 'foobarbar', 'displayName': '', 'writable': False}, {'id': 'a7a283b5-c0e2-4b79-a78c-39c630324762', 'name': 'phusers', 'displayName': 'primehub users', 'writable': False}]
+```
+
+### Admin actions for datasets
+
+These actions can only be used by administrators:
+
+* create
+* update
+* delete
+
+The `create` and `update` actions require a dataset configuration; please see the examples below.
+
+### Fields for creating or updating
+
+| field | required | type | description |
+| --- | --- | --- | --- |
+| name | required | string | it must be a valid Kubernetes resource name |
+| displayName | optional | string | display name for this dataset |
+| description | optional | string | |
+| global | optional | boolean | when a dataset is global, it can be seen by every group |
+| type | required | string | one of ['pv', 'nfs', 'hostPath', 'git', 'env'] |
+| url | conditional | string | **MUST** be used with the `git` type |
+| pvProvisioning | conditional | string | one of ['auto', 'manual'], **MUST** be used with the `pv` type. This field is only used in the `CREATE` action |
+| nfsServer | conditional | string | **MUST** be used with the `nfs` type |
+| nfsPath | conditional | string | **MUST** be used with the `nfs` type |
+| hostPath | conditional | string | **MUST** be used with the `hostPath` type |
+| variables | optional | dict | **MAY** be used with the `env` type. It is key-value pairs, and all values must be strings. For example: `{"key1":"value1","key2":"value2"}`. |
+| groups | optional | list of connected groups (dict) | please see the `connect` examples |
+| secret | optional | dict | **MAY** be used with the `git` type; it binds a `secret` to the `git` dataset |
+| volumeSize | conditional | integer | **MUST** be used with the `pv` type. The unit is `GB`. |
+| enableUploadServer | optional | boolean | it only works with the writable types (one of ['pv', 'nfs', 'hostPath']) |
+
+> There is a simple rule for using fields with `UPDATE`: required fields should not be included in the payload.
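+
+To show another conditional combination, here is a sketch that creates a `git` dataset; the repository URL is an illustrative placeholder, not a real repository:
+
+```bash
+# create a git-synced dataset (the URL below is an assumed placeholder)
+primehub admin datasets create <<EOF
+{
+  "name": "git-dataset",
+  "displayName": "dataset via git",
+  "type": "git",
+  "url": "https://github.com/example/dataset-repo.git"
+}
+EOF
+```
+
+Following the table above, `url` is required here only because `type` is `git`; a `secret` may also be bound for private repositories.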
+
+For example, there is a configuration for creating an env dataset:
+
+```bash
+primehub admin datasets create <<EOF
+{
+  "name": "env-dataset",
+  "description": "",
+  "type": "env",
+  "variables": {
+    "key1": "value1",
+    "key2": "value2"
+  }
+}
+EOF
+```
+
+### Group connections
+
+> There are no options for a global `write`; you have to set `write` for each group through the `groups.connect` field.
+
+#### groups.connect
+
+Here is our example in the `connect`:
+
+```json
+{
+  "connect": [
+    {
+      "id": "a7a283b5-c0e2-4b79-a78c-39c630324762",
+      "writable": true
+    }
+  ]
+}
+```
+
+The `groups.connect` can be used with:
+
+* primehub admin datasets create
+* primehub admin datasets update
+
+However, `writable` is only meaningful for the writable dataset types (one of `['pv', 'nfs', 'hostPath']`).
+When a writable dataset is connected to a group with `writable: false`, that group gets read-only access to the dataset.
+
+
+#### groups.disconnect
+
+The `groups.disconnect` can be used with:
+
+* primehub admin datasets update
+
+
+```json
+{
+  "connect": [
+    {
+      "id": "a7a283b5-c0e2-4b79-a78c-39c630324762",
+      "writable": true
+    }
+  ],
+  "disconnect": [
+    {
+      "id": "a7a283b5-c0e2-4b79-a78c-39c630324762"
+    }
+  ]
+}
+```
+
+`groups.disconnect` removes the association between the dataset and the group.
+
+The result depends on the `global` value:
+* `true` -> the disconnected group can still read the dataset
+* `false` -> the disconnected group can no longer see the dataset \ No newline at end of file
diff --git a/primehub/extras/templates/examples/apps.md b/primehub/extras/templates/examples/apps.md
index 2026f9e..45b9faa 100644
--- a/primehub/extras/templates/examples/apps.md
+++ b/primehub/extras/templates/examples/apps.md
@@ -65,12 +65,12 @@ instanceTypeSpec:
 memoryLimit: 2
 gpuLimit: 0
 scope: primehub
-appUrl: https://qty0712-microk8s.aws.primehub.io/console/apps/code-server-26fcc
+appUrl: http://primehub-python-sdk.primehub.io/console/apps/code-server-26fcc
 internalAppUrl: http://app-code-server-26fcc:8080/console/apps/code-server-26fcc
 svcEndpoints: ['app-code-server-26fcc:8080']
 env: [{'name': 'key1', 'value': 'value1'}]
 stop: False
 status: Ready
 message: Deployment is ready
-pods: [{'logEndpoint': 'https://qty0712-microk8s.aws.primehub.io/api/logs/pods/app-code-server-26fcc-765bf579c5-srcft'}]
+pods: [{'logEndpoint': 'http://primehub-python-sdk.primehub.io/api/logs/pods/app-code-server-26fcc-765bf579c5-srcft'}]
 ``` \ No newline at end of file
diff --git a/primehub/extras/templates/examples/datasets.md b/primehub/extras/templates/examples/datasets.md
index 8f6262a..0d5d3fa 100644
--- a/primehub/extras/templates/examples/datasets.md
+++ b/primehub/extras/templates/examples/datasets.md
@@ -1,5 +1,3 @@
-### Query datasets
-
 The `datasets` command is a group specific resource. It only works after the `group` assigned. 
Using `list` to find all datasets in your group: @@ -9,286 +7,22 @@ $ primehub datasets list ``` ``` -id name displayName description type ------------ ----------- -------------------------- ------------------------------- ------ -pv-dataset pv-dataset the dataset created by SDK It is a PV dataset pv -env-dataset env-dataset env-dataset make changes to the description env +id name displayName description type +------ ------ ------------- ------------- ------ +kaggle kaggle kaggle pv ``` If you already know the name of a dataset, use the `get` to get a single entry: ``` -$ primehub datasets get dataset +$ primehub datasets get kaggle ``` ``` -id: pv-dataset -name: pv-dataset -displayName: the dataset created by SDK -description: It is a PV dataset -type: pv -pvProvisioning: auto -volumeSize: 1 -enableUploadServer: True -uploadServerLink: http://primehub-python-sdk.primehub.io/dataset/hub/pv-dataset/browse -global: False -groups: [{'id': 'a962305b-c884-4413-9358-ef56373b287c', 'name': 'foobarbar', 'displayName': '', 'writable': False}, {'id': 'a7a283b5-c0e2-4b79-a78c-39c630324762', 'name': 'phusers', 'displayName': 'primehub users', 'writable': False}] -``` - -### Admin actions for datasets - -These actions only can be used by administrators: - -* create -* update -* delete - -For `create` and `update` require a dataset configuration, please see above examples. - -### Fields for creating or updating - -| field | required | type | description | -| --- | --- | --- | --- | -| name | required | string | it should be a valid resource name for kubernetes | -| displayName | optional | string | display name for this dataset | -| description | optional | string | | -| global | optional | boolean | when a dataset is global, it could be seen for each group | -| type | required | string | one of ['pv', 'nfs', 'hostPath', 'git', 'env'] | -| url | conditional | string | **MUST** use with `git` type | -| pvProvisioning | conditional | string | onf of ['auto', 'manual'], **MUST** use with `pv` type. This field only uses in `CREATE` action | -| nfsServer | conditional | string | **MUST** use with `nfs` type | -| nfsPath | conditional | string | **MUST** use with `nfs` type | -| hostPath | conditional | string | **MUST** use with `hostPath` type | -| variables | optional | dict | **MAY** use with `env` type. It is key value pairs. All values have to a string value. For example: `{"key1":"value1","key2":"value2"}`. | -| groups | optional | list of connected groups (dict) | please see the `connect` examples | -| secret | optional | dict | **MAY** use with `git` type | bind a `secret` to the `git` dataset | -| volumeSize | conditional | integer | **MUST** use with `pv` type. The unit is `GB`.| -| enableUploadServer | optional | boolean | it only works with one of ['pv', 'nfs', 'hostPath'] writable types | - -> There is a simple rule to use fields for `UPDATE`. All required fields should not be in the payload. - -For example, there is a configuration for creating env dataset: - -```bash -primehub datasets create <